2015-10-12 7 views
21

Sto usando Apache Jackrabbit JCA 2.7.5; il problema è che i file .docx e .xlsx non vengono indicizzati. (Titolo: Apache Jackrabbit JCA 2.7.5 – indicizzazione di .docx e .xlsx)

I miei passi:

  • distribuire Jackrabbit JCA come resource adapter su GlassFish
  • creare un Connector Connection Pool per il resource adapter indicando ConfigFile=path/to/the/repository.xml e HomeDir=path/to/the //miss the repository.xml
  • creare una Connector Resource per il connector connection pool (JNDI)
  • creare l'applicazione web
  • creare la classe per ottenere la sessione dalla connector resource (codice qui sotto)

    import java.io.Serializable; 
    import java.net.MalformedURLException; 
    import javax.annotation.Resource; 
    import javax.ejb.Stateless; 
    import javax.jcr.LoginException; 
    import javax.jcr.Repository; 
    import javax.jcr.RepositoryException; 
    import javax.jcr.Session; 
    import javax.jcr.SimpleCredentials; 
    import javax.naming.InitialContext; 
    import javax.naming.NamingException; 
    /**
     * Stateless EJB that obtains JCR sessions from the repository bound in JNDI.
     *
     * <p>The most recently resolved repository and session are kept in the public
     * {@link #repository} and {@link #session} fields.
     */
    @Stateless
    public class OcmRepository implements Serializable {

        /** Repository resolved by the last call to {@link #getSession(String, String)}. */
        public Repository repository;

        /** Session opened by the last call to {@link #getSession(String, String)}. */
        public Session session;

        public OcmRepository() {
        }

        /**
         * Looks up the repository under the JNDI name {@code jndi/jca} and logs in with the
         * given credentials, passing {@code null} as the workspace name (per the JCR API this
         * selects the repository's default workspace).
         *
         * @param log user id used to build the credentials
         * @param mdp password; must not be {@code null}
         * @return the newly opened session (also stored in {@link #session})
         * @throws LoginException if the repository rejects the credentials
         * @throws RepositoryException on any other repository failure
         * @throws NamingException if the JNDI context cannot be created or the lookup fails
         * @throws MalformedURLException declared for callers; not raised by the code in this method
         */
        public Session getSession(String log, String mdp) throws LoginException, RepositoryException, NamingException, MalformedURLException {
            final InitialContext jndi = new InitialContext();
            final Object bound = jndi.lookup("jndi/jca");
            this.repository = (Repository) bound;

            final SimpleCredentials credentials = new SimpleCredentials(log, mdp.toCharArray());
            this.session = this.repository.login(credentials, null);
            return this.session;
        }
    }
    
  • creare un tipo di nodo personalizzato (FileType)

    import javax.jcr.PropertyType; 
    import javax.jcr.Session; 
    import javax.jcr.nodetype.NodeType; 
    import javax.jcr.nodetype.NodeTypeManager; 
    import javax.jcr.nodetype.NodeTypeTemplate; 
    import javax.jcr.nodetype.PropertyDefinitionTemplate; 
    
    /** 
    * 
    * @author nathan 
    */ 
    public class FileType { 
        public static void RegisterFileType(Session session) throws Exception {   
         NodeTypeManager nodeTypeManager = session.getWorkspace().getNodeTypeManager(); 
    
         NodeTypeTemplate nodeType = nodeTypeManager.createNodeTypeTemplate(); 
         nodeType.setName("FileType"); 
         String[] str = {"nt:resource"};   
         nodeType.setDeclaredSuperTypeNames(str); 
         nodeType.setMixin(false); 
         nodeType.setQueryable(true); 
    
    
         PropertyDefinitionTemplate path = nodeTypeManager.createPropertyDefinitionTemplate(); 
         path.setName("jcr:path"); 
         path.setRequiredType(PropertyType.PATH); 
         path.setQueryOrderable(false); 
         path.setFullTextSearchable(false); 
         nodeType.getPropertyDefinitionTemplates().add(path); 
    
         PropertyDefinitionTemplate nom = nodeTypeManager.createPropertyDefinitionTemplate(); 
         nom.setName("jcr:nom"); 
         nom.setRequiredType(PropertyType.STRING); 
         nom.setQueryOrderable(true); 
         nom.setFullTextSearchable(true); 
         nodeType.getPropertyDefinitionTemplates().add(nom); 
    
         PropertyDefinitionTemplate description = nodeTypeManager.createPropertyDefinitionTemplate(); 
         description.setName("jcr:description"); 
         description.setRequiredType(PropertyType.STRING); 
         description.setQueryOrderable(true); 
         description.setFullTextSearchable(true); 
         nodeType.getPropertyDefinitionTemplates().add(description); 
    
         PropertyDefinitionTemplate motsCles = nodeTypeManager.createPropertyDefinitionTemplate(); 
         motsCles.setName("jcr:motsCles"); 
         motsCles.setRequiredType(PropertyType.STRING); 
         motsCles.setQueryOrderable(true); 
         motsCles.setFullTextSearchable(true); 
         nodeType.getPropertyDefinitionTemplates().add(motsCles); 
    
         PropertyDefinitionTemplate size = nodeTypeManager.createPropertyDefinitionTemplate(); 
         size.setName("jcr:size"); 
         size.setRequiredType(PropertyType.STRING); 
         size.setQueryOrderable(true); 
         size.setFullTextSearchable(false); 
         nodeType.getPropertyDefinitionTemplates().add(size); 
    
         PropertyDefinitionTemplate users = nodeTypeManager.createPropertyDefinitionTemplate(); 
         users.setName("jcr:users"); 
         users.setRequiredType(PropertyType.STRING); 
         users.setQueryOrderable(true); 
         users.setFullTextSearchable(false); 
         nodeType.getPropertyDefinitionTemplates().add(users); 
    
         PropertyDefinitionTemplate groupe = nodeTypeManager.createPropertyDefinitionTemplate(); 
         groupe.setName("jcr:groupe"); 
         groupe.setRequiredType(PropertyType.STRING); 
         groupe.setQueryOrderable(true); 
         groupe.setFullTextSearchable(false); 
         nodeType.getPropertyDefinitionTemplates().add(groupe); 
    
         NodeType newnodetype = nodeTypeManager.registerNodeType(nodeType, true);    
         session.save();   
        } 
    
    } 
    
  • creare la classe astratta per la persistenza

    import java.util.ArrayList; 
    import java.util.List; 
    import java.util.Map; 
    
    import javax.jcr.Session; 
    
    import org.apache.jackrabbit.ocm.query.Filter; 
    import org.apache.jackrabbit.ocm.query.impl.FilterImpl; 
    import org.apache.jackrabbit.ocm.query.impl.QueryImpl; 
    import org.apache.jackrabbit.ocm.query.Query; 
    import org.apache.jackrabbit.ocm.query.QueryManager; 
    
    import org.apache.jackrabbit.ocm.manager.ObjectContentManager; 
    import org.apache.jackrabbit.ocm.manager.impl.ObjectContentManagerImpl; 
    
    import org.apache.jackrabbit.ocm.mapper.Mapper; 
    import org.apache.jackrabbit.ocm.mapper.impl.annotation.AnnotationMapperImpl; 
    
    import org.apache.jackrabbit.ocm.reflection.ReflectionUtils; 
    
    
    /**
     * Generic base class for beans that persist entities of type {@code T} through a
     * Jackrabbit {@link ObjectContentManager} (OCM).
     *
     * <p>Every mutating operation calls {@code ocm.save()} immediately after the change.
     * An instance created with {@link #AbstractBean(Class)} has no content manager until
     * {@link #setOcm(ObjectContentManager)} is called; calling a persistence method before
     * that fails with a {@link NullPointerException}.
     *
     * @param <T> entity type managed by the concrete bean
     * @author nathan
     */
    public abstract class AbstractBean<T> {

        private final Class<T> entityClass;
        private ObjectContentManager ocm;
        private Mapper mapper;

        /**
         * Creates a bean without a content manager; one must be supplied later through
         * {@link #setOcm(ObjectContentManager)}.
         *
         * @param entityClass class of the entities handled by this bean
         */
        public AbstractBean(Class<T> entityClass) {
            this.entityClass = entityClass;
        }

        /**
         * Creates a bean whose content manager is bound to the given session and uses the
         * default annotation mapper.
         *
         * @param entityClass class of the entities handled by this bean
         * @param session JCR session the content manager works on
         */
        public AbstractBean(Class<T> entityClass, Session session) {
            this.entityClass = entityClass;
            // Keep the mapper so getMapper() reports the one the content manager really uses.
            this.mapper = getDefaultMapper();
            this.ocm = new ObjectContentManagerImpl(session, this.mapper);
        }

        /**
         * @return the content manager of this bean, or {@code null} if none has been set
         */
        public ObjectContentManager getOcm() throws Exception {
            return ocm;
        }

        /**
         * Builds a new content manager for the given session and mapper. The result is NOT
         * stored in this bean; use {@link #setOcm(ObjectContentManager)} for that.
         *
         * @param session JCR session the new content manager works on
         * @param map mapper used to map entities between the application and the repository
         * @return a freshly created content manager
         */
        public ObjectContentManager getOcm(Session session, Mapper map) throws Exception {
            return new ObjectContentManagerImpl(session, map);
        }

        public void setOcm(ObjectContentManager ocm) {
            this.ocm = ocm;
        }

        /**
         * Builds the annotation mapper for the application's entity classes, after pointing
         * the OCM reflection utilities at the class loader of the entities.
         */
        private Mapper getDefaultMapper() {
            ReflectionUtils.setClassLoader(com.ged.ocm.entity.Groupe.class.getClassLoader());
            // Raw Class is kept on purpose: the list is handed to AnnotationMapperImpl as-is.
            List<Class> classes = new ArrayList<Class>();
            classes.add(com.ged.ocm.entity.Fichier.class);
            classes.add(com.ged.ocm.entity.Dossier.class);
            classes.add(com.ged.ocm.entity.Groupe.class);
            classes.add(com.ged.ocm.entity.SimpleNode.class);
            return new AnnotationMapperImpl(classes);
        }

        /**
         * @return the mapper set through {@link #setMapper(Mapper)}, or the default mapper
         *         when the bean was built with a session; {@code null} otherwise
         */
        public Mapper getMapper() {
            return mapper;
        }

        /** Stores a mapper reference; the current content manager is not rebuilt. */
        public void setMapper(Mapper mapper) {
            this.mapper = mapper;
        }

        /** Points the OCM reflection utilities at the class loader of {@code classe}. */
        public void setLoader(Class<?> classe) {
            ReflectionUtils.setClassLoader(classe.getClassLoader());
        }

        /** Inserts the entity and saves. */
        public void create(T entity) {
            ocm.insert(entity);
            ocm.save();
        }

        /** Updates the entity and saves. */
        public void edit(T entity) {
            ocm.update(entity);
            ocm.save();
        }

        /** Removes the entity and saves. */
        public void remove(T entity) {
            ocm.remove(entity);
            ocm.save();
        }

        /** Calls {@code ocm.refresh(true)} and then saves. */
        public void refresh() {
            ocm.refresh(true);
            ocm.save();
        }

        /** Copies the object at {@code orgPath} to {@code destPath} and saves. */
        public void copy(String orgPath, String destPath) {
            ocm.copy(orgPath, destPath);
            ocm.save();
        }

        /** Moves the object at {@code orgPath} to {@code destPath} and saves. */
        public void move(String orgPath, String destPath) {
            ocm.move(orgPath, destPath);
            ocm.save();
        }

        /** Removes the object stored at {@code path} and saves. */
        public void removeByPath(String path) {
            ocm.remove(path);
            ocm.save();
        }

        /**
         * Removes every entity whose attributes equal all the given values, then saves.
         *
         * @param filters attribute name to required value
         */
        public void removeAllByEqual(Map<String, String> filters) {
            Filter filter = newFilter();
            addEqualTo(filter, filters);
            ocm.remove(toQuery(filter));
            ocm.save();
        }

        /**
         * Same as {@link #removeAllByEqual(Map)} with the filter scope set to {@code nodePath}.
         *
         * @param nodePath filter scope
         * @param filters attribute name to required value
         */
        public void removeAllByEqual(String nodePath, Map<String, String> filters) {
            Filter filter = newScopedFilter(nodePath);
            addEqualTo(filter, filters);
            ocm.remove(toQuery(filter));
            ocm.save();
        }

        /** @return whether an object exists at {@code path} */
        public boolean isPathExist(String path) {
            return ocm.objectExists(path);
        }

        /**
         * Loads the object stored at {@code path}.
         *
         * @return the object, or {@code null} if it cannot be loaded for any reason
         */
        @SuppressWarnings("unchecked")
        public T findByPath(String path) {
            try {
                return (T) ocm.getObject(path);
            } catch (Exception ignored) {
                // Deliberate best-effort lookup: any failure is reported as "not found".
                return null;
            }
        }

        /**
         * Finds the first entity whose attributes equal all the given values.
         *
         * @param filters attribute name to required value
         * @return the first match, or {@code null} when nothing matches
         */
        public T findOneByEqual(Map<String, String> filters) {
            Filter filter = newFilter();
            addEqualTo(filter, filters);
            return firstOrNull(run(toQuery(filter)));
        }

        /**
         * Finds all entities, with filter scope {@code "//"}, whose attributes equal all the
         * given values.
         *
         * @param filters attribute name to required value
         */
        public List<T> findAllByEqual(Map<String, String> filters) {
            Filter filter = newScopedFilter("//");
            addEqualTo(filter, filters);
            return run(toQuery(filter));
        }

        /**
         * Finds all entities, with filter scope {@code "//"}, whose attributes are "like" all
         * the given patterns.
         *
         * @param filters attribute name to pattern
         */
        public List<T> findAllByLike(Map<String, String> filters) {
            Filter filter = newScopedFilter("//");
            addLike(filter, filters);
            return run(toQuery(filter));
        }

        /**
         * Same as {@link #findAllByLike(Map)} with a caller-supplied filter scope.
         *
         * @param scope filter scope
         * @param filters attribute name to pattern
         */
        public List<T> findAllByLikeScoped(String scope, Map<String, String> filters) {
            Filter filter = newScopedFilter(scope);
            addLike(filter, filters);
            return run(toQuery(filter));
        }

        /**
         * Finds all entities, with filter scope {@code "//"}, using an OR filter built from
         * {@code attr} and the given values.
         *
         * @param attr attribute name
         * @param val candidate values passed to {@code Filter.addOrFilter}
         */
        public List<T> findAllByOrLike(String attr, String[] val) {
            Filter filter = newScopedFilter("//");
            filter.addOrFilter(attr, val);
            return run(toQuery(filter));
        }

        /**
         * Same as {@link #findOneByEqual(Map)} with the filter scope set to {@code nodePath}.
         *
         * @param nodePath filter scope
         * @param filters attribute name to required value
         * @return the first match, or {@code null} when nothing matches
         */
        public T findOneByEqual(String nodePath, Map<String, String> filters) {
            Filter filter = newScopedFilter(nodePath);
            addEqualTo(filter, filters);
            // Returns null on an empty result, like the unscoped overload.
            return firstOrNull(run(toQuery(filter)));
        }

        /**
         * Same as {@link #findAllByEqual(Map)} with the filter scope set to {@code nodePath}.
         *
         * @param nodePath filter scope
         * @param filters attribute name to required value
         */
        public List<T> findAllByEqual(String nodePath, Map<String, String> filters) {
            Filter filter = newScopedFilter(nodePath);
            addEqualTo(filter, filters);
            return run(toQuery(filter));
        }

        /**
         * Runs a raw JCR-SQL2 statement. The statement is passed through unchanged, so the
         * caller is responsible for escaping any user-supplied value it contains.
         *
         * @param query JCR-SQL2 statement
         */
        @SuppressWarnings("unchecked")
        public List<T> findAllByString(String query) {
            return (List<T>) ocm.getObjects(query, javax.jcr.query.Query.JCR_SQL2);
        }

        /**
         * Finds all entities of this bean's type with the filter scope set to {@code nodePath}.
         *
         * @param nodePath filter scope
         */
        public List<T> findAllByParentPath(String nodePath) {
            return run(toQuery(newScopedFilter(nodePath)));
        }

        /**
         * Same as {@link #findAllByParentPath(String)}, sorted ascending on {@code ordering}.
         *
         * @param nodePath filter scope
         * @param ordering attribute to sort on
         */
        public List<T> findAllByParentPathOrder(String nodePath, String ordering) {
            Query query = toQuery(newScopedFilter(nodePath));
            query.addOrderByAscending(ordering);
            return run(query);
        }

        /**
         * Counts the entities returned for the filter scope {@code nodePath}. The full result
         * list is loaded in order to count it.
         *
         * @param nodePath filter scope
         */
        public int coutChild(String nodePath) {
            return run(toQuery(newScopedFilter(nodePath))).size();
        }

        /** @return whether an object exists at {@code path} (same check as {@link #isPathExist(String)}) */
        public boolean ifExistByPath(String path) {
            return ocm.objectExists(path);
        }

        /**
         * Derives the parent path by splitting {@code path} on {@code '/'}, skipping the first
         * segment (empty for an absolute path) and dropping the last one; for example
         * {@code "/a/b/c"} gives {@code "/a/b"} and {@code "/a"} gives {@code ""}.
         *
         * @param path slash-separated path
         * @return the rebuilt parent path, possibly empty
         */
        public String getParentPath(String path) {
            String[] segments = path.split("/");
            StringBuilder parent = new StringBuilder();
            for (int i = 1; i < segments.length - 1; i++) {
                parent.append('/').append(segments[i]);
            }
            return parent.toString();
        }

        /** Creates an unscoped filter for this bean's entity class. */
        private Filter newFilter() {
            return ocm.getQueryManager().createFilter(entityClass);
        }

        /** Creates a filter for this bean's entity class and sets its scope unconditionally. */
        private Filter newScopedFilter(String scope) {
            Filter filter = newFilter();
            filter.setScope(scope);
            return filter;
        }

        /** Adds one equality constraint per map entry. */
        private static void addEqualTo(Filter filter, Map<String, String> filters) {
            for (Map.Entry<String, String> entry : filters.entrySet()) {
                filter.addEqualTo(entry.getKey(), entry.getValue());
            }
        }

        /** Adds one "like" constraint per map entry. */
        private static void addLike(Filter filter, Map<String, String> filters) {
            for (Map.Entry<String, String> entry : filters.entrySet()) {
                filter.addLike(entry.getKey(), entry.getValue());
            }
        }

        /** Wraps the filter into an OCM query. */
        private Query toQuery(Filter filter) {
            return ocm.getQueryManager().createQuery(filter);
        }

        /** Executes the query and returns the matching entities. */
        @SuppressWarnings("unchecked")
        private List<T> run(Query query) {
            return (List<T>) ocm.getObjects(query);
        }

        /** Returns the first element, or {@code null} for a null or empty list. */
        private static <E> E firstOrNull(List<E> items) {
            return (items == null || items.isEmpty()) ? null : items.get(0);
        }
    }
    
  • creare il bean

    import javax.ejb.Stateless; 
    import com.ged.ocm.entity.Fichier; 
    import java.io.InputStream; 
    import java.util.ArrayList; 
    import java.util.List; 
    import java.util.Map; 
    import javax.jcr.Node; 
    import javax.jcr.NodeIterator; 
    import javax.jcr.Session; 
    import javax.jcr.Workspace; 
    import javax.jcr.query.QueryResult; 
    import javax.jcr.query.qom.FullTextSearch; 
    import javax.jcr.query.qom.StaticOperand; 
    import org.apache.jackrabbit.ocm.query.Filter; 
    import org.apache.jackrabbit.ocm.query.Query; 
    import org.apache.jackrabbit.ocm.query.QueryManager; 
    
    /**
     * Stateless bean exposing full-text searches over {@link Fichier} entities stored as
     * {@code FileType} nodes.
     */
    @Stateless
    public class FichierBean extends AbstractBean<Fichier> {

        public FichierBean() {
            super(Fichier.class);
        }

        public FichierBean(Session session) {
            super(Fichier.class, session);
        }

        /**
         * Full-text search over all {@code FileType} nodes using a JCR-SQL2 {@code CONTAINS}
         * constraint on {@code *motCles*}.
         *
         * @param motCles search term; single quotes are escaped before it is embedded in the query
         * @return the matching files
         * @throws Exception if the content manager cannot be obtained or the query fails
         */
        public List<Fichier> findAllByContains(String motCles) throws Exception {
            String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*" + escapeLiteral(motCles) + "*')";
            return runSql2(requette);
        }

        /**
         * Same search as {@link #findAllByContains(String)}, ordered by {@code Res.nom}, keeping
         * only the files whose path starts with {@code path}. The path filter is applied in
         * memory after the query has returned.
         *
         * @param path required path prefix; must not be {@code null}
         * @param motCles search term; single quotes are escaped before it is embedded in the query
         * @return the matching files located under {@code path}
         * @throws Exception if the content manager cannot be obtained or the query fails
         */
        public List<Fichier> findAllByContains(String path, String motCles) throws Exception {
            String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*" + escapeLiteral(motCles) + "*') ORDER BY Res.nom";

            List<Fichier> results = new ArrayList<Fichier>();
            for (Fichier fichier : runSql2(requette)) {
                if (fichier.getPath().startsWith(path)) {
                    results.add(fichier);
                }
            }
            return results;
        }

        /**
         * Full-text search built with the OCM filter API instead of a hand-written statement.
         *
         * @param motCles search term, wrapped in {@code *} on both sides
         * @return the matching files
         * @throws Exception if the content manager cannot be obtained or the query fails
         */
        public List<Fichier> fulltextByOCM(String motCles) throws Exception {
            QueryManager queryManager = this.getOcm().getQueryManager();

            Filter filter = queryManager.createFilter(com.ged.ocm.entity.Fichier.class);
            // NOTE(review): motCles is passed to OCM unescaped — confirm whether
            // Filter.addContains quotes its argument before trusting user input here.
            filter.addContains(".", "*" + motCles + "*");

            Query query = queryManager.createQuery(filter);

            @SuppressWarnings("unchecked")
            List<Fichier> results = (List<Fichier>) this.getOcm().getObjects(query);
            return results;
        }

        /** Runs a JCR-SQL2 statement through the content manager. */
        @SuppressWarnings("unchecked")
        private List<Fichier> runSql2(String statement) throws Exception {
            return (List<Fichier>) this.getOcm().getObjects(statement, javax.jcr.query.Query.JCR_SQL2);
        }

        /**
         * Makes a value safe to embed between single quotes in a JCR-SQL2 statement by
         * doubling every single quote. A {@code null} value becomes the text {@code "null"},
         * which is what plain string concatenation would have produced.
         */
        private static String escapeLiteral(String raw) {
            return String.valueOf(raw).replace("'", "''");
        }

    }
    

I miei file di configurazione:

  • repository.xml

    <?xml version="1.0"?> 
    <!DOCTYPE Repository PUBLIC "-//The Apache Software Foundation//DTD Jackrabbit 1.6//EN" 
             "http://jackrabbit.apache.org/dtd/repository-1.6.dtd"> 
    <Repository>   
    <!--
    <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem"> 
        <param name="path" value="${rep.home}/repository"/> 
    </FileSystem> 
    --> 
    
    <FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem"> 
        <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/> 
        <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" /> 
        <param name="user" value="root" /> 
        <param name="password" value="root" /> 
        <param name="schema" value="mysql"/> 
        <param name="schemaObjectPrefix" value="J_R_FS_"/> 
    </FileSystem> 
    
    <!-- 
        security configuration 
    --> 
    <Security appName="Jackrabbit"> 
        <AccessManager class="org.apache.jackrabbit.core.security.SimpleAccessManager" /> 
        <LoginModule class="org.apache.jackrabbit.core.security.SimpleLoginModule"> 
         <param name="anonymousId" value="anonymous" /> 
        </LoginModule> 
    </Security> 
    
    <!-- 
        location of workspaces root directory and name of default workspace 
    --> 
    <Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/> 
    <!-- 
        workspace configuration template: 
        used to create the initial workspace if there's no workspace yet 
    --> 
    <Workspace name="${wsp.name}"> 
    
        <PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager"> 
         <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/> 
         <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" /> 
         <param name="user" value="root" /> 
         <param name="password" value="root" /> 
         <param name="schema" value="mysql" /> 
         <param name="schemaObjectPrefix" value="J_PM_${wsp.name}_" /> 
         <param name="externalBLOBs" value="false" /> 
        </PersistenceManager> 
        <FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem"> 
         <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/> 
         <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" /> 
         <param name="user" value="root" /> 
         <param name="password" value="root" /> 
         <param name="schema" value="mysql"/> 
         <param name="schemaObjectPrefix" value="J_FS_${wsp.name}_"/> 
        </FileSystem> 
    
        <!-- 
         Search index and the file system it uses. 
         class: FQN of class implementing the QueryHandler interface 
        --> 
        <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex"> 
         <param name="path" value="${rep.home}/workspaces/${wsp.name}/index"/> 
         <param name="tikaConfigPath" value="${rep.home}/tika-config.xml"/> 
         <param name="useCompoundFile" value="true"/> 
         <param name="minMergeDocs" value="100"/> 
         <param name="volatileIdleTime" value="3"/> 
         <param name="maxMergeDocs" value="2147483647"/> 
         <param name="mergeFactor" value="10"/> 
         <param name="maxFieldLength" value="10000"/> 
         <param name="bufferSize" value="10"/> 
         <param name="cacheSize" value="1000"/> 
         <param name="forceConsistencyCheck" value="false"/> 
         <param name="enableConsistencyCheck" value="false"/> 
         <param name="autoRepair" value="true"/> 
         <param name="analyzer" value="org.apache.lucene.analysis.standard.StandardAnalyzer"/> 
         <param name="queryClass" value="org.apache.jackrabbit.core.query.QueryImpl"/> 
         <param name="respectDocumentOrder" value="true"/> 
         <param name="resultFetchSize" value="2147483647"/> 
         <param name="extractorPoolSize" value="0"/> 
         <param name="extractorTimeout" value="100"/> 
         <param name="extractorBackLogSize" value="100"/> 
         <param name="supportHighlighting" value="true"/> 
         <param name="excerptProviderClass" value="org.apache.jackrabbit.core.query.lucene.DefaultXMLExcerpt"/> 
        </SearchIndex> 
    </Workspace> 
    
    <!-- 
        Configures the versioning 
    --> 
    <Versioning rootPath="${rep.home}/version"> 
        <FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem"> 
         <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/> 
         <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" /> 
         <param name="user" value="root" /> 
         <param name="password" value="root" /> 
         <param name="schema" value="mysql"/> 
         <param name="schemaObjectPrefix" value="J_V_FS_"/> 
        </FileSystem> 
        <PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager"> 
         <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/> 
         <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" /> 
         <param name="user" value="root" /> 
         <param name="password" value="root" /> 
         <param name="schema" value="mysql" /> 
         <param name="schemaObjectPrefix" value="J_V_PM_" /> 
         <param name="externalBLOBs" value="false" /> 
        </PersistenceManager> 
    </Versioning> 
    
    <!-- 
        Search index for content that is shared repository wide 
        (/jcr:system tree, contains mainly versions) 
    
    <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex"> 
        <param name="path" value="${rep.home}/repository/index"/> 
        <param name="extractorPoolSize" value="2"/> 
        <param name="supportHighlighting" value="true"/> 
    </SearchIndex> 
    --> 
    
    <!-- 
        Cluster configuration with system variables. 
    
    --> 
    
    <RepositoryLockMechanism class="org.apache.jackrabbit.core.util.CooperativeFileLock" /> 
    
    </Repository> 
    
  • tika-config.xml

    <?xml version="1.0" encoding="UTF-8"?> 
    <properties> 
    
    <mimeTypeRepository resource="/org/apache/tika/mime/tika-mimetypes.xml" magic="false"/> 
    
    <parsers> 
    
    <parser name="parse-dcxml" class="org.apache.tika.parser.xml.DcXMLParser"> 
        <mime>application/xml</mime> 
        <mime>image/svg+xml</mime> 
    </parser> 
    
    <parser name="parse-office" class="org.apache.tika.parser.microsoft.OfficeParser"> 
        <mime>application/x-tika-msoffice</mime> 
        <mime>application/msword</mime> 
        <mime>application/vnd.ms-excel</mime> 
        <mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime> 
        <mime>application/vnd.ms-powerpoint</mime> 
        <mime>application/vnd.visio</mime> 
        <mime>application/vnd.ms-outlook</mime> 
    </parser> 
    
    <parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser"> 
        <mime>application/x-tika-ooxml</mime> 
        <mime>application/vnd.openxmlformats-package.core-properties+xml</mime> 
        <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime> 
        <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime> 
        <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime> 
        <mime>application/vnd.ms-excel.template.macroenabled.12</mime> 
        <mime>application/vnd.ms-excel.addin.macroenabled.12</mime> 
        <mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime> 
        <mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime> 
        <mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime> 
        <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime> 
        <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime> 
        <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime> 
        <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime> 
        <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime> 
        <mime>application/vnd.ms-word.document.macroenabled.12</mime> 
        <mime>application/vnd.ms-word.template.macroenabled.12</mime> 
    </parser> 
    
    <parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser"> 
        <mime>text/html</mime> 
        <mime>application/xhtml+xml</mime> 
        <mime>application/x-asp</mime> 
    </parser> 
    
    <parser name="parse-rtf" class="org.apache.tika.parser.rtf.RTFParser"> 
        <mime>application/rtf</mime> 
    </parser> 
    
    <parser name="parse-pdf" class="org.apache.tika.parser.pdf.PDFParser"> 
        <mime>application/pdf</mime> 
    </parser> 
    
    <parser name="parse-txt" class="org.apache.tika.parser.txt.TXTParser"> 
        <mime>text/plain</mime> 
    </parser> 
    
    <parser name="parse-openoffice" class="org.apache.tika.parser.opendocument.OpenOfficeParser"> 
        <mime>application/vnd.sun.xml.writer</mime> 
        <mime>application/vnd.oasis.opendocument.text</mime> 
        <mime>application/vnd.oasis.opendocument.graphics</mime> 
        <mime>application/vnd.oasis.opendocument.presentation</mime> 
        <mime>application/vnd.oasis.opendocument.spreadsheet</mime> 
        <mime>application/vnd.oasis.opendocument.chart</mime> 
        <mime>application/vnd.oasis.opendocument.image</mime> 
        <mime>application/vnd.oasis.opendocument.formula</mime> 
        <mime>application/vnd.oasis.opendocument.text-master</mime> 
        <mime>application/vnd.oasis.opendocument.text-web</mime> 
        <mime>application/vnd.oasis.opendocument.text-template</mime> 
        <mime>application/vnd.oasis.opendocument.graphics-template</mime> 
        <mime>application/vnd.oasis.opendocument.presentation-template</mime> 
        <mime>application/vnd.oasis.opendocument.spreadsheet-template</mime> 
        <mime>application/vnd.oasis.opendocument.chart-template</mime> 
        <mime>application/vnd.oasis.opendocument.image-template</mime> 
        <mime>application/vnd.oasis.opendocument.formula-template</mime> 
        <mime>application/x-vnd.oasis.opendocument.text</mime> 
        <mime>application/x-vnd.oasis.opendocument.graphics</mime> 
        <mime>application/x-vnd.oasis.opendocument.presentation</mime> 
        <mime>application/x-vnd.oasis.opendocument.spreadsheet</mime> 
        <mime>application/x-vnd.oasis.opendocument.chart</mime> 
        <mime>application/x-vnd.oasis.opendocument.image</mime> 
        <mime>application/x-vnd.oasis.opendocument.formula</mime> 
        <mime>application/x-vnd.oasis.opendocument.text-master</mime> 
        <mime>application/x-vnd.oasis.opendocument.text-web</mime> 
        <mime>application/x-vnd.oasis.opendocument.text-template</mime> 
        <mime>application/x-vnd.oasis.opendocument.graphics-template</mime> 
        <mime>application/x-vnd.oasis.opendocument.presentation-template</mime> 
        <mime>application/x-vnd.oasis.opendocument.spreadsheet-template</mime> 
        <mime>application/x-vnd.oasis.opendocument.chart-template</mime> 
        <mime>application/x-vnd.oasis.opendocument.image-template</mime> 
        <mime>application/x-vnd.oasis.opendocument.formula-template</mime> 
    </parser> 
    
    <parser name="parse-image" class="org.apache.tika.parser.image.ImageParser"> 
        <mime>image/bmp</mime> 
        <mime>image/gif</mime> 
        <mime>image/jpeg</mime> 
        <mime>image/png</mime> 
        <mime>image/tiff</mime> 
        <mime>image/vnd.wap.wbmp</mime> 
        <mime>image/x-icon</mime> 
        <mime>image/x-psd</mime> 
        <mime>image/x-xcf</mime> 
    </parser> 
    
    <parser name="parse-class" class="org.apache.tika.parser.asm.ClassParser"> 
        <mime>application/x-tika-java-class</mime> 
    </parser> 
    
    <parser name="parse-mp3" class="org.apache.tika.parser.mp3.Mp3Parser"> 
        <mime>audio/mpeg</mime> 
    </parser> 
    
    <parser name="parse-midi" class="org.apache.tika.parser.audio.MidiParser"> 
        <mime>application/x-midi</mime> 
        <mime>audio/midi</mime> 
    </parser> 
    
    <parser name="parse-audio" class="org.apache.tika.parser.audio.AudioParser"> 
        <mime>audio/basic</mime> 
        <mime>audio/x-wav</mime> 
        <mime>audio/x-aiff</mime> 
    </parser> 
    
    </parsers> 
    
    </properties> 
    

Tutte le query del bean funzionano, tranne quando chiamo la funzione public List<Fichier> findAllByContains(String path,String motCles) per la ricerca full-text nei documenti .docx e .xlsx. La ricerca full-text su .pdf, .txt, .xml, .xls, .doc, ... funziona perfettamente.

+0

Qualcuno ha trovato una soluzione? – Aroniaina

risposta

0

La soluzione si concentra sui JAR dello jackrabbit-jca-2.7.5.rar!

Ci sono errori nelle dipendenze, quindi occorre apportare queste modifiche:

  • aggiungere apache-mime4j-0.6.jar
  • aggiungere apache-mime4j-core-0.7.jar
  • aggiungere commons-compress-1.5.jar

Aggiungere questi JAR nel jackrabbit-jca-2.7.5.rar prima di distribuire questo!

E l'indicizzazione di .docx, .xlsx, ... funziona con successo!

Grazie per @Ashok Felix

1

Rif: http://jackrabbit.510166.n4.nabble.com/Office-2007-documents-not-being-indexed-in-Jackrabbit-2-4-3-td4657380.html

Sulla stessa linea, ho osservato che commons-compress-1.5.jar è richiesto dal parser Tika per i tipi di documento OOXML (cioè i documenti di Office 2007).

Ora sono in grado di indicizzare e cercare la maggior parte dei tipi di documento (Office 2007 - docx, pptx, xlsx; Office 2003 - doc, ppt, xls; pdf) con i 2 passaggi seguenti:

(1) repository.xml aggiornato & aggiunto Ulteriori dettagli sono disponibili all'indirizzo https://issues.apache.org/jira/browse/JCR-3287

(2) aggiunto commons-compress-1.5.jar al classpath durante l'esecuzione di jackrabbit-standalone-2.6.2.jar

Problemi correlati