You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2011/04/12 16:53:36 UTC
svn commit: r1091439 -
/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
Author: jukka
Date: Tue Apr 12 14:53:36 2011
New Revision: 1091439
URL: http://svn.apache.org/viewvc?rev=1091439&view=rev
Log:
JCR-2864: Use out-of-process text extraction
Add a forkJavaCommand configuration option that enables forked parsing.
Modified:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?rev=1091439&r1=1091438&r2=1091439&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Tue Apr 12 14:53:36 2011
@@ -61,11 +61,16 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fork.ForkParser;
+import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
+import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import javax.jcr.RepositoryException;
@@ -216,6 +221,12 @@ public class SearchIndex extends Abstrac
private String tikaConfigPath = null;
/**
+ * Java command used to fork external parser processes,
+ * or <code>null</code> (the default) for in-process text extraction.
+ */
+ private String forkJavaCommand = null;
+
+ /**
* The Tika parser for extracting text content from binary properties.
* Initialized by the {@link #getParser()} method during first access.
*/
@@ -895,6 +906,26 @@ public class SearchIndex extends Abstrac
}
/**
+ * Returns the Java command used to fork external parser processes,
+ * or <code>null</code> (the default) when text is extracted in-process.
+ *
+ * @return the fork Java command, or <code>null</code> if none is set
+ */
+ public String getForkJavaCommand() {
+ return forkJavaCommand;
+ }
+
+ /**
+ * Sets the Java command used to fork external parser processes.
+ *
+ * @param command the fork Java command,
+ * or <code>null</code> for in-process text extraction
+ */
+ public void setForkJavaCommand(String command) {
+ this.forkJavaCommand = command;
+ }
+
+ /**
* Returns the parser used for extracting text content
* from binary properties for full text indexing.
*
@@ -932,7 +963,16 @@ public class SearchIndex extends Abstrac
config = TikaConfig.getDefaultConfig();
}
- parser = new AutoDetectParser(config);
+ if (forkJavaCommand == null) {
+ parser = new AutoDetectParser(config);
+ } else {
+ ForkParser forkParser = new ForkParser(
+ SearchIndex.class.getClassLoader(),
+ new AutoDetectParser(config));
+ forkParser.setJavaCommand(forkJavaCommand);
+ forkParser.setPoolSize(extractorPoolSize);
+ parser = forkParser;
+ }
}
return parser;
}