You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2017/06/19 10:22:13 UTC

svn commit: r1799173 - in /jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index: IndexCommand.java IndexHelper.java IndexOptions.java LuceneIndexHelper.java

Author: chetanm
Date: Mon Jun 19 10:22:13 2017
New Revision: 1799173

URL: http://svn.apache.org/viewvc?rev=1799173&view=rev
Log:
OAK-6248 - Enable use of pre extracted text cache

The pre-extracted text directory can be specified with the `--pre-extracted-text-dir` option.

Modified:
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java?rev=1799173&r1=1799172&r2=1799173&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexCommand.java Mon Jun 19 10:22:13 2017
@@ -101,6 +101,8 @@ public class IndexCommand implements Com
         IndexHelper indexHelper = new IndexHelper(store, blobStore, statisticsProvider, indexOpts.getOutDir(),
                 indexOpts.getWorkDir(), indexOpts.getIndexPaths());
 
+        configurePreExtractionSupport(indexOpts, indexHelper);
+
         closer.register(indexHelper);
 
         dumpIndexStats(indexOpts, indexHelper);
@@ -110,6 +112,14 @@ public class IndexCommand implements Com
         reindexIndex(indexOpts, indexHelper);
     }
 
+    private void configurePreExtractionSupport(IndexOptions indexOpts, IndexHelper indexHelper) throws IOException {
+        File preExtractedTextDir = indexOpts.getPreExtractedTextDir();
+        if (preExtractedTextDir != null) {
+            indexHelper.setPreExtractedTextDir(preExtractedTextDir);
+            log.info("Using pre-extracted text directory {}", getPath(preExtractedTextDir));
+        }
+    }
+
     private void reindexIndex(IndexOptions indexOpts, IndexHelper indexHelper) throws IOException, CommitFailedException {
         if (!indexOpts.isReindex()){
             return;

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java?rev=1799173&r1=1799172&r2=1799173&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexHelper.java Mon Jun 19 10:22:13 2017
@@ -35,6 +35,7 @@ import javax.annotation.Nonnull;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.io.Closer;
+import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.commons.concurrent.ExecutorCloser;
 import org.apache.jackrabbit.oak.plugins.index.AsyncIndexInfoService;
 import org.apache.jackrabbit.oak.plugins.index.AsyncIndexInfoServiceImpl;
@@ -42,8 +43,10 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.index.IndexInfoServiceImpl;
 import org.apache.jackrabbit.oak.plugins.index.IndexPathService;
 import org.apache.jackrabbit.oak.plugins.index.IndexPathServiceImpl;
+import org.apache.jackrabbit.oak.plugins.index.datastore.DataStoreTextWriter;
 import org.apache.jackrabbit.oak.plugins.index.inventory.IndexDefinitionPrinter;
 import org.apache.jackrabbit.oak.plugins.index.inventory.IndexPrinter;
+import org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache;
 import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexInfoProvider;
 import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexInfoProvider;
 import org.apache.jackrabbit.oak.spi.blob.BlobStore;
@@ -68,6 +71,7 @@ class IndexHelper implements Closeable{
     private final Closer closer = Closer.create();
     private final BlobStore blobStore;
     private final StatisticsProvider statisticsProvider;
+    private ExtractedTextCache extractedTextCache;
 
     IndexHelper(NodeStore store, BlobStore blobStore, StatisticsProvider statisticsProvider,
                 File outputDir, File workDir, List<String> indexPaths) {
@@ -143,6 +147,17 @@ class IndexHelper implements Closeable{
         return luceneIndexHelper;
     }
 
+    public ExtractedTextCache getExtractedTextCache() {
+        if (extractedTextCache == null) {
+            extractedTextCache = new ExtractedTextCache(FileUtils.ONE_MB * 5, TimeUnit.HOURS.toSeconds(5));
+        }
+        return extractedTextCache;
+    }
+
+    public void setPreExtractedTextDir(File dir) throws IOException {
+        getExtractedTextCache().setExtractedTextProvider(new DataStoreTextWriter(dir, true));
+    }
+
     @Override
     public void close() throws IOException {
         closer.close();

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java?rev=1799173&r1=1799172&r2=1799173&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/IndexOptions.java Mon Jun 19 10:22:13 2017
@@ -39,15 +39,11 @@ import org.apache.jackrabbit.oak.run.cli
 
 public class IndexOptions implements OptionsBean {
 
-    public static final OptionsBeanFactory FACTORY = new OptionsBeanFactory() {
-        @Override
-        public OptionsBean newInstance(OptionParser parser) {
-            return new IndexOptions(parser);
-        }
-    };
+    public static final OptionsBeanFactory FACTORY = IndexOptions::new;
 
     private final OptionSpec<File> workDirOpt;
     private final OptionSpec<File> outputDirOpt;
+    private final OptionSpec<File> preExtractedTextOpt;
     private final OptionSpec<Void> stats;
     private final OptionSpec<Void> definitions;
     private final OptionSpec<Void> dumpIndex;
@@ -65,6 +61,8 @@ public class IndexOptions implements Opt
                 .withRequiredArg().ofType(File.class).defaultsTo(new File("temp"));
         outputDirOpt = parser.accepts("index-out-dir", "Directory used for output files")
                 .withRequiredArg().ofType(File.class).defaultsTo(new File("indexing-result"));
+        preExtractedTextOpt = parser.accepts("pre-extracted-text-dir", "Directory storing pre extracted text")
+                .withRequiredArg().ofType(File.class);
 
         stats = parser.accepts("index-info", "Collects and dumps various statistics related to the indexes");
         definitions = parser.accepts("index-definitions", "Collects and dumps index definitions");
@@ -128,6 +126,10 @@ public class IndexOptions implements Opt
         return outputDirOpt.value(options);
     }
 
+    public File getPreExtractedTextDir() {
+        return preExtractedTextOpt.value(options);
+    }
+
     public boolean dumpStats(){
         return options.has(stats) || !anyActionSelected();
     }

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java?rev=1799173&r1=1799172&r2=1799173&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java Mon Jun 19 10:22:13 2017
@@ -22,10 +22,7 @@ package org.apache.jackrabbit.oak.index;
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
-import java.util.concurrent.TimeUnit;
 
-import org.apache.commons.io.FileUtils;
-import org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache;
 import org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier;
 import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider;
 import org.apache.jackrabbit.oak.plugins.index.lucene.directory.ActiveDeletedBlobCollectorFactory.BlobDeletionCallback;
@@ -35,9 +32,6 @@ import org.apache.jackrabbit.oak.spi.blo
 class LuceneIndexHelper implements Closeable {
     private final IndexHelper indexHelper;
     private IndexCopier indexCopier;
-    //TODO Set pre extracted text provider
-    private final ExtractedTextCache textCache =
-            new ExtractedTextCache(FileUtils.ONE_MB * 5, TimeUnit.HOURS.toSeconds(5));
     private DirectoryFactory directoryFactory;
 
     LuceneIndexHelper(IndexHelper indexHelper) {
@@ -49,7 +43,7 @@ class LuceneIndexHelper implements Close
         if (directoryFactory != null) {
             editor = new LuceneIndexEditorProvider(
                     getIndexCopier(),
-                    textCache,
+                    indexHelper.getExtractedTextCache(),
                     null,
                     indexHelper.getMountInfoProvider()
             ) {
@@ -61,7 +55,7 @@ class LuceneIndexHelper implements Close
         } else {
             editor = new LuceneIndexEditorProvider(
                     getIndexCopier(),
-                    textCache,
+                    indexHelper.getExtractedTextCache(),
                     null,
                     indexHelper.getMountInfoProvider()
             );