You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/07/13 13:27:40 UTC

svn commit: r1690635 - in /jackrabbit/oak/trunk/oak-core: ./ src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/ src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/

Author: chetanm
Date: Mon Jul 13 11:27:39 2015
New Revision: 1690635

URL: http://svn.apache.org/r1690635
Log:
OAK-2892 - Speed up Lucene indexing post-migration by pre-extracting the text content from binaries

-- DataStoreTextProviderService - OSGi component to configure and register DataStoreTextWriter
-- Exported org.apache.jackrabbit.oak.plugins.index.fulltext package
-- Minor refactoring to enable using singleton instance for ERROR and EMPTY cases

Added:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-core/pom.xml
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java

Modified: jackrabbit/oak/trunk/oak-core/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/pom.xml?rev=1690635&r1=1690634&r2=1690635&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-core/pom.xml Mon Jul 13 11:27:39 2015
@@ -59,6 +59,7 @@
               org.apache.jackrabbit.oak.plugins.commit,
               org.apache.jackrabbit.oak.plugins.identifier,
               org.apache.jackrabbit.oak.plugins.index,
+              org.apache.jackrabbit.oak.plugins.index.fulltext,
               org.apache.jackrabbit.oak.plugins.index.aggregate,
               org.apache.jackrabbit.oak.plugins.index.counter,
               org.apache.jackrabbit.oak.plugins.index.nodetype,

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java?rev=1690635&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java Mon Jul 13 11:27:39 2015
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.blob.datastore;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.jackrabbit.oak.commons.PropertiesUtil;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
+import org.osgi.framework.BundleContext;
+import org.osgi.framework.ServiceRegistration;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+
+/**
+ * OSGi component that registers a {@link PreExtractedTextProvider} backed by extracted
+ * text files stored in a local file system directory.
+ *
+ * <p>A configuration is mandatory ({@link ConfigurationPolicy#REQUIRE}). On activation a
+ * {@link DataStoreTextWriter} is created over the directory named by the {@code dir}
+ * property and registered as the service; on deactivation the registration is removed
+ * and the writer is closed.
+ */
+@Component(
+        policy = ConfigurationPolicy.REQUIRE,
+        metatype = true,
+        label = "Apache Jackrabbit Oak DataStore PreExtractedTextProvider",
+        description = "Configures a PreExtractedTextProvider based on extracted text stored on FileSystem"
+)
+public class DataStoreTextProviderService {
+    @Property(
+            label = "Path",
+            description = "Local file system path where extracted text is stored in files."
+    )
+    private static final String PROP_DIR = "dir";
+
+    /** Provider instance exposed as the service; set in activate, cleared in deactivate. */
+    private DataStoreTextWriter textWriter;
+
+    /** Registration handle of the provider service; {@code null} while not registered. */
+    private ServiceRegistration reg;
+
+    /**
+     * Creates the text provider for the configured directory and registers it as a service.
+     *
+     * @param context bundle context used to register the service
+     * @param config component configuration; must contain the {@code dir} property
+     * @throws IOException if setting up the text provider fails
+     * @throws NullPointerException if the {@code dir} property is not configured
+     * @throws IllegalArgumentException if the configured path does not exist
+     */
+    @Activate
+    private void activate(BundleContext context, Map<String, ?> config) throws IOException {
+        String dirPath = PropertiesUtil.toString(config.get(PROP_DIR), null);
+
+        checkNotNull(dirPath, "Directory path not configured via '%s'", PROP_DIR);
+        File dir = new File(dirPath);
+        checkArgument(dir.exists(), "Directory %s does not exist", dir.getAbsolutePath());
+        // NOTE(review): the boolean flag presumably selects read-only mode - confirm against
+        // DataStoreTextWriter's constructor.
+        textWriter = new DataStoreTextWriter(dir, true);
+        reg = context.registerService(PreExtractedTextProvider.class.getName(), textWriter, null);
+    }
+
+    /**
+     * Unregisters the provider service and then closes the underlying writer.
+     *
+     * <p>The service is withdrawn before the writer is closed so that no consumer can look
+     * up an already closed provider, and the writer is closed in a {@code finally} block so
+     * that it is released even if unregistering fails. Both fields are null-checked and
+     * reset, which keeps the method safe when activation did not complete.
+     *
+     * @throws IOException if closing the writer fails
+     */
+    @Deactivate
+    private void deactivate() throws IOException {
+        try {
+            if (reg != null) {
+                reg.unregister();
+                reg = null;
+            }
+        } finally {
+            if (textWriter != null) {
+                textWriter.close();
+                textWriter = null;
+            }
+        }
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java?rev=1690635&r1=1690634&r2=1690635&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java Mon Jul 13 11:27:39 2015
@@ -94,9 +94,9 @@ public class DataStoreTextWriter impleme
         blobId = stripLength(blobId);
         ExtractedText result = null;
         if (getEmptyBlobs().contains(blobId)) {
-            result = new ExtractedText(ExtractionResult.EMPTY, null);
+            result = ExtractedText.EMPTY;
         } else if (getErrorBlobs().contains(blobId)) {
-            result = new ExtractedText(ExtractionResult.ERROR, null);
+            result = ExtractedText.ERROR;
         } else {
             File textFile = getFile(blobId);
             if (textFile.exists()) {
@@ -152,6 +152,11 @@ public class DataStoreTextWriter impleme
         closed = true;
     }
 
+    @Override
+    public String toString() { // constant descriptive label; no instance state is included
+        return "FileDataStore based text provider";
+    }
+
     SetHolder getEmptyBlobsHolder(){
         return emptyBlobsHolder;
     }

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java?rev=1690635&r1=1690634&r2=1690635&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java Mon Jul 13 11:27:39 2015
@@ -44,9 +44,17 @@ public class ExtractedText {
         ERROR
     }
 
+    public static final ExtractedText ERROR = new ExtractedText(ExtractionResult.ERROR);
+
+    public static final ExtractedText EMPTY = new ExtractedText(ExtractionResult.ERROR, "");
+
     private final ExtractionResult extractionResult;
     private final CharSequence extractedText;
 
+    public ExtractedText(@Nonnull ExtractionResult extractionResult){
+        this(extractionResult, null);
+    }
+
     public ExtractedText(@Nonnull ExtractionResult extractionResult,CharSequence extractedText) {
         this.extractionResult = extractionResult;
         this.extractedText = extractedText;