You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/07/13 13:27:40 UTC
svn commit: r1690635 - in /jackrabbit/oak/trunk/oak-core: ./
src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/
src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/
Author: chetanm
Date: Mon Jul 13 11:27:39 2015
New Revision: 1690635
URL: http://svn.apache.org/r1690635
Log:
OAK-2892 - Speed up lucene indexing post migration by pre extracting the text content from binaries
-- DataStoreTextProviderService - OSGi component to configure and register DataStoreTextWriter
-- Exported org.apache.jackrabbit.oak.plugins.index.fulltext package
-- Minor refactoring to enable using singleton instance for ERROR and EMPTY cases
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java (with props)
Modified:
jackrabbit/oak/trunk/oak-core/pom.xml
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java
Modified: jackrabbit/oak/trunk/oak-core/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/pom.xml?rev=1690635&r1=1690634&r2=1690635&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-core/pom.xml Mon Jul 13 11:27:39 2015
@@ -59,6 +59,7 @@
org.apache.jackrabbit.oak.plugins.commit,
org.apache.jackrabbit.oak.plugins.identifier,
org.apache.jackrabbit.oak.plugins.index,
+ org.apache.jackrabbit.oak.plugins.index.fulltext,
org.apache.jackrabbit.oak.plugins.index.aggregate,
org.apache.jackrabbit.oak.plugins.index.counter,
org.apache.jackrabbit.oak.plugins.index.nodetype,
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java?rev=1690635&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java Mon Jul 13 11:27:39 2015
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.blob.datastore;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.jackrabbit.oak.commons.PropertiesUtil;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
+import org.osgi.framework.BundleContext;
+import org.osgi.framework.ServiceRegistration;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+
+/** OSGi component that registers a DataStoreTextWriter over a configured directory as a PreExtractedTextProvider. */
+@Component(
+        policy = ConfigurationPolicy.REQUIRE,
+        metatype = true,
+        label = "Apache Jackrabbit Oak DataStore PreExtractedTextProvider",
+        description = "Configures a PreExtractedTextProvider based on extracted text stored on FileSystem"
+)
+public class DataStoreTextProviderService {
+    @Property(
+            label = "Path",
+            description = "Local file system path where extracted text is stored in files."
+    )
+    private static final String PROP_DIR = "dir";
+    private DataStoreTextWriter textWriter;
+    private ServiceRegistration reg;
+
+    /** Checks that the configured directory is set and exists, then creates the writer and registers it. */
+    @Activate
+    private void activate(BundleContext context, Map<String,? > config) throws IOException {
+        String dirPath = PropertiesUtil.toString(config.get(PROP_DIR), null);
+        checkNotNull(dirPath, "Directory path not configured via '%s'", PROP_DIR);
+        File dir = new File(dirPath);
+        checkArgument(dir.exists(), "Directory %s does not exist", dir.getAbsolutePath());
+        textWriter = new DataStoreTextWriter(dir, true);
+        reg = context.registerService(PreExtractedTextProvider.class.getName(), textWriter, null);
+    }
+    /** Unregisters the service, then closes the writer; both fields are null-guarded. */
+    @Deactivate
+    private void deactivate() throws IOException {
+        if (reg != null) { // unregister first so a failing close() cannot leave the service registered
+            reg.unregister();
+        }
+        if (textWriter != null) {
+            textWriter.close();
+        }
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextProviderService.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java?rev=1690635&r1=1690634&r2=1690635&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTextWriter.java Mon Jul 13 11:27:39 2015
@@ -94,9 +94,9 @@ public class DataStoreTextWriter impleme
blobId = stripLength(blobId);
ExtractedText result = null;
if (getEmptyBlobs().contains(blobId)) {
- result = new ExtractedText(ExtractionResult.EMPTY, null);
+ result = ExtractedText.EMPTY;
} else if (getErrorBlobs().contains(blobId)) {
- result = new ExtractedText(ExtractionResult.ERROR, null);
+ result = ExtractedText.ERROR;
} else {
File textFile = getFile(blobId);
if (textFile.exists()) {
@@ -152,6 +152,11 @@ public class DataStoreTextWriter impleme
closed = true;
}
+ @Override
+ public String toString() {
+ return "FileDataStore based text provider"; // fixed description, independent of instance state
+ }
+
SetHolder getEmptyBlobsHolder(){
return emptyBlobsHolder;
}
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java?rev=1690635&r1=1690634&r2=1690635&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/fulltext/ExtractedText.java Mon Jul 13 11:27:39 2015
@@ -44,9 +44,17 @@ public class ExtractedText {
ERROR
}
+ public static final ExtractedText ERROR = new ExtractedText(ExtractionResult.ERROR);
+
+ public static final ExtractedText EMPTY = new ExtractedText(ExtractionResult.ERROR, "");
+
private final ExtractionResult extractionResult;
private final CharSequence extractedText;
+ public ExtractedText(@Nonnull ExtractionResult extractionResult){
+ this(extractionResult, null); // delegates to the two-argument constructor with a null extractedText
+ }
+
public ExtractedText(@Nonnull ExtractionResult extractionResult,CharSequence extractedText) {
this.extractionResult = extractionResult;
this.extractedText = extractedText;