You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/07/14 12:12:46 UTC
svn commit: r1690897 - in /jackrabbit/oak/branches/1.2: ./
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/
oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
Author: chetanm
Date: Tue Jul 14 10:12:45 2015
New Revision: 1690897
URL: http://svn.apache.org/r1690897
Log:
OAK-2892 - Speed up lucene indexing post migration by pre extracting the text content from binaries
Merging 1690637,1690650,1690885
Added:
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ExtractedTextCache.java
- copied unchanged from r1690637, jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ExtractedTextCache.java
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/TextExtractionStatsMBean.java
- copied unchanged from r1690637, jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/TextExtractionStatsMBean.java
Modified:
jackrabbit/oak/branches/1.2/ (props changed)
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
Propchange: jackrabbit/oak/branches/1.2/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Jul 14 10:12:45 2015
@@ -1,3 +1,3 @@
/jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414-1673415,1673436,1673644,1673662-1673664,1673669,1673695,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674228,1674780,1674880,1675054-1675055,1675319,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677774,1677788,1677797,1677804,1677806,1677939,1677991,1678023,1678095-1678096,1678171,1678173,1678211,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679232,1679235,1679503,1679958,1679961,1680170,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1682042,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174-1684175,1684186,1684376,1684442,1684561,1684570,1684601,1684618,1684820,1684868,1685023,1685370,1685552
,1685589-1685590,1685840,1685964,1685977,1685989,1685999,1686023,1686032,1686097,1686162,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,1687053-1687055,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688089-1688090,1688172,1688179,1688349,1688421,1688436,1688453,1688616,1688622,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689810,1689828,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690249,1690634-1690636,1690669,1690674
+/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414-1673415,1673436,1673644,1673662-1673664,1673669,1673695,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674228,1674780,1674880,1675054-1675055,1675319,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677774,1677788,1677797,1677804,1677806,1677939,1677991,1678023,1678095-1678096,1678171,1678173,1678211,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679232,1679235,1679503,1679958,1679961,1680170,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1682042,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174-1684175,1684186,1684376,1684442,1684561,1684570,1684601,1684618,1684820,1684868,1685023,1685370,1685552
,1685589-1685590,1685840,1685964,1685977,1685989,1685999,1686023,1686032,1686097,1686162,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,1687053-1687055,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688089-1688090,1688172,1688179,1688349,1688421,1688436,1688453,1688616,1688622,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689810,1689828,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690249,1690634-1690637,1690650,1690669,1690674,1690885
/jackrabbit/trunk:1345480
Modified: jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1690897&r1=1690896&r2=1690897&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java Tue Jul 14 10:12:45 2015
@@ -52,6 +52,8 @@ import org.apache.jackrabbit.oak.commons
import org.apache.jackrabbit.oak.plugins.index.IndexEditor;
import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback;
import org.apache.jackrabbit.oak.plugins.index.PathFilter;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
import org.apache.jackrabbit.oak.plugins.index.lucene.Aggregate.Matcher;
import org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState;
import org.apache.jackrabbit.oak.plugins.tree.TreeFactory;
@@ -86,6 +88,7 @@ public class LuceneIndexEditor implement
private static final Logger log =
LoggerFactory.getLogger(LuceneIndexEditor.class);
+ static final String TEXT_EXTRACTION_ERROR = "TextExtractionError";
private final LuceneIndexEditorContext context;
@@ -122,12 +125,14 @@ public class LuceneIndexEditor implement
private final PathFilter.Result pathFilterResult;
LuceneIndexEditor(NodeState root, NodeBuilder definition,
- IndexUpdateCallback updateCallback,@Nullable IndexCopier indexCopier) throws CommitFailedException {
+ IndexUpdateCallback updateCallback,
+ @Nullable IndexCopier indexCopier,
+ ExtractedTextCache extractedTextCache) throws CommitFailedException {
this.parent = null;
this.name = null;
this.path = "/";
this.context = new LuceneIndexEditorContext(root, definition,
- updateCallback, indexCopier);
+ updateCallback, indexCopier, extractedTextCache);
this.root = root;
this.isDeleted = false;
this.matcherState = MatcherState.NONE;
@@ -554,12 +559,16 @@ public class LuceneIndexEditor implement
}
for (Blob v : property.getValue(Type.BINARIES)) {
+ String value = parseStringValue(v, metadata, path, property.getName());
+ if (value == null){
+ continue;
+ }
+
if (nodePath != null){
- fields.add(newFulltextField(nodePath, parseStringValue(v, metadata, path)));
+ fields.add(newFulltextField(nodePath, value));
} else {
- fields.add(newFulltextField(parseStringValue(v, metadata, path)));
+ fields.add(newFulltextField(value));
}
-
}
return fields;
}
@@ -832,16 +841,24 @@ public class LuceneIndexEditor implement
return context.isSupportedMediaType(type);
}
- private String parseStringValue(Blob v, Metadata metadata, String path) {
+ private String parseStringValue(Blob v, Metadata metadata, String path, String propertyName) {
+ String text = context.getExtractedTextCache().get(path, propertyName, v, context.isReindex());
+ if (text == null){
+ text = parseStringValue0(v, metadata, path);
+ }
+ return text;
+ }
+
+ private String parseStringValue0(Blob v, Metadata metadata, String path) {
WriteOutContentHandler handler = new WriteOutContentHandler(context.getDefinition().getMaxExtractLength());
long start = System.currentTimeMillis();
- long size = 0;
+ long bytesRead = 0;
try {
CountingInputStream stream = new CountingInputStream(new LazyInputStream(new BlobByteSource(v)));
try {
context.getParser().parse(stream, handler, metadata, new ParseContext());
} finally {
- size = stream.getCount();
+ bytesRead = stream.getCount();
stream.close();
}
} catch (LinkageError e) {
@@ -859,11 +876,15 @@ public class LuceneIndexEditor implement
+ " worry about. The stack trace is included to"
+ " help improve the text extraction feature.",
getIndexName(), path, t);
- return "TextExtractionError";
+ context.getExtractedTextCache().put(v, ExtractedText.ERROR);
+ return TEXT_EXTRACTION_ERROR;
}
}
String result = handler.toString();
- context.recordTextExtractionStats(System.currentTimeMillis() - start, size);
+ if (bytesRead > 0) {
+ context.recordTextExtractionStats(System.currentTimeMillis() - start, bytesRead, result.length());
+ }
+ context.getExtractedTextCache().put(v, new ExtractedText(ExtractionResult.SUCCESS, result));
return result;
}
Modified: jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java?rev=1690897&r1=1690896&r2=1690897&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java Tue Jul 14 10:12:45 2015
@@ -130,18 +130,20 @@ public class LuceneIndexEditorContext {
private final TextExtractionStats textExtractionStats = new TextExtractionStats();
+ private final ExtractedTextCache extractedTextCache;
/**
* The media types supported by the parser used.
*/
private Set<MediaType> supportedMediaTypes;
LuceneIndexEditorContext(NodeState root, NodeBuilder definition, IndexUpdateCallback updateCallback,
- @Nullable IndexCopier indexCopier) {
+ @Nullable IndexCopier indexCopier, ExtractedTextCache extractedTextCache) {
this.definitionBuilder = definition;
this.indexCopier = indexCopier;
this.definition = new IndexDefinition(root, definition);
this.indexedNodes = 0;
this.updateCallback = updateCallback;
+ this.extractedTextCache = extractedTextCache;
if (this.definition.isOfOldFormat()){
IndexDefinition.updateDefinition(definition);
}
@@ -200,6 +202,7 @@ public class LuceneIndexEditorContext {
PERF_LOGGER.end(start, -1, "Closed IndexWriter for directory {}", definition);
textExtractionStats.log(reindex);
+ textExtractionStats.collectStats(extractedTextCache);
}
}
@@ -269,8 +272,22 @@ public class LuceneIndexEditorContext {
return definition;
}
- public void recordTextExtractionStats(long timeInMillis, long size) {
- textExtractionStats.addStats(timeInMillis, size);
+ @Deprecated
+ public void recordTextExtractionStats(long timeInMillis, long bytesRead) {
+ //Keeping deprecated method to avoid major version change
+ recordTextExtractionStats(timeInMillis, bytesRead, 0);
+ }
+
+ public void recordTextExtractionStats(long timeInMillis, long bytesRead, int textLength) {
+ textExtractionStats.addStats(timeInMillis, bytesRead, textLength);
+ }
+
+ ExtractedTextCache getExtractedTextCache() {
+ return extractedTextCache;
+ }
+
+ public boolean isReindex() {
+ return reindex;
}
private static Parser initializeTikaParser(IndexDefinition definition) {
@@ -324,15 +341,17 @@ public class LuceneIndexEditorContext {
/**
* Log stats only if time spent is more than 2 min
*/
- private static final long LOGGING_THRESHOLD = TimeUnit.MINUTES.toMillis(2);
+ private static final long LOGGING_THRESHOLD = TimeUnit.MINUTES.toMillis(1);
private int count;
- private long totalSize;
+ private long totalBytesRead;
private long totalTime;
+ private long totalTextLength;
- public void addStats(long timeInMillis, long size) {
+ public void addStats(long timeInMillis, long bytesRead, int textLength) {
count++;
- totalSize += size;
+ totalBytesRead += bytesRead;
totalTime += timeInMillis;
+ totalTextLength += textLength;
}
public void log(boolean reindex) {
@@ -343,6 +362,10 @@ public class LuceneIndexEditorContext {
}
}
+ public void collectStats(ExtractedTextCache cache){
+ cache.addStats(count, totalTime, totalBytesRead, totalTextLength);
+ }
+
private boolean isTakingLotsOfTime() {
return totalTime > LOGGING_THRESHOLD;
}
@@ -353,8 +376,11 @@ public class LuceneIndexEditorContext {
@Override
public String toString() {
- return String.format(" %d (%s, %s)", count,
- timeInWords(totalTime), humanReadableByteCount(totalSize));
+ return String.format(" %d (Time Taken %s, Bytes Read %s, Extracted text size %s)",
+ count,
+ timeInWords(totalTime),
+ humanReadableByteCount(totalBytesRead),
+ humanReadableByteCount(totalTextLength));
}
private static String timeInWords(long millis) {
Modified: jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java?rev=1690897&r1=1690896&r2=1690897&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java Tue Jul 14 10:12:45 2015
@@ -38,13 +38,20 @@ import org.apache.jackrabbit.oak.spi.sta
*/
public class LuceneIndexEditorProvider implements IndexEditorProvider {
private final IndexCopier indexCopier;
+ private final ExtractedTextCache extractedTextCache;
public LuceneIndexEditorProvider() {
this(null);
}
public LuceneIndexEditorProvider(@Nullable IndexCopier indexCopier) {
+ this(indexCopier, new ExtractedTextCache());
+ }
+
+ public LuceneIndexEditorProvider(@Nullable IndexCopier indexCopier,
+ ExtractedTextCache extractedTextCache) {
this.indexCopier = indexCopier;
+ this.extractedTextCache = extractedTextCache;
}
@Override
@@ -53,7 +60,7 @@ public class LuceneIndexEditorProvider i
@Nonnull IndexUpdateCallback callback)
throws CommitFailedException {
if (TYPE_LUCENE.equals(type)) {
- return new LuceneIndexEditor(root, definition, callback, indexCopier);
+ return new LuceneIndexEditor(root, definition, callback, indexCopier, extractedTextCache);
}
return null;
}
@@ -61,4 +68,8 @@ public class LuceneIndexEditorProvider i
IndexCopier getIndexCopier() {
return indexCopier;
}
+
+ ExtractedTextCache getExtractedTextCache() {
+ return extractedTextCache;
+ }
}
Modified: jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java?rev=1690897&r1=1690896&r2=1690897&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java Tue Jul 14 10:12:45 2015
@@ -48,6 +48,7 @@ import org.apache.jackrabbit.oak.commons
import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.index.aggregate.NodeAggregator;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
import org.apache.jackrabbit.oak.spi.commit.BackgroundObserver;
import org.apache.jackrabbit.oak.plugins.index.lucene.score.ScorerProviderFactory;
import org.apache.jackrabbit.oak.spi.commit.BackgroundObserverMBean;
@@ -144,6 +145,12 @@ public class LuceneIndexProviderService
@Reference
ScorerProviderFactory scorerFactory;
+ @Reference(policy = ReferencePolicy.DYNAMIC,
+ cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE,
+ policyOption = ReferencePolicyOption.GREEDY
+ )
+ private volatile PreExtractedTextProvider extractedTextProvider;
+
private IndexCopier indexCopier;
private File indexDir;
@@ -152,6 +159,8 @@ public class LuceneIndexProviderService
private int threadPoolSize;
+ private ExtractedTextCache extractedTextCache = new ExtractedTextCache();
+
@Activate
private void activate(BundleContext bundleContext, Map<String, ?> config)
throws NotCompliantMBeanException, IOException {
@@ -231,12 +240,17 @@ public class LuceneIndexProviderService
LuceneIndexEditorProvider editorProvider;
if (enableCopyOnWrite){
initializeIndexCopier(bundleContext, config);
- editorProvider = new LuceneIndexEditorProvider(indexCopier);
+ editorProvider = new LuceneIndexEditorProvider(indexCopier, extractedTextCache);
log.info("Enabling CopyOnWrite support. Index files would be copied under {}", indexDir.getAbsolutePath());
} else {
- editorProvider = new LuceneIndexEditorProvider();
+ editorProvider = new LuceneIndexEditorProvider(null, extractedTextCache);
}
regs.add(bundleContext.registerService(IndexEditorProvider.class.getName(), editorProvider, null));
+ oakRegs.add(registerMBean(whiteboard,
+ TextExtractionStatsMBean.class,
+ editorProvider.getExtractedTextCache().getStatsMBean(),
+ TextExtractionStatsMBean.TYPE,
+ "TextExtraction statistics"));
}
private IndexTracker createTracker(BundleContext bundleContext, Map<String, ?> config) throws IOException {
@@ -359,6 +373,17 @@ public class LuceneIndexProviderService
TokenFilterFactory.reloadTokenFilters(classLoader);
}
+ private void registerExtractedTextProvider(PreExtractedTextProvider provider){
+ if (extractedTextCache != null){
+ if (provider != null){
+ log.info("Registering PreExtractedTextProvider {} with extracted text cache", provider);
+ } else {
+ log.info("Unregistering PreExtractedTextProvider with extracted text cache");
+ }
+ extractedTextCache.setExtractedTextProvider(provider);
+ }
+ }
+
protected void bindNodeAggregator(NodeAggregator aggregator) {
this.nodeAggregator = aggregator;
@@ -370,4 +395,14 @@ public class LuceneIndexProviderService
initialize();
}
+ protected void bindExtractedTextProvider(PreExtractedTextProvider preExtractedTextProvider){
+ this.extractedTextProvider = preExtractedTextProvider;
+ registerExtractedTextProvider(preExtractedTextProvider);
+ }
+
+ protected void unbindExtractedTextProvider(PreExtractedTextProvider preExtractedTextProvider){
+ this.extractedTextProvider = null;
+ registerExtractedTextProvider(null);
+ }
+
}
Modified: jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java?rev=1690897&r1=1690896&r2=1690897&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java Tue Jul 14 10:12:45 2015
@@ -19,10 +19,14 @@
package org.apache.jackrabbit.oak.plugins.index.lucene;
+import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
+import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
import org.apache.jackrabbit.oak.spi.commit.BackgroundObserver;
import org.apache.jackrabbit.oak.spi.commit.Observer;
import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
@@ -126,9 +130,31 @@ public class LuceneIndexProviderServiceT
MockOsgi.deactivate(service);
}
+ @Test
+ public void preExtractedTextProvider() throws Exception{
+ MockOsgi.activate(service, context.bundleContext(), getDefaultConfig());
+ LuceneIndexEditorProvider editorProvider =
+ (LuceneIndexEditorProvider) context.getService(IndexEditorProvider.class);
+ assertNull(editorProvider.getExtractedTextCache().getExtractedTextProvider());
+
+ //Mock OSGi does not support components
+ //context.registerService(PreExtractedTextProvider.class, new DummyProvider());
+ service.bindExtractedTextProvider(new DummyProvider());
+
+ assertNotNull(editorProvider.getExtractedTextCache().getExtractedTextProvider());
+ }
+
private Map<String,Object> getDefaultConfig(){
Map<String,Object> config = new HashMap<String, Object>();
config.put("localIndexDir", folder.getRoot().getAbsolutePath());
return config;
}
+
+ private static class DummyProvider implements PreExtractedTextProvider {
+
+ @Override
+ public ExtractedText getText(String propertyPath, Blob blob) throws IOException {
+ return null;
+ }
+ }
}
Modified: jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1690897&r1=1690896&r2=1690897&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java (original)
+++ jackrabbit/oak/branches/1.2/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java Tue Jul 14 10:12:45 2015
@@ -25,6 +25,7 @@ import java.text.ParseException;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutorService;
@@ -51,6 +52,9 @@ import org.apache.jackrabbit.oak.api.Res
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText.ExtractionResult;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
@@ -93,6 +97,7 @@ import static org.hamcrest.CoreMatchers.
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
import static org.junit.matchers.JUnitMatchers.containsString;
public class LucenePropertyIndexTest extends AbstractQueryTest {
@@ -106,6 +111,8 @@ public class LucenePropertyIndexTest ext
@Rule
public TemporaryFolder temporaryFolder = new TemporaryFolder();
+ private LuceneIndexEditorProvider editorProvider;
+
@Override
protected void createTestIndexNode() throws Exception {
setTraversalEnabled(false);
@@ -113,13 +120,14 @@ public class LucenePropertyIndexTest ext
@Override
protected ContentRepository createRepository() {
+ editorProvider = new LuceneIndexEditorProvider(createIndexCopier());
LuceneIndexProvider provider = new LuceneIndexProvider();
return new Oak()
.with(new InitialContent())
.with(new OpenSecurityProvider())
.with((QueryIndexProvider) provider)
.with((Observer) provider)
- .with(new LuceneIndexEditorProvider(createIndexCopier()))
+ .with(editorProvider)
.with(new PropertyIndexEditorProvider())
.with(new NodeTypeIndexProvider())
.createContentRepository();
@@ -1286,6 +1294,41 @@ public class LucenePropertyIndexTest ext
}
@Test
+ public void preExtractedTextProvider() throws Exception{
+ Tree idx = createFulltextIndex(root.getTree("/"), "test");
+ TestUtil.useV2(idx);
+ root.commit();
+
+ AccessStateProvidingBlob testBlob =
+ new AccessStateProvidingBlob("fox is jumping", "id1");
+
+ MapBasedProvider textProvider = new MapBasedProvider();
+ textProvider.write("id1","lion");
+ editorProvider.getExtractedTextCache().setExtractedTextProvider(textProvider);
+
+ Tree test = root.getTree("/").addChild("test");
+ createFileNode(test, "text", testBlob, "text/plain");
+ root.commit();
+
+ //As its not a reindex case actual blob content would be accessed
+ assertTrue(testBlob.isStreamAccessed());
+ assertQuery("select * from [nt:base] where CONTAINS(*, 'fox ')", asList("/test/text/jcr:content"));
+ assertEquals(0, textProvider.accessCount);
+
+ testBlob.resetState();
+
+ //Lets trigger a reindex
+ root.getTree(idx.getPath()).setProperty(IndexConstants.REINDEX_PROPERTY_NAME, true);
+ root.commit();
+
+ //Now the content should be provided by the PreExtractedTextProvider
+ //and instead of fox its lion!
+ assertFalse(testBlob.isStreamAccessed());
+ assertQuery("select * from [nt:base] where CONTAINS(*, 'lion ')", asList("/test/text/jcr:content"));
+ assertEquals(1, textProvider.accessCount);
+ }
+
+ @Test
public void maxFieldLengthCheck() throws Exception{
Tree idx = createFulltextIndex(root.getTree("/"), "test");
TestUtil.useV2(idx);
@@ -1568,6 +1611,7 @@ public class LucenePropertyIndexTest ext
private static class AccessStateProvidingBlob extends ArrayBasedBlob {
private CountingInputStream stream;
+ private String id;
public AccessStateProvidingBlob(byte[] value) {
super(value);
@@ -1577,6 +1621,11 @@ public class LucenePropertyIndexTest ext
this(content.getBytes(Charsets.UTF_8));
}
+ public AccessStateProvidingBlob(String content, String id) {
+ this(content.getBytes(Charsets.UTF_8));
+ this.id = id;
+ }
+
@Nonnull
@Override
public InputStream getNewStream() {
@@ -1598,5 +1647,32 @@ public class LucenePropertyIndexTest ext
}
return stream.getCount();
}
+
+ @Override
+ public String getContentIdentity() {
+ return id;
+ }
+ }
+
+ private static class MapBasedProvider implements PreExtractedTextProvider {
+ final Map<String, ExtractedText> idMap = Maps.newHashMap();
+ int accessCount = 0;
+
+ @Override
+ public ExtractedText getText(String propertyPath, Blob blob) throws IOException {
+ ExtractedText result = idMap.get(blob.getContentIdentity());
+ if (result != null){
+ accessCount++;
+ }
+ return result;
+ }
+
+ public void write(String id, String text){
+ idMap.put(id, new ExtractedText(ExtractionResult.SUCCESS, text));
+ }
+
+ public void reset(){
+ accessCount = 0;
+ }
}
}