You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2017/07/04 10:41:31 UTC
svn commit: r1800751 - in /jackrabbit/oak/trunk/oak-lucene/src:
main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java
test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java
Author: chetanm
Date: Tue Jul 4 10:41:31 2017
New Revision: 1800751
URL: http://svn.apache.org/viewvc?rev=1800751&view=rev
Log:
OAK-6415 - Use dynamic service loader by default
Reapplying the previously reverted commit.
Added a test to check the default behaviour, which shows that it has not
changed. Minor refactoring was done in BinaryTextExtractor, but no
functional change was made for this issue.
Added:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java (with props)
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java?rev=1800751&r1=1800750&r2=1800751&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java Tue Jul 4 10:41:31 2017
@@ -27,6 +27,8 @@ import java.util.Collections;
import java.util.List;
import java.util.Set;
+import javax.annotation.Nullable;
+
import com.google.common.io.CountingInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.JcrConstants;
@@ -65,6 +67,7 @@ public class BinaryTextExtractor {
private final IndexDefinition definition;
private final boolean reindex;
private Parser parser;
+ private TikaConfigHolder tikaConfig;
/**
* The media types supported by the parser used.
*/
@@ -197,6 +200,13 @@ public class BinaryTextExtractor {
//~-------------------------------------------< Tika >
+ public TikaConfig getTikaConfig(){
+ if (tikaConfig == null) {
+ tikaConfig = initializeTikaConfig(definition);
+ }
+ return tikaConfig.config;
+ }
+
private Parser getParser() {
if (parser == null){
parser = initializeTikaParser(definition);
@@ -239,54 +249,79 @@ public class BinaryTextExtractor {
return Collections.emptySet();
}
- private static Parser initializeTikaParser(IndexDefinition definition) {
+
+ private static TikaConfigHolder initializeTikaConfig(@Nullable IndexDefinition definition) {
ClassLoader current = Thread.currentThread().getContextClassLoader();
+ InputStream configStream = null;
+ String configSource = null;
+
try {
- if (definition.hasCustomTikaConfig()) {
+ Thread.currentThread().setContextClassLoader(LuceneIndexEditorContext.class.getClassLoader());
+ if (definition != null && definition.hasCustomTikaConfig()) {
log.debug("[{}] Using custom tika config", definition.getIndexName());
- Thread.currentThread().setContextClassLoader(LuceneIndexEditorContext.class.getClassLoader());
- InputStream is = definition.getTikaConfig();
- try {
- return new AutoDetectParser(getTikaConfig(is, definition));
- } finally {
- IOUtils.closeQuietly(is);
+ configSource = "Custom config at " + definition.getIndexPath();
+ configStream = definition.getTikaConfig();
+ } else {
+ URL configUrl = LuceneIndexEditorContext.class.getResource("tika-config.xml");
+ if (configUrl != null) {
+ configSource = configUrl.toString();
+ configStream = configUrl.openStream();
}
}
- }finally {
+
+ if (configStream != null) {
+ return new TikaConfigHolder(new TikaConfig(configStream), configSource);
+ }
+ } catch (TikaException | IOException | SAXException e) {
+ log.warn("Tika configuration not available : " + configSource, e);
+ } finally {
+ IOUtils.closeQuietly(configStream);
Thread.currentThread().setContextClassLoader(current);
}
- return defaultParser;
+ return new TikaConfigHolder(TikaConfig.getDefaultConfig(), "Default Config");
}
- private static AutoDetectParser createDefaultParser() {
+ private Parser initializeTikaParser(IndexDefinition definition) {
ClassLoader current = Thread.currentThread().getContextClassLoader();
- URL configUrl = LuceneIndexEditorContext.class.getResource("tika-config.xml");
- InputStream is = null;
- if (configUrl != null) {
- try {
+ try {
+ if (definition.hasCustomTikaConfig()) {
Thread.currentThread().setContextClassLoader(LuceneIndexEditorContext.class.getClassLoader());
- is = configUrl.openStream();
- TikaConfig config = new TikaConfig(is);
- log.info("Loaded default Tika Config from classpath {}", configUrl);
- return new AutoDetectParser(config);
- } catch (Exception e) {
- log.warn("Tika configuration not available : " + configUrl, e);
- } finally {
- IOUtils.closeQuietly(is);
- Thread.currentThread().setContextClassLoader(current);
+ return new AutoDetectParser(getTikaConfig());
}
- } else {
- log.warn("Default Tika configuration not found");
+ } finally {
+ Thread.currentThread().setContextClassLoader(current);
}
- return new AutoDetectParser();
+ return defaultParser;
}
- private static TikaConfig getTikaConfig(InputStream configStream, Object source){
+ private static AutoDetectParser createDefaultParser() {
+ ClassLoader current = Thread.currentThread().getContextClassLoader();
+ TikaConfigHolder configHolder = null;
try {
- return new TikaConfig(configStream);
+ configHolder = initializeTikaConfig(null);
+ Thread.currentThread().setContextClassLoader(LuceneIndexEditorContext.class.getClassLoader());
+ log.info("Loaded default Tika Config from classpath {}", configHolder);
+ return new AutoDetectParser(configHolder.config);
} catch (Exception e) {
- log.warn("Tika configuration not available : "+source, e);
+ log.warn("Tika configuration not available : " + configHolder, e);
+ } finally {
+ Thread.currentThread().setContextClassLoader(current);
+ }
+ return new AutoDetectParser();
+ }
+
+ private static final class TikaConfigHolder{
+ final TikaConfig config;
+ final String sourceInfo;
+
+ public TikaConfigHolder(TikaConfig config, String sourceInfo) {
+ this.config = config;
+ this.sourceInfo = sourceInfo;
+ }
+
+ @Override
+ public String toString() {
+ return sourceInfo;
}
- return TikaConfig.getDefaultConfig();
}
}
Added: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java?rev=1800751&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java (added)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java Tue Jul 4 10:41:31 2017
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene.binary;
+
+import org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache;
+import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.junit.Test;
+
+import static org.apache.jackrabbit.oak.InitialContent.INITIAL_CONTENT;
+import static org.junit.Assert.assertTrue;
+
+public class BinaryTextExtractorTest {
+ private NodeState root = INITIAL_CONTENT;
+
+ private NodeBuilder builder = root.builder();
+ private ExtractedTextCache cache = new ExtractedTextCache(1000, 10000);
+
+ @Test
+ public void tikaConfigServiceLoader() throws Exception {
+ IndexDefinition idxDefn = new IndexDefinition(root, builder.getNodeState(), "/foo");
+ BinaryTextExtractor extractor = new BinaryTextExtractor(cache, idxDefn, false);
+ assertTrue(extractor.getTikaConfig().getServiceLoader().isDynamic());
+ }
+
+}
\ No newline at end of file
Propchange: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java
------------------------------------------------------------------------------
svn:eol-style = native