You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2017/07/04 10:41:31 UTC

svn commit: r1800751 - in /jackrabbit/oak/trunk/oak-lucene/src: main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java

Author: chetanm
Date: Tue Jul  4 10:41:31 2017
New Revision: 1800751

URL: http://svn.apache.org/viewvc?rev=1800751&view=rev
Log:
OAK-6415 - Use dynamic service loader by default

Reapplying reverted commit

Added a test that checks the default behaviour and shows that it has
not changed. Some minor refactoring was done in BinaryTextExtractor,
but no functional change was made for this issue.

Added:
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java?rev=1800751&r1=1800750&r2=1800751&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java Tue Jul  4 10:41:31 2017
@@ -27,6 +27,8 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 
+import javax.annotation.Nullable;
+
 import com.google.common.io.CountingInputStream;
 import org.apache.commons.io.IOUtils;
 import org.apache.jackrabbit.JcrConstants;
@@ -65,6 +67,7 @@ public class BinaryTextExtractor {
     private final IndexDefinition definition;
     private final boolean reindex;
     private Parser parser;
+    private TikaConfigHolder tikaConfig;
     /**
      * The media types supported by the parser used.
      */
@@ -197,6 +200,13 @@ public class BinaryTextExtractor {
 
     //~-------------------------------------------< Tika >
 
+    public TikaConfig getTikaConfig(){
+        if (tikaConfig == null) {
+            tikaConfig = initializeTikaConfig(definition);
+        }
+        return tikaConfig.config;
+    }
+
     private Parser getParser() {
         if (parser == null){
             parser = initializeTikaParser(definition);
@@ -239,54 +249,79 @@ public class BinaryTextExtractor {
         return Collections.emptySet();
     }
 
-    private static Parser initializeTikaParser(IndexDefinition definition) {
+
+    private static TikaConfigHolder initializeTikaConfig(@Nullable  IndexDefinition definition) {
         ClassLoader current = Thread.currentThread().getContextClassLoader();
+        InputStream configStream = null;
+        String configSource = null;
+
         try {
-            if (definition.hasCustomTikaConfig()) {
+            Thread.currentThread().setContextClassLoader(LuceneIndexEditorContext.class.getClassLoader());
+            if (definition != null && definition.hasCustomTikaConfig()) {
                 log.debug("[{}] Using custom tika config", definition.getIndexName());
-                Thread.currentThread().setContextClassLoader(LuceneIndexEditorContext.class.getClassLoader());
-                InputStream is = definition.getTikaConfig();
-                try {
-                    return new AutoDetectParser(getTikaConfig(is, definition));
-                } finally {
-                    IOUtils.closeQuietly(is);
+                configSource = "Custom config at " + definition.getIndexPath();
+                configStream = definition.getTikaConfig();
+            } else {
+                URL configUrl = LuceneIndexEditorContext.class.getResource("tika-config.xml");
+                if (configUrl != null) {
+                    configSource = configUrl.toString();
+                    configStream = configUrl.openStream();
                 }
             }
-        }finally {
+
+            if (configStream != null) {
+                return new TikaConfigHolder(new TikaConfig(configStream), configSource);
+            }
+        } catch (TikaException | IOException | SAXException e) {
+            log.warn("Tika configuration not available : " + configSource, e);
+        } finally {
+            IOUtils.closeQuietly(configStream);
             Thread.currentThread().setContextClassLoader(current);
         }
-        return defaultParser;
+        return new TikaConfigHolder(TikaConfig.getDefaultConfig(), "Default Config");
     }
 
-    private static AutoDetectParser createDefaultParser() {
+    private Parser initializeTikaParser(IndexDefinition definition) {
         ClassLoader current = Thread.currentThread().getContextClassLoader();
-        URL configUrl = LuceneIndexEditorContext.class.getResource("tika-config.xml");
-        InputStream is = null;
-        if (configUrl != null) {
-            try {
+        try {
+            if (definition.hasCustomTikaConfig()) {
                 Thread.currentThread().setContextClassLoader(LuceneIndexEditorContext.class.getClassLoader());
-                is = configUrl.openStream();
-                TikaConfig config = new TikaConfig(is);
-                log.info("Loaded default Tika Config from classpath {}", configUrl);
-                return new AutoDetectParser(config);
-            } catch (Exception e) {
-                log.warn("Tika configuration not available : " + configUrl, e);
-            } finally {
-                IOUtils.closeQuietly(is);
-                Thread.currentThread().setContextClassLoader(current);
+                return new AutoDetectParser(getTikaConfig());
             }
-        } else {
-            log.warn("Default Tika configuration not found");
+        } finally {
+            Thread.currentThread().setContextClassLoader(current);
         }
-        return new AutoDetectParser();
+        return defaultParser;
     }
 
-    private static TikaConfig getTikaConfig(InputStream configStream, Object source){
+    private static AutoDetectParser createDefaultParser() {
+        ClassLoader current = Thread.currentThread().getContextClassLoader();
+        TikaConfigHolder configHolder = null;
         try {
-            return new TikaConfig(configStream);
+            configHolder = initializeTikaConfig(null);
+            Thread.currentThread().setContextClassLoader(LuceneIndexEditorContext.class.getClassLoader());
+            log.info("Loaded default Tika Config from classpath {}", configHolder);
+            return new AutoDetectParser(configHolder.config);
         } catch (Exception e) {
-            log.warn("Tika configuration not available : "+source, e);
+            log.warn("Tika configuration not available : " + configHolder, e);
+        } finally {
+            Thread.currentThread().setContextClassLoader(current);
+        }
+        return new AutoDetectParser();
+    }
+
+    private static final class TikaConfigHolder{
+        final TikaConfig config;
+        final String sourceInfo;
+
+        public TikaConfigHolder(TikaConfig config, String sourceInfo) {
+            this.config = config;
+            this.sourceInfo = sourceInfo;
+        }
+
+        @Override
+        public String toString() {
+            return sourceInfo;
         }
-        return TikaConfig.getDefaultConfig();
     }
 }

Added: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java?rev=1800751&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java (added)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java Tue Jul  4 10:41:31 2017
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.lucene.binary;
+
+import org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache;
+import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.junit.Test;
+
+import static org.apache.jackrabbit.oak.InitialContent.INITIAL_CONTENT;
+import static org.junit.Assert.assertTrue;
+
+public class BinaryTextExtractorTest {
+    private NodeState root = INITIAL_CONTENT;
+
+    private NodeBuilder builder = root.builder();
+    private ExtractedTextCache cache = new ExtractedTextCache(1000, 10000);
+
+    @Test
+    public void tikaConfigServiceLoader() throws Exception {
+        IndexDefinition idxDefn = new IndexDefinition(root, builder.getNodeState(), "/foo");
+        BinaryTextExtractor extractor = new BinaryTextExtractor(cache, idxDefn, false);
+        assertTrue(extractor.getTikaConfig().getServiceLoader().isDynamic());
+    }
+
+}
\ No newline at end of file

Propchange: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native