You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/02/10 16:09:00 UTC

[tika] 02/02: TIKA-3297 -- prevent loading of parsers and detectors that are excluded

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 73ead01b1d2c8aa89fffc1ee048a832d1b84e9a2
Author: tballison <ta...@apache.org>
AuthorDate: Wed Feb 10 11:08:45 2021 -0500

    TIKA-3297 -- prevent loading of parsers and detectors that are excluded
---
 .../java/org/apache/tika/config/ServiceLoader.java | 52 +++++++++++++++-------
 .../org/apache/tika/detect/DefaultDetector.java    | 10 +++--
 .../java/org/apache/tika/parser/DefaultParser.java | 13 +++---
 .../org/apache/tika/config/TikaConfigTest.java     | 15 ++++---
 .../org/apache/tika/parser/mock/MockParser.java    | 14 ++++++
 .../org/apache/tika/config/mock-exclude.xml        | 25 +++++++++++
 6 files changed, 98 insertions(+), 31 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java b/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
index b341082..3056e76 100644
--- a/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
+++ b/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
@@ -16,6 +16,9 @@
  */
 package org.apache.tika.config;
 
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.parser.Parser;
+
 import static java.nio.charset.StandardCharsets.UTF_8;
 
 import java.io.BufferedReader;
@@ -28,8 +31,10 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.regex.Pattern;
 
 /**
@@ -334,32 +339,47 @@ public class ServiceLoader {
         return names;
     }
 
-    /**
-     * Returns the available static service providers of the given type.
-     * The providers are loaded using the service provider mechanism using
-     * the configured class loader (if any). The returned list is newly
-     * allocated and may be freely modified by the caller.
-     *
-     * @since Apache Tika 1.2
-     * @param iface service provider interface
-     * @return static service providers
-     */
-    @SuppressWarnings("unchecked")
     public <T> List<T> loadStaticServiceProviders(Class<T> iface) {
+        return loadStaticServiceProviders(iface, Collections.EMPTY_SET);
+    }
+        /**
+         * Returns the available static service providers of the given type.
+         * The providers are loaded using the service provider mechanism using
+         * the configured class loader (if any). The returned list is newly
+         * allocated and may be freely modified by the caller.
+         *
+         * @since Apache Tika 1.2
+         * @param iface service provider interface
+         * @param excludes providers that are, or that extend/implement, any of these classes are not instantiated
+         * @return static service providers
+         */
+    @SuppressWarnings("unchecked")
+    public <T> List<T> loadStaticServiceProviders(Class<T> iface,
+                                                  Collection<Class<? extends T>> excludes) {
         List<T> providers = new ArrayList<T>();
 
         if (loader != null) {
             List<String> names = identifyStaticServiceProviders(iface);
-
             for (String name : names) {
                 try {
                     Class<?> klass = loader.loadClass(name);
                     if (iface.isAssignableFrom(klass)) {
-                        T instance = (T) klass.newInstance();
-                        if (instance instanceof Initializable) {
-                            ((Initializable)instance).checkInitialization(initializableProblemHandler);
+                        boolean shouldExclude = false;
+                        for (Class<? extends T> ex : excludes) {
+                            if (ex.isAssignableFrom(klass)) {
+                                shouldExclude = true;
+                                break;
+                            }
+                        }
+                        if (! shouldExclude) {
+                            T instance = (T) klass.getConstructor().newInstance();
+                            if (instance instanceof Initializable) {
+                                ((Initializable) instance).checkInitialization(initializableProblemHandler);
+                            }
+                            providers.add(instance);
                         }
-                        providers.add(instance);
+                    } else {
+                        throw new TikaConfigException("Class "+name + " is not of type: " + iface);
                     }
                 } catch (Throwable t) {
                     handler.handleLoadError(name, t);
diff --git a/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java b/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
index f87c86c..d407a1b 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
@@ -17,6 +17,7 @@
 package org.apache.tika.detect;
 
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 
@@ -56,8 +57,9 @@ public class DefaultDetector extends CompositeDetector {
      * @return ordered list of statically loadable detectors
      */
     private static List<Detector> getDefaultDetectors(
-            MimeTypes types, ServiceLoader loader) {
-        List<Detector> detectors = loader.loadStaticServiceProviders(Detector.class);
+            MimeTypes types, ServiceLoader loader,
+            Collection<Class<? extends Detector>> excludeDetectors) {
+        List<Detector> detectors = loader.loadStaticServiceProviders(Detector.class, excludeDetectors);
 
         ServiceLoaderUtils.sortLoadedClasses(detectors);
         //look for the override index and put that first
@@ -83,12 +85,12 @@ public class DefaultDetector extends CompositeDetector {
 
     public DefaultDetector(MimeTypes types, ServiceLoader loader,
                            Collection<Class<? extends Detector>> excludeDetectors) {
-        super(types.getMediaTypeRegistry(), getDefaultDetectors(types, loader), excludeDetectors);
+        super(types.getMediaTypeRegistry(), getDefaultDetectors(types, loader, excludeDetectors));
         this.loader = loader;
     }
     
     public DefaultDetector(MimeTypes types, ServiceLoader loader) {
-        this(types, loader, null);
+        this(types, loader, Collections.EMPTY_SET);
     }
 
     public DefaultDetector(MimeTypes types, ClassLoader loader) {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java b/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
index 233b2a0..1fda1b0 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
@@ -51,8 +51,9 @@ public class DefaultParser extends CompositeParser {
      * @return ordered list of statically loadable parsers
      */
     private static List<Parser> getDefaultParsers(ServiceLoader loader,
-                                                  EncodingDetector encodingDetector) {
-        List<Parser> parsers = loader.loadStaticServiceProviders(Parser.class);
+                                                  EncodingDetector encodingDetector,
+                                                  Collection<Class<? extends Parser>> excludeParsers) {
+        List<Parser> parsers = loader.loadStaticServiceProviders(Parser.class, excludeParsers);
 
         if (encodingDetector != null) {
             for (Parser p : parsers) {
@@ -82,22 +83,22 @@ public class DefaultParser extends CompositeParser {
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
                          Collection<Class<? extends Parser>> excludeParsers,
                          EncodingDetector encodingDetector) {
-        super(registry, getDefaultParsers(loader, encodingDetector), excludeParsers);
+        super(registry, getDefaultParsers(loader, encodingDetector, excludeParsers));
         this.loader = loader;
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
                          Collection<Class<? extends Parser>> excludeParsers) {
-        super(registry, getDefaultParsers(loader, new DefaultEncodingDetector(loader)), excludeParsers);
+        super(registry, getDefaultParsers(loader, new DefaultEncodingDetector(loader), excludeParsers));
         this.loader = loader;
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader, EncodingDetector encodingDetector) {
-        this(registry, loader, null, encodingDetector);
+        this(registry, loader, Collections.EMPTY_SET, encodingDetector);
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader) {
-        this(registry, loader, null, new DefaultEncodingDetector(loader));
+        this(registry, loader, Collections.EMPTY_SET, new DefaultEncodingDetector(loader));
     }
 
     public DefaultParser(MediaTypeRegistry registry, ClassLoader loader) {
diff --git a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
index f2cbf73..7673cb6 100644
--- a/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
+++ b/tika-core/src/test/java/org/apache/tika/config/TikaConfigTest.java
@@ -26,13 +26,9 @@ import java.util.Map;
 import java.util.concurrent.ThreadPoolExecutor;
 
 import org.apache.tika.ResourceLoggingClassLoader;
-import org.apache.tika.config.DummyExecutor;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.config.TikaConfigTest;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MimeDetectionTest;
 import org.apache.tika.parser.AutoDetectParser;
@@ -42,6 +38,7 @@ import org.apache.tika.parser.EmptyParser;
 import org.apache.tika.parser.ErrorParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.mock.MockParser;
 import org.apache.tika.parser.multiple.FallbackParser;
 import org.apache.tika.pipes.emitter.Emitter;
 import org.apache.tika.pipes.emitter.EmitterManager;
@@ -49,7 +46,6 @@ import org.apache.tika.pipes.fetcher.Fetcher;
 import org.apache.tika.pipes.fetcher.FetcherManager;
 import org.apache.tika.pipes.fetcher.FileSystemFetcher;
 import org.apache.tika.pipes.fetchiterator.FetchIterator;
-import org.apache.tika.pipes.fetchiterator.FileSystemFetchIterator;
 import org.apache.tika.utils.XMLReaderUtils;
 import org.junit.Test;
 
@@ -422,4 +418,13 @@ public class TikaConfigTest extends AbstractTikaConfigTest {
                 .getFetchIterator();
         assertEquals("fs1", f.getFetcherName());
     }
+
+    @Test
+    public void testTimesInitiated() throws Exception {
+        //MockParser's instantiation counter is static, so this test cannot run concurrently with other tests that create a MockParser; tests are not run in parallel at present
+        MockParser.resetTimesInitiated();
+        TikaConfig tikaConfig =
+                new TikaConfig(TikaConfigTest.class.getResourceAsStream("mock-exclude.xml"));
+        assertEquals(1, MockParser.getTimesInitiated());
+    }
 }
\ No newline at end of file
diff --git a/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java b/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
index 83950a3..1444be8 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
@@ -33,6 +33,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
@@ -70,11 +71,24 @@ public class MockParser extends AbstractParser {
     private static final long serialVersionUID = 1L;
     private static PrintStream ORIG_STDERR;
     private static PrintStream ORIG_STDOUT;
+    private static AtomicInteger TIMES_INITIATED = new AtomicInteger(0);
     static {
         ORIG_STDERR = System.err;
         ORIG_STDOUT = System.out;
     }
     private final Random random = new Random();
+    public MockParser() {
+        TIMES_INITIATED.incrementAndGet();
+    }
+
+    public static void resetTimesInitiated() {
+        TIMES_INITIATED.set(0);
+    }
+
+    public static int getTimesInitiated() {
+        return TIMES_INITIATED.get();
+    }
+
     @Override
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         Set<MediaType> types = new HashSet<>();
diff --git a/tika-core/src/test/resources/org/apache/tika/config/mock-exclude.xml b/tika-core/src/test/resources/org/apache/tika/config/mock-exclude.xml
new file mode 100644
index 0000000..a570c51
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/mock-exclude.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.DefaultParser">
+            <parser-exclude class="org.apache.tika.parser.mock.MockParser"/>
+        </parser>
+        <parser class="org.apache.tika.parser.mock.MockParser"/>
+    </parsers>
+</properties>