You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by tp...@apache.org on 2015/02/21 06:17:59 UTC

svn commit: r1661284 - in /tika/trunk: ./ tika-core/src/main/java/org/apache/tika/config/ tika-core/src/test/java/org/apache/tika/parser/ tika-core/src/test/resources/META-INF/ tika-core/src/test/resources/META-INF/services/ tika-core/src/test/resource...

Author: tpalsulich
Date: Sat Feb 21 05:17:58 2015
New Revision: 1661284

URL: http://svn.apache.org/r1661284
Log:
TIKA-1558. Enable blacklisting of Parsers and other services with a servicename.blacklist META-INF file.

Added:
    tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParser.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserSubclass.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserTest.java
    tika/trunk/tika-core/src/test/resources/META-INF/
    tika/trunk/tika-core/src/test/resources/META-INF/services/
    tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser
    tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser.blacklist
    tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist2_file.blacklist2
    tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist_file.blacklist
Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
    tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1661284&r1=1661283&r2=1661284&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Sat Feb 21 05:17:58 2015
@@ -1,4 +1,7 @@
 Release 1.8 - Current Development
+  * Parsers and other services can now be disabled with a
+    blacklist META-INF file (TIKA-1558).
+
   * Tika's JAX-RS server can now return stacktraces for
     parse exceptions. (TIKA-1323)
 

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java?rev=1661284&r1=1661283&r2=1661284&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java Sat Feb 21 05:17:58 2015
@@ -218,7 +218,8 @@ public class ServiceLoader {
     }
 
     /**
-     * Returns all the available service providers of the given type.
+     * Returns all the available service providers of the given type
+     * that aren't blacklisted.
      *
      * @param iface service provider interface
      * @return available service providers
@@ -253,13 +254,27 @@ public class ServiceLoader {
                         providers.add((T) service.service);
                     }
                 }
-                return providers;
+                return removeBlacklisted(providers, iface);
             }
         } else {
             return new ArrayList<T>(0);
         }
     }
 
+    /** Removes, in place, every provider whose class is (or extends) a blacklisted class. */
+    private <T> List<T> removeBlacklisted(List<T> providers, Class<T> iface) {
+        List<T> blacklist = loadStaticServiceProvidersBlacklist(iface);
+        for (T provider : new ArrayList<T>(providers)) { // snapshot: providers shrinks below
+            for (T blacklistedProvider : blacklist) {
+                if (blacklistedProvider.getClass().isAssignableFrom(provider.getClass())) {
+                    providers.remove(provider);
+                    break; // one match suffices; do not call remove() again for this provider
+                }
+            }
+        }
+        return providers;
+    }
+
     /**
      * Returns the defined static service providers of the given type, without
      * attempting to load them.
@@ -290,6 +305,68 @@ public class ServiceLoader {
     }
 
     /**
+     * Collects the class names listed in the blacklist resources of the
+     * given service type, without attempting to load those classes.
+     * The resources are named
+     * <code>META-INF/services/&lt;interface name&gt;.blacklist</code> and are
+     * looked up using the configured class loader (if any).
+     *
+     * @since Apache Tika 1.8
+     * @param iface service provider interface
+     * @return names of the blacklisted service provider classes
+     */
+    protected <T> List<String> identifyStaticServiceProvidersBlacklist(Class<T> iface) {
+        List<String> blacklisted = new ArrayList<String>();
+        if (loader == null) {
+            return blacklisted;
+        }
+
+        String blacklistFile = iface.getName() + ".blacklist";
+        Enumeration<URL> urls =
+                findServiceResources("META-INF/services/" + blacklistFile);
+        while (urls.hasMoreElements()) {
+            try {
+                collectServiceClassNames(urls.nextElement(), blacklisted);
+            } catch (IOException e) {
+                handler.handleLoadError(blacklistFile, e);
+            }
+        }
+        return blacklisted;
+    }
+
+    /**
+     * Instantiates the blacklisted static service providers of the given type.
+     * The class names come from
+     * {@link #identifyStaticServiceProvidersBlacklist(Class)} and are loaded
+     * with the configured class loader (if any). The returned list is newly
+     * allocated and may be freely modified by the caller.
+     *
+     * @since Apache Tika 1.8
+     * @param iface service provider interface
+     * @return instances of the blacklisted static service providers
+     */
+    @SuppressWarnings("unchecked")
+    public <T> List<T> loadStaticServiceProvidersBlacklist(Class<T> iface) {
+        List<T> blacklisted = new ArrayList<T>();
+        if (loader == null) {
+            return blacklisted;
+        }
+
+        for (String className : identifyStaticServiceProvidersBlacklist(iface)) {
+            try {
+                Class<?> candidate = loader.loadClass(className);
+                // Entries that do not implement the service type are skipped.
+                if (iface.isAssignableFrom(candidate)) {
+                    blacklisted.add((T) candidate.newInstance());
+                }
+            } catch (Throwable t) {
+                handler.handleLoadError(className, t);
+            }
+        }
+        return blacklisted;
+    }
+
+    /**
      * Returns the available static service providers of the given type.
      * The providers are loaded using the service provider mechanism using
      * the configured class loader (if any). The returned list is newly
@@ -317,8 +394,7 @@ public class ServiceLoader {
                 }
             }
         }
-
-        return providers;
+        return removeBlacklisted(providers, iface);
     }
 
     private static final Pattern COMMENT = Pattern.compile("#.*");

Added: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParser.java?rev=1661284&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParser.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParser.java Sat Feb 21 05:17:58 2015
@@ -0,0 +1,48 @@
+package org.apache.tika.parser;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Parser fixture for the ServiceLoader blacklisting tests (TIKA-1558);
+ * it claims application/blacklist and fails whenever it is asked to parse.
+ */
+public class BlacklistedParser implements Parser {
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        Set<MediaType> supported = new HashSet<MediaType>();
+        supported.add(MediaType.application("blacklist"));
+        return supported;
+    }
+
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
+            ParseContext context) throws IOException, SAXException, TikaException {
+        throw new TikaException("Should never get called");
+    }
+}

Added: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserSubclass.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserSubclass.java?rev=1661284&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserSubclass.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserSubclass.java Sat Feb 21 05:17:58 2015
@@ -0,0 +1,34 @@
+package org.apache.tika.parser;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.tika.mime.MediaType;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/** Subclass of {@link BlacklistedParser} that reports application/blacklist2 as its type. */
+public class BlacklistedParserSubclass extends BlacklistedParser {
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        Set<MediaType> supported = new HashSet<MediaType>();
+        supported.add(MediaType.application("blacklist2"));
+        return supported;
+    }
+
+}

Added: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserTest.java?rev=1661284&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserTest.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserTest.java Sat Feb 21 05:17:58 2015
@@ -0,0 +1,62 @@
+package org.apache.tika.parser;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+
+import java.io.InputStream;
+
+/**
+ * Checks that {@link AutoDetectParser} can parse the blacklist test
+ * resources without an exception being thrown (TIKA-1558).
+ */
+public class BlacklistedParserTest {
+    @Test
+    public void testBlacklistedParserNotFound() throws Exception {
+        parseResource("blacklist_file.blacklist");
+    }
+
+    @Test
+    public void testBlacklistedParserSubclassNotFound() throws Exception {
+        parseResource("blacklist2_file.blacklist2");
+    }
+
+    /**
+     * Parses a test resource with {@link AutoDetectParser} and closes the
+     * resource stream afterwards, whether or not parsing succeeded.
+     * @param filename resource name, resolved relative to this class
+     * @throws Exception if the resource is missing or parsing fails
+     */
+    private void parseResource(String filename) throws Exception {
+        Metadata m = new Metadata();
+        //need to set resource name so that detector can work
+        m.set(Metadata.RESOURCE_NAME_KEY, filename);
+        InputStream stream = getClass().getResourceAsStream(filename);
+        assertNotNull("Missing test resource: " + filename, stream);
+        try {
+            Parser p = new AutoDetectParser();
+            p.parse(stream, new BodyContentHandler(), m, new ParseContext());
+        } finally {
+            stream.close();
+        }
+    }
+}

Added: tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser?rev=1661284&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser (added)
+++ tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser Sat Feb 21 05:17:58 2015
@@ -0,0 +1,2 @@
+org.apache.tika.parser.BlacklistedParser
+org.apache.tika.parser.BlacklistedParserSubclass
\ No newline at end of file

Added: tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser.blacklist
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser.blacklist?rev=1661284&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser.blacklist (added)
+++ tika/trunk/tika-core/src/test/resources/META-INF/services/org.apache.tika.parser.Parser.blacklist Sat Feb 21 05:17:58 2015
@@ -0,0 +1 @@
+org.apache.tika.parser.BlacklistedParser
\ No newline at end of file

Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml?rev=1661284&r1=1661283&r2=1661284&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes.xml Sat Feb 21 05:17:58 2015
@@ -42,4 +42,13 @@
         <match value="Hello, World!" type="string" offset="0:13" />
      </magic>
   </mime-type>
+
+  <mime-type type="application/blacklist">
+    <glob pattern="*.blacklist"/>
+    <sub-class-of type="text/plain"/>
+  </mime-type>
+  <mime-type type="application/blacklist2">
+    <glob pattern="*.blacklist2"/>
+    <sub-class-of type="text/plain"/>
+  </mime-type>
 </mime-info>

Added: tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist2_file.blacklist2
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist2_file.blacklist2?rev=1661284&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist2_file.blacklist2 (added)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist2_file.blacklist2 Sat Feb 21 05:17:58 2015
@@ -0,0 +1 @@
+This is also content.
\ No newline at end of file

Added: tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist_file.blacklist
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist_file.blacklist?rev=1661284&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist_file.blacklist (added)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist_file.blacklist Sat Feb 21 05:17:58 2015
@@ -0,0 +1 @@
+This is some content.
\ No newline at end of file