You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2012/03/27 19:32:35 UTC

svn commit: r1305920 - in /tika/trunk/tika-core/src/main/java/org/apache/tika: config/ServiceLoader.java detect/CompositeDetector.java detect/DefaultDetector.java parser/DefaultParser.java

Author: jukka
Date: Tue Mar 27 17:32:35 2012
New Revision: 1305920

URL: http://svn.apache.org/viewvc?rev=1305920&view=rev
Log:
TIKA-884: Dynamic loading of Parser and Detector services

Decouple loading of dynamic and static services

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java?rev=1305920&r1=1305919&r2=1305920&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java Tue Mar 27 17:32:35 2012
@@ -194,9 +194,25 @@ public class ServiceLoader {
      * @param iface service provider interface
      * @return available service providers
      */
-    @SuppressWarnings("unchecked")
     public <T> List<T> loadServiceProviders(Class<T> iface) {
         List<T> providers = new ArrayList<T>();
+        providers.addAll(loadDynamicServiceProviders(iface));
+        providers.addAll(loadStaticServiceProviders(iface));
+        return providers;
+    }
+
+    /**
+     * Returns the available dynamic service providers of the given type.
+     * The returned list is newly allocated and may be freely modified
+     * by the caller.
+     *
+     * @since Apache Tika 1.2
+     * @param iface service provider interface
+     * @return dynamic service providers
+     */
+    @SuppressWarnings("unchecked")
+    public <T> List<T> loadDynamicServiceProviders(Class<T> iface) {
+        List<T> providers = new ArrayList<T>();
 
         if (dynamic) {
             synchronized (services) {
@@ -208,6 +224,23 @@ public class ServiceLoader {
             }
         }
 
+        return providers;
+    }
+
+    /**
+     * Returns the available static service providers of the given type.
+     * The providers are loaded through the service provider mechanism with
+     * the configured class loader (if any). The returned list is newly
+     * allocated and may be freely modified by the caller.
+     *
+     * @since Apache Tika 1.2
+     * @param iface service provider interface
+     * @return static service providers
+     */
+    @SuppressWarnings("unchecked")
+    public <T> List<T> loadStaticServiceProviders(Class<T> iface) {
+        List<T> providers = new ArrayList<T>();
+
         if (loader != null) {
             Set<String> names = new HashSet<String>();
 

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java?rev=1305920&r1=1305919&r2=1305920&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java Tue Mar 27 17:32:35 2012
@@ -57,7 +57,7 @@ public class CompositeDetector implement
     public MediaType detect(InputStream input, Metadata metadata)
             throws IOException { 
         MediaType type = MediaType.OCTET_STREAM;
-        for (Detector detector : detectors) {
+        for (Detector detector : getDetectors()) {
             MediaType detected = detector.detect(input, metadata);
             if (registry.isSpecializationOf(detected, type)) {
                 type = detected;

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java?rev=1305920&r1=1305919&r2=1305920&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java Tue Mar 27 17:32:35 2012
@@ -16,7 +16,6 @@
  */
 package org.apache.tika.detect;
 
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
@@ -43,42 +42,44 @@ public class DefaultDetector extends Com
     /** Serial version UID */
     private static final long serialVersionUID = -8170114575326908027L;
 
+    /**
+     * Finds all statically loadable detectors and sorts the list by name,
+     * rather than discovery order. Detectors are used in the given order,
+     * so the Tika detectors are put last so that non-Tika (user supplied)
+     * detectors can take precedence.
+     *
+     * @param loader service loader
+     * @return ordered list of statically loadable detectors
+     */
     private static List<Detector> getDefaultDetectors(
             MimeTypes types, ServiceLoader loader) {
-        // Find all the detectors available as services
-        List<Detector> svcDetectors = loader.loadServiceProviders(Detector.class);
-        List<Detector> detectors = new ArrayList<Detector>(svcDetectors.size()+1);
-        
-        // Sort the list by classname, rather than discovery order 
-        Collections.sort(svcDetectors, new Comparator<Detector>() {
+        List<Detector> detectors =
+                loader.loadStaticServiceProviders(Detector.class);
+        Collections.sort(detectors, new Comparator<Detector>() {
             public int compare(Detector d1, Detector d2) {
-               return d1.getClass().getName().compareTo(
-                     d2.getClass().getName());
+                String n1 = d1.getClass().getName();
+                String n2 = d2.getClass().getName();
+                boolean t1 = n1.startsWith("org.apache.tika.");
+                boolean t2 = n2.startsWith("org.apache.tika.");
+                if (t1 == t2) {
+                    return n1.compareTo(n2);
+                } else if (t1) {
+                    return 1;
+                } else {
+                    return -1;
+                }
             }
         });
-        
-        // Add the non-Tika (user supplied) detectors First
-        for (Detector d : svcDetectors) {
-           if (! d.getClass().getName().startsWith("org.apache.tika")) {
-              detectors.add(d);
-           }
-        }
-        
-        // Add the Tika detectors next
-        for (Detector d : svcDetectors) {
-           if (d.getClass().getName().startsWith("org.apache.tika")) {
-              detectors.add(d);
-           }
-        }
-        
-        // Finally add the Tika MimeTypes as a fallback
+        // Finally the Tika MimeTypes as a fallback
         detectors.add(types);
-        
         return detectors;
     }
 
+    private transient final ServiceLoader loader;
+
     public DefaultDetector(MimeTypes types, ServiceLoader loader) {
         super(types.getMediaTypeRegistry(), getDefaultDetectors(types, loader));
+        this.loader = loader;
     }
 
     public DefaultDetector(MimeTypes types, ClassLoader loader) {
@@ -97,4 +98,16 @@ public class DefaultDetector extends Com
         this(MimeTypes.getDefaultMimeTypes());
     }
 
+    @Override
+    public List<Detector> getDetectors() {
+        if (loader != null) {
+            List<Detector> detectors =
+                    loader.loadDynamicServiceProviders(Detector.class);
+            detectors.addAll(super.getDetectors());
+            return detectors;
+        } else {
+            return super.getDetectors();
+        }
+    }
+
 }

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java?rev=1305920&r1=1305919&r2=1305920&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java Tue Mar 27 17:32:35 2012
@@ -16,57 +16,62 @@
  */
 package org.apache.tika.parser;
 
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
-
-import javax.imageio.spi.ServiceRegistry;
+import java.util.Map;
 
 import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 
 /**
  * A composite parser based on all the {@link Parser} implementations
- * available through the {@link ServiceRegistry service provider mechanism}.
+ * available through the
+ * {@link javax.imageio.spi.ServiceRegistry service provider mechanism}.
  *
  * @since Apache Tika 0.8
  */
 public class DefaultParser extends CompositeParser {
+
     /** Serial version UID */
     private static final long serialVersionUID = 3612324825403757520L;
 
+    /**
+     * Finds all statically loadable parsers and sorts the list by name,
+     * rather than discovery order. CompositeParser takes the last
+     * parser for any given media type, so the Tika parsers are put first
+     * so that non-Tika (user supplied) parsers can take precedence.
+     *
+     * @param loader service loader
+     * @return ordered list of statically loadable parsers
+     */
     private static List<Parser> getDefaultParsers(ServiceLoader loader) {
-        // Find all the Parsers available as services
-        List<Parser> svcParsers = loader.loadServiceProviders(Parser.class);
-        List<Parser> parsers = new ArrayList<Parser>(svcParsers.size());
-
-        // Sort the list by classname, rather than discovery order 
-        Collections.sort(svcParsers, new Comparator<Parser>() {
-           public int compare(Parser p1, Parser p2) {
-              return p1.getClass().getName().compareTo(
-                   p2.getClass().getName());
-           }
+        List<Parser> parsers =
+                loader.loadStaticServiceProviders(Parser.class);
+        Collections.sort(parsers, new Comparator<Parser>() {
+            public int compare(Parser p1, Parser p2) {
+                String n1 = p1.getClass().getName();
+                String n2 = p2.getClass().getName();
+                boolean t1 = n1.startsWith("org.apache.tika.");
+                boolean t2 = n2.startsWith("org.apache.tika.");
+                if (t1 == t2) {
+                    return n1.compareTo(n2);
+                } else if (t1) {
+                    return -1;
+                } else {
+                    return 1;
+                }
+            }
         });
-        
-        // CompositeParser takes the last parser for any given mime type, so put the 
-        // TikaParsers first so that non-Tika (user supplied) parsers can take presidence
-        for (Parser p : svcParsers) {
-           if (p.getClass().getName().startsWith("org.apache.tika")) {
-              parsers.add(p);
-           }
-        }
-        for (Parser p : svcParsers) {
-           if (!p.getClass().getName().startsWith("org.apache.tika")) {
-              parsers.add(p);
-           }
-        }
-        
         return parsers;
     }
 
+    private transient final ServiceLoader loader;
+
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader) {
         super(registry, getDefaultParsers(loader));
+        this.loader = loader;
     }
 
     public DefaultParser(MediaTypeRegistry registry, ClassLoader loader) {
@@ -85,4 +90,22 @@ public class DefaultParser extends Compo
         this(MediaTypeRegistry.getDefaultRegistry());
     }
 
+    @Override
+    public Map<MediaType, Parser> getParsers(ParseContext context) {
+        Map<MediaType, Parser> map = super.getParsers(context);
+
+        if (loader != null) {
+            // Add dynamic parser services (they always override static ones)
+            MediaTypeRegistry registry = getMediaTypeRegistry();
+            for (Parser parser
+                    : loader.loadDynamicServiceProviders(Parser.class)) {
+                for (MediaType type : parser.getSupportedTypes(context)) {
+                    map.put(registry.normalize(type), parser);
+                }
+            }
+        }
+
+        return map;
+    }
+
 }