You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2012/03/27 19:32:35 UTC
svn commit: r1305920 - in
/tika/trunk/tika-core/src/main/java/org/apache/tika:
config/ServiceLoader.java detect/CompositeDetector.java
detect/DefaultDetector.java parser/DefaultParser.java
Author: jukka
Date: Tue Mar 27 17:32:35 2012
New Revision: 1305920
URL: http://svn.apache.org/viewvc?rev=1305920&view=rev
Log:
TIKA-884: Dynamic loading of Parser and Detector services
Decouple loading of dynamic and static services
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java?rev=1305920&r1=1305919&r2=1305920&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java Tue Mar 27 17:32:35 2012
@@ -194,9 +194,25 @@ public class ServiceLoader {
* @param iface service provider interface
* @return available service providers
*/
- @SuppressWarnings("unchecked")
public <T> List<T> loadServiceProviders(Class<T> iface) {
List<T> providers = new ArrayList<T>();
+ providers.addAll(loadDynamicServiceProviders(iface));
+ providers.addAll(loadStaticServiceProviders(iface));
+ return providers;
+ }
+
+ /**
+ * Returns the available dynamic service providers of the given type.
+ * The returned list is newly allocated and may be freely modified
+ * by the caller.
+ *
+ * @since Apache Tika 1.2
+ * @param iface service provider interface
+ * @return dynamic service providers
+ */
+ @SuppressWarnings("unchecked")
+ public <T> List<T> loadDynamicServiceProviders(Class<T> iface) {
+ List<T> providers = new ArrayList<T>();
if (dynamic) {
synchronized (services) {
@@ -208,6 +224,23 @@ public class ServiceLoader {
}
}
+ return providers;
+ }
+
+ /**
+ * Returns the available static service providers of the given type.
+ * The providers are loaded through the service provider mechanism using
+ * the configured class loader (if any). The returned list is newly
+ * allocated and may be freely modified by the caller.
+ *
+ * @since Apache Tika 1.2
+ * @param iface service provider interface
+ * @return static service providers
+ */
+ @SuppressWarnings("unchecked")
+ public <T> List<T> loadStaticServiceProviders(Class<T> iface) {
+ List<T> providers = new ArrayList<T>();
+
if (loader != null) {
Set<String> names = new HashSet<String>();
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java?rev=1305920&r1=1305919&r2=1305920&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/CompositeDetector.java Tue Mar 27 17:32:35 2012
@@ -57,7 +57,7 @@ public class CompositeDetector implement
public MediaType detect(InputStream input, Metadata metadata)
throws IOException {
MediaType type = MediaType.OCTET_STREAM;
- for (Detector detector : detectors) {
+ for (Detector detector : getDetectors()) {
MediaType detected = detector.detect(input, metadata);
if (registry.isSpecializationOf(detected, type)) {
type = detected;
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java?rev=1305920&r1=1305919&r2=1305920&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java Tue Mar 27 17:32:35 2012
@@ -16,7 +16,6 @@
*/
package org.apache.tika.detect;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
@@ -43,42 +42,44 @@ public class DefaultDetector extends Com
/** Serial version UID */
private static final long serialVersionUID = -8170114575326908027L;
+ /**
+ * Finds all statically loadable detectors and sorts the list by name,
+ * rather than discovery order. Detectors are used in the given order,
+ * so the Tika detectors are put last so that non-Tika (user supplied)
+ * detectors can take precedence.
+ *
+ * @param loader service loader
+ * @return ordered list of statically loadable detectors
+ */
private static List<Detector> getDefaultDetectors(
MimeTypes types, ServiceLoader loader) {
- // Find all the detectors available as services
- List<Detector> svcDetectors = loader.loadServiceProviders(Detector.class);
- List<Detector> detectors = new ArrayList<Detector>(svcDetectors.size()+1);
-
- // Sort the list by classname, rather than discovery order
- Collections.sort(svcDetectors, new Comparator<Detector>() {
+ List<Detector> detectors =
+ loader.loadStaticServiceProviders(Detector.class);
+ Collections.sort(detectors, new Comparator<Detector>() {
public int compare(Detector d1, Detector d2) {
- return d1.getClass().getName().compareTo(
- d2.getClass().getName());
+ String n1 = d1.getClass().getName();
+ String n2 = d2.getClass().getName();
+ boolean t1 = n1.startsWith("org.apache.tika.");
+ boolean t2 = n2.startsWith("org.apache.tika.");
+ if (t1 == t2) {
+ return n1.compareTo(n2);
+ } else if (t1) {
+ return 1;
+ } else {
+ return -1;
+ }
}
});
-
- // Add the non-Tika (user supplied) detectors First
- for (Detector d : svcDetectors) {
- if (! d.getClass().getName().startsWith("org.apache.tika")) {
- detectors.add(d);
- }
- }
-
- // Add the Tika detectors next
- for (Detector d : svcDetectors) {
- if (d.getClass().getName().startsWith("org.apache.tika")) {
- detectors.add(d);
- }
- }
-
- // Finally add the Tika MimeTypes as a fallback
+ // Finally add the Tika MimeTypes as a fallback
detectors.add(types);
-
return detectors;
}
+ private transient final ServiceLoader loader;
+
public DefaultDetector(MimeTypes types, ServiceLoader loader) {
super(types.getMediaTypeRegistry(), getDefaultDetectors(types, loader));
+ this.loader = loader;
}
public DefaultDetector(MimeTypes types, ClassLoader loader) {
@@ -97,4 +98,16 @@ public class DefaultDetector extends Com
this(MimeTypes.getDefaultMimeTypes());
}
+ @Override
+ public List<Detector> getDetectors() {
+ if (loader != null) {
+ List<Detector> detectors =
+ loader.loadDynamicServiceProviders(Detector.class);
+ detectors.addAll(super.getDetectors());
+ return detectors;
+ } else {
+ return super.getDetectors();
+ }
+ }
+
}
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java?rev=1305920&r1=1305919&r2=1305920&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java Tue Mar 27 17:32:35 2012
@@ -16,57 +16,62 @@
*/
package org.apache.tika.parser;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
-
-import javax.imageio.spi.ServiceRegistry;
+import java.util.Map;
import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
/**
* A composite parser based on all the {@link Parser} implementations
- * available through the {@link ServiceRegistry service provider mechanism}.
+ * available through the
+ * {@link javax.imageio.spi.ServiceRegistry service provider mechanism}.
*
* @since Apache Tika 0.8
*/
public class DefaultParser extends CompositeParser {
+
/** Serial version UID */
private static final long serialVersionUID = 3612324825403757520L;
+ /**
+ * Finds all statically loadable parsers and sorts the list by name,
+ * rather than discovery order. CompositeParser takes the last
+ * parser for any given media type, so put the Tika parsers first
+ * so that non-Tika (user supplied) parsers can take precedence.
+ *
+ * @param loader service loader
+ * @return ordered list of statically loadable parsers
+ */
private static List<Parser> getDefaultParsers(ServiceLoader loader) {
- // Find all the Parsers available as services
- List<Parser> svcParsers = loader.loadServiceProviders(Parser.class);
- List<Parser> parsers = new ArrayList<Parser>(svcParsers.size());
-
- // Sort the list by classname, rather than discovery order
- Collections.sort(svcParsers, new Comparator<Parser>() {
- public int compare(Parser p1, Parser p2) {
- return p1.getClass().getName().compareTo(
- p2.getClass().getName());
- }
+ List<Parser> parsers =
+ loader.loadStaticServiceProviders(Parser.class);
+ Collections.sort(parsers, new Comparator<Parser>() {
+ public int compare(Parser p1, Parser p2) {
+ String n1 = p1.getClass().getName();
+ String n2 = p2.getClass().getName();
+ boolean t1 = n1.startsWith("org.apache.tika.");
+ boolean t2 = n2.startsWith("org.apache.tika.");
+ if (t1 == t2) {
+ return n1.compareTo(n2);
+ } else if (t1) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
});
-
- // CompositeParser takes the last parser for any given mime type, so put the
- // TikaParsers first so that non-Tika (user supplied) parsers can take presidence
- for (Parser p : svcParsers) {
- if (p.getClass().getName().startsWith("org.apache.tika")) {
- parsers.add(p);
- }
- }
- for (Parser p : svcParsers) {
- if (!p.getClass().getName().startsWith("org.apache.tika")) {
- parsers.add(p);
- }
- }
-
return parsers;
}
+ private transient final ServiceLoader loader;
+
public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader) {
super(registry, getDefaultParsers(loader));
+ this.loader = loader;
}
public DefaultParser(MediaTypeRegistry registry, ClassLoader loader) {
@@ -85,4 +90,22 @@ public class DefaultParser extends Compo
this(MediaTypeRegistry.getDefaultRegistry());
}
+ @Override
+ public Map<MediaType, Parser> getParsers(ParseContext context) {
+ Map<MediaType, Parser> map = super.getParsers(context);
+
+ if (loader != null) {
+ // Add dynamic parser services (they always override static ones)
+ MediaTypeRegistry registry = getMediaTypeRegistry();
+ for (Parser parser
+ : loader.loadDynamicServiceProviders(Parser.class)) {
+ for (MediaType type : parser.getSupportedTypes(context)) {
+ map.put(registry.normalize(type), parser);
+ }
+ }
+ }
+
+ return map;
+ }
+
}