You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/08/01 19:02:26 UTC

svn commit: r1693733 - /tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java

Author: nick
Date: Sat Aug  1 17:02:26 2015
New Revision: 1693733

URL: http://svn.apache.org/r1693733
Log:
TIKA-1702 Move the parser and detector creation logic to the config loader classes

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=1693733&r1=1693732&r2=1693733&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Sat Aug  1 17:02:26 2015
@@ -129,7 +129,7 @@ public class TikaConfig {
         DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
         
         this.mimeTypes = typesFromDomElement(element);
-        this.detector = detectorFromDomElement(element, mimeTypes, loader);
+        this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
         this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
         this.translator = translatorFromDomElement(element, loader);
     }
@@ -213,8 +213,7 @@ public class TikaConfig {
                 
                 this.mimeTypes = typesFromDomElement(element);
                 this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
-                this.detector =
-                        detectorFromDomElement(element, mimeTypes, loader);
+                this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
                 this.translator = translatorFromDomElement(element, loader);
             } catch (SAXException e) {
                 throw new TikaException(
@@ -358,137 +357,6 @@ public class TikaConfig {
             return getDefaultMimeTypes(null);
         }
     }
-
-//    private static CompositeParser parserFromDomElement(
-//            Element element, MimeTypes mimeTypes, ServiceLoader loader)
-//            throws TikaException, IOException {
-//        List<Parser> parsers = new ArrayList<Parser>();
-//        
-//        // Find the parser children of the parsers tag, if any
-//        for (Element pe : getTopLevelElementChildren(element, "parsers", "parser")) {
-//            parsers.add(parserFromParserDomElement(pe, mimeTypes, loader));
-//        }
-//        
-//        if (parsers.isEmpty()) {
-//            // No parsers defined, create a DefaultParser
-//            return getDefaultParser(mimeTypes, loader);
-//        } else if (parsers.size() == 1 && parsers.get(0) instanceof CompositeParser) {
-//            // Single Composite defined, use that
-//            return (CompositeParser)parsers.get(0);
-//        } else {
-//            // Wrap the defined parsers up in a Composite
-//            MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
-//            return new CompositeParser(registry, parsers);
-//        }
-//    }
-    private static Parser parserFromParserDomElement(
-            Element parserNode, MimeTypes mimeTypes, ServiceLoader loader)
-            throws TikaException, IOException {
-        String name = parserNode.getAttribute("class");
-        Parser parser = null;
-
-        try {
-            Class<? extends Parser> parserClass =
-                    loader.getServiceClass(Parser.class, name);
-            // https://issues.apache.org/jira/browse/TIKA-866
-            if (AutoDetectParser.class.isAssignableFrom(parserClass)) {
-                throw new TikaException(
-                        "AutoDetectParser not supported in a <parser>"
-                        + " configuration element: " + name);
-            }
-
-            // Is this a composite or decorated parser? If so, support recursion
-            if (CompositeParser.class.isAssignableFrom(parserClass) ||
-                ParserDecorator.class.isAssignableFrom(parserClass)) {
-                
-                // Get the child parsers for it
-                List<Parser> childParsers = new ArrayList<Parser>();
-                NodeList childParserNodes = parserNode.getElementsByTagName("parser");
-                if (childParserNodes.getLength() > 0) {
-                    for (int i = 0; i < childParserNodes.getLength(); i++) {
-                        childParsers.add(parserFromParserDomElement(
-                                (Element)childParserNodes.item(i), mimeTypes, loader
-                        ));
-                    }
-                }
-                
-                // Get the list of parsers to exclude
-                Set<Class<? extends Parser>> excludeParsers = new HashSet<Class<? extends Parser>>();
-                NodeList excludeParserNodes = parserNode.getElementsByTagName("parser-exclude");
-                if (excludeParserNodes.getLength() > 0) {
-                    for (int i = 0; i < excludeParserNodes.getLength(); i++) {
-                        Element excl = (Element)excludeParserNodes.item(i);
-                        String exclName = excl.getAttribute("class");
-                        excludeParsers.add(loader.getServiceClass(Parser.class, exclName));
-                    }
-                }
-                
-                // Create the Composite Parser
-                Constructor<? extends Parser> c = null;
-                MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
-                if (parser == null) {
-                    try {
-                        c = parserClass.getConstructor(MediaTypeRegistry.class, ServiceLoader.class, Collection.class);
-                        parser = c.newInstance(registry, loader, excludeParsers);
-                    } 
-                    catch (NoSuchMethodException me) {}
-                }
-                if (parser == null) {
-                    try {
-                        c = parserClass.getConstructor(MediaTypeRegistry.class, List.class, Collection.class);
-                        parser = c.newInstance(registry, childParsers, excludeParsers);
-                    } catch (NoSuchMethodException me) {}
-                }
-                // Create as a Parser Decorator
-                if (parser == null && ParserDecorator.class.isAssignableFrom(parserClass)) {
-                    try {
-                        CompositeParser cp = null;
-                        if (childParsers.size() == 1 && excludeParsers.size() == 0 &&
-                                childParsers.get(0) instanceof CompositeParser) {
-                            cp = (CompositeParser)childParsers.get(0);
-                        } else {
-                            cp = new CompositeParser(registry, childParsers, excludeParsers);
-                        }
-                        c = parserClass.getConstructor(Parser.class);
-                        parser = c.newInstance(cp);
-                    } catch (NoSuchMethodException me) {}
-                }
-                // Default constructor
-                if (parser == null) {
-                    parser = parserClass.newInstance();
-                }
-            } else {
-                // Regular parser, create as-is
-                parser = parserClass.newInstance();
-            }
-
-            // Is there an explicit list of mime types for this to handle?
-            Set<MediaType> parserTypes = mediaTypesListFromDomElement(parserNode, "mime");
-            if (! parserTypes.isEmpty()) {
-                parser = ParserDecorator.withTypes(parser, parserTypes);
-            }
-            // Is there an explicit list of mime types this shouldn't handle?
-            Set<MediaType> parserExclTypes = mediaTypesListFromDomElement(parserNode, "mime-exclude");
-            if (! parserExclTypes.isEmpty()) {
-                parser = ParserDecorator.withoutTypes(parser, parserExclTypes);
-            }
-            
-            // All done with setup
-            return parser;
-        } catch (ClassNotFoundException e) {
-            throw new TikaException(
-                    "Unable to find a parser class: " + name, e);
-        } catch (IllegalAccessException e) {
-            throw new TikaException(
-                    "Unable to access a parser class: " + name, e);
-        } catch (InvocationTargetException e) {
-            throw new TikaException(
-                    "Unable to create a parser class: " + name, e);
-        } catch (InstantiationException e) {
-            throw new TikaException(
-                    "Unable to instantiate a parser class: " + name, e);
-        }
-    }
     
     private static Set<MediaType> mediaTypesListFromDomElement(
             Element node, String tag) 
@@ -516,49 +384,6 @@ public class TikaConfig {
         return Collections.emptySet();
     }
 
-    private static CompositeDetector detectorFromDomElement(
-            Element element, MimeTypes mimeTypes, ServiceLoader loader)
-            throws TikaException, IOException {
-        List<Detector> detectors = new ArrayList<Detector>();
-        
-        // Find the detector children of the detectors tag, if any
-        for (Element de : getTopLevelElementChildren(element, "detectors", "detector")) {
-            detectors.add(detectorFromDetectorDomElement(de, mimeTypes, loader));
-        }
-        
-        if (detectors.isEmpty()) {
-            // No detectors defined, create a DefaultDetector
-            return getDefaultDetector(mimeTypes, loader);
-        } else if (detectors.size() == 1 && detectors.get(0) instanceof CompositeDetector) {
-            // Single Composite defined, use that
-            return (CompositeDetector)detectors.get(0);
-        } else {
-            // Wrap the defined detectors up in a Composite
-            MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
-            return new CompositeDetector(registry, detectors);
-        }
-    }
-    private static Detector detectorFromDetectorDomElement(
-            Element detectorNode, MimeTypes mimeTypes, ServiceLoader loader)
-            throws TikaException, IOException {
-        String name = detectorNode.getAttribute("class");
-        
-        try {
-            Class<? extends Detector> detectorClass =
-                    loader.getServiceClass(Detector.class, name);
-            return detectorClass.newInstance();
-        } catch (ClassNotFoundException e) {
-            throw new TikaException(
-                    "Unable to find a detector class: " + name, e);
-        } catch (IllegalAccessException e) {
-            throw new TikaException(
-                    "Unable to access a detector class: " + name, e);
-        } catch (InstantiationException e) {
-            throw new TikaException(
-                    "Unable to instantiate a detector class: " + name, e);
-        }
-    }
-
     private static Translator translatorFromDomElement(
             Element element, ServiceLoader loader)
             throws TikaException, IOException {
@@ -593,9 +418,17 @@ public class TikaConfig {
     private static abstract class XmlLoader<CT,T> {
         abstract String getParentTagName(); // eg parsers
         abstract String getLoaderTagName(); // eg parser
+        abstract Class<? extends T> getLoaderClass(); // Generics workaround
         abstract boolean isComposite(T loaded);
+        abstract boolean isComposite(Class<? extends T> loadedClass);
         abstract CT createDefault(MimeTypes mimeTypes, ServiceLoader loader);
         abstract CT createComposite(List<T> loaded, MimeTypes mimeTypes, ServiceLoader loader);
+        abstract T createComposite(Class<? extends T> compositeClass, 
+                List<T> children, Set<Class<? extends T>> excludeChildren, 
+                MimeTypes mimeTypes, ServiceLoader loader) 
+                throws InvocationTargetException, IllegalAccessException, InstantiationException; // may return null; loadOne then falls back to the no-argument constructor
+        abstract T decorate(T created, Element element) 
+                throws IOException, TikaException; // eg explicit mime types 
         
         @SuppressWarnings("unchecked")
         CT loadOverall(Element element, MimeTypes mimeTypes, 
@@ -621,22 +454,100 @@ public class TikaConfig {
             // Wrap the defined parsers/detectors up in a Composite
             return createComposite(loaded, mimeTypes, loader);
         }
-        T loadOne(Element element, MimeTypes mimeTypes, 
-                ServiceLoader loader) throws TikaException, IOException {
-            // TODO Do this properly
-            // TODO This is a cheat for parsers only!
-            return (T)parserFromParserDomElement(element, mimeTypes, loader);
-        }
+        T loadOne(Element element, MimeTypes mimeTypes, ServiceLoader loader) 
+                throws TikaException, IOException { // Loads one instance from the class named by the element's "class" attribute, recursing into composites
+            String name = element.getAttribute("class");
+            T loaded = null;
+
+            try {
+                Class<? extends T> loadedClass =
+                        loader.getServiceClass(getLoaderClass(), name);
+                
+                // Check for classes which can't be set in config (this check runs for every loader type, not only parsers)
+                if (AutoDetectParser.class.isAssignableFrom(loadedClass)) {
+                    // https://issues.apache.org/jira/browse/TIKA-866
+                    throw new TikaException(
+                            "AutoDetectParser not supported in a <parser>"
+                            + " configuration element: " + name);
+                }
+
+                // Is this a composite or decorated class? If so, support recursion
+                if (isComposite(loadedClass)) {
+                    // Load the children. NOTE(review): DOM getElementsByTagName matches all descendants, not only direct children - confirm nested composites are not loaded twice
+                    List<T> children = new ArrayList<T>();
+                    NodeList childNodes = element.getElementsByTagName(getLoaderTagName());
+                    if (childNodes.getLength() > 0) {
+                        for (int i = 0; i < childNodes.getLength(); i++) {
+                            children.add(loadOne(
+                                    (Element)childNodes.item(i), mimeTypes, loader
+                            ));
+                        }
+                    }
+                    
+                    // Get the list of child classes to exclude, from tags named <loader tag>-exclude (eg parser-exclude)
+                    Set<Class<? extends T>> excludeChildren = new HashSet<Class<? extends T>>();
+                    NodeList excludeChildNodes = element.getElementsByTagName(getLoaderTagName()+"-exclude");
+                    if (excludeChildNodes.getLength() > 0) {
+                        for (int i = 0; i < excludeChildNodes.getLength(); i++) {
+                            Element excl = (Element)excludeChildNodes.item(i);
+                            String exclName = excl.getAttribute("class");
+                            excludeChildren.add(loader.getServiceClass(getLoaderClass(), exclName));
+                        }
+                    }
+                    
+                    // Create the Composite; the subclass may hand back null if no special constructor matched
+                    loaded = createComposite(loadedClass, children, excludeChildren, mimeTypes, loader);
+
+                    // Default constructor fallback, used when createComposite returned null
+                    if (loaded == null) {
+                        loaded = loadedClass.newInstance();
+                    }
+                } else {
+                    // Regular class, create as-is
+                    // TODO Support arguments, needed for Translators etc
+                    loaded = loadedClass.newInstance();
+                }
+                
+                // Have any decoration performed, eg explicit mimetypes
+                loaded = decorate(loaded, element);
+                
+                // All done with setup
+                return loaded;
+            } catch (ClassNotFoundException e) {
+                throw new TikaException(
+                        "Unable to find a "+getLoaderTagName()+" class: " + name, e);
+            } catch (IllegalAccessException e) {
+                throw new TikaException(
+                        "Unable to access a "+getLoaderTagName()+" class: " + name, e);
+            } catch (InvocationTargetException e) {
+                throw new TikaException(
+                        "Unable to create a "+getLoaderTagName()+" class: " + name, e);
+            } catch (InstantiationException e) {
+                throw new TikaException(
+                        "Unable to instantiate a "+getLoaderTagName()+" class: " + name, e);
+            }        }
     }
     private static class ParserXmlLoader extends XmlLoader<CompositeParser,Parser> {
         String getParentTagName() { return "parsers"; }
         String getLoaderTagName() { return "parser"; }
         
         @Override
+        Class<? extends Parser> getLoaderClass() {
+            return Parser.class;
+        }
+        @Override
         boolean isComposite(Parser loaded) {
             return loaded instanceof CompositeParser;
         }
         @Override
+        boolean isComposite(Class<? extends Parser> loadedClass) { // ParserDecorator subclasses are treated as composite as well
+            if (CompositeParser.class.isAssignableFrom(loadedClass) ||
+                ParserDecorator.class.isAssignableFrom(loadedClass)) {
+                return true;
+            }
+            return false;
+        }
+        @Override
         CompositeParser createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
             return getDefaultParser(mimeTypes, loader);
         }
@@ -644,17 +555,83 @@ public class TikaConfig {
         CompositeParser createComposite(List<Parser> parsers, MimeTypes mimeTypes, ServiceLoader loader) {
             MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
             return new CompositeParser(registry, parsers);
-        }        
+        }
+        @Override
+        Parser createComposite(Class<? extends Parser> parserClass,
+                List<Parser> childParsers, Set<Class<? extends Parser>> excludeParsers,
+                MimeTypes mimeTypes, ServiceLoader loader) 
+                throws InvocationTargetException, IllegalAccessException, InstantiationException { // Returns null if no supported constructor is found
+            Parser parser = null;
+            Constructor<? extends Parser> c = null;
+            MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
+            
+            // Try the possible parser constructors in order: (registry, loader, excludes), then (registry, children, excludes)
+            if (parser == null) { // always true at this point
+                try {
+                    c = parserClass.getConstructor(MediaTypeRegistry.class, ServiceLoader.class, Collection.class);
+                    parser = c.newInstance(registry, loader, excludeParsers);
+                } 
+                catch (NoSuchMethodException me) {} // no such constructor, try the next form
+            }
+            if (parser == null) {
+                try {
+                    c = parserClass.getConstructor(MediaTypeRegistry.class, List.class, Collection.class);
+                    parser = c.newInstance(registry, childParsers, excludeParsers);
+                } catch (NoSuchMethodException me) {} // no such constructor, try the next form
+            }
+            
+            // Create as a Parser Decorator, wrapping a single CompositeParser taken from or built around the children
+            if (parser == null && ParserDecorator.class.isAssignableFrom(parserClass)) {
+                try {
+                    CompositeParser cp = null;
+                    if (childParsers.size() == 1 && excludeParsers.size() == 0 &&
+                            childParsers.get(0) instanceof CompositeParser) {
+                        cp = (CompositeParser)childParsers.get(0);
+                    } else {
+                        cp = new CompositeParser(registry, childParsers, excludeParsers);
+                    }
+                    c = parserClass.getConstructor(Parser.class);
+                    parser = c.newInstance(cp);
+                } catch (NoSuchMethodException me) {} // no single-Parser constructor, parser stays null
+            }
+            return parser;
+        }
+        @Override
+        Parser decorate(Parser created, Element element) throws IOException, TikaException { // Applies ParserDecorator.withTypes / withoutTypes when the "mime" / "mime-exclude" lists are non-empty
+            Parser parser = created;
+            
+            // Is there an explicit list of mime types for this to handle?
+            Set<MediaType> parserTypes = mediaTypesListFromDomElement(element, "mime");
+            if (! parserTypes.isEmpty()) {
+                parser = ParserDecorator.withTypes(parser, parserTypes);
+            }
+            // Is there an explicit list of mime types this shouldn't handle?
+            Set<MediaType> parserExclTypes = mediaTypesListFromDomElement(element, "mime-exclude");
+            if (! parserExclTypes.isEmpty()) {
+                parser = ParserDecorator.withoutTypes(parser, parserExclTypes);
+            }
+            
+            // All done with decoration; if neither list was given this is the parser passed in
+            return parser;
+        }
     }
     private static class DetectorXmlLoader extends XmlLoader<CompositeDetector,Detector> {
         String getParentTagName() { return "detectors"; }
         String getLoaderTagName() { return "detector"; }
         
         @Override
+        Class<? extends Detector> getLoaderClass() {
+            return Detector.class;
+        }
+        @Override
         boolean isComposite(Detector loaded) {
             return loaded instanceof CompositeDetector;
         }
         @Override
+        boolean isComposite(Class<? extends Detector> loadedClass) {
+            return CompositeDetector.class.isAssignableFrom(loadedClass);
+        }
+        @Override
         CompositeDetector createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
             return getDefaultDetector(mimeTypes, loader);
         }
@@ -662,6 +639,20 @@ public class TikaConfig {
         CompositeDetector createComposite(List<Detector> detectors, MimeTypes mimeTypes, ServiceLoader loader) {
             MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
             return new CompositeDetector(registry, detectors);
-        }        
+        }
+        @Override
+        Detector createComposite(Class<? extends Detector> compositeClass,
+                List<Detector> children,
+                Set<Class<? extends Detector>> excludeChildren,
+                MimeTypes mimeTypes, ServiceLoader loader)
+                throws InvocationTargetException, IllegalAccessException,
+                InstantiationException {
+            // TODO Implement properly: children, excludeChildren, mimeTypes and loader are all ignored for now
+            return compositeClass.newInstance();
+        }
+        @Override
+        Detector decorate(Detector created, Element element) {
+            return created; // No decoration of Detectors; element is unused
+        }
     }
 }