You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/08/01 18:22:28 UTC

svn commit: r1693721 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/config/TikaConfig.java tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java

Author: nick
Date: Sat Aug  1 16:22:28 2015
New Revision: 1693721

URL: http://svn.apache.org/r1693721
Log:
TIKA-1702 Start moving to a loader class pattern for common Detector and Parser (+later others)

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=1693721&r1=1693720&r2=1693721&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Sat Aug  1 16:22:28 2015
@@ -125,9 +125,12 @@ public class TikaConfig {
 
     private TikaConfig(Element element, ServiceLoader loader)
             throws TikaException, IOException {
+        ParserXmlLoader parserLoader = new ParserXmlLoader();
+        DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
+        // NOTE: detectorLoader is created but not used below; the detector still comes from detectorFromDomElement
         this.mimeTypes = typesFromDomElement(element);
         this.detector = detectorFromDomElement(element, mimeTypes, loader);
-        this.parser = parserFromDomElement(element, mimeTypes, loader);
+        this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
         this.translator = translatorFromDomElement(element, loader);
     }
 
@@ -204,11 +207,12 @@ public class TikaConfig {
             }
 
             try {
-                Element element =
-                        getBuilder().parse(stream).getDocumentElement();
+                Element element = getBuilder().parse(stream).getDocumentElement();
+                ParserXmlLoader parserLoader = new ParserXmlLoader();
+                DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
+                
                 this.mimeTypes = typesFromDomElement(element);
-                this.parser =
-                        parserFromDomElement(element, mimeTypes, loader);
+                this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
                 this.detector =
                         detectorFromDomElement(element, mimeTypes, loader);
                 this.translator = translatorFromDomElement(element, loader);
@@ -355,28 +359,28 @@ public class TikaConfig {
         }
     }
 
-    private static CompositeParser parserFromDomElement(
-            Element element, MimeTypes mimeTypes, ServiceLoader loader)
-            throws TikaException, IOException {
-        List<Parser> parsers = new ArrayList<Parser>();
-        
-        // Find the parser children of the parsers tag, if any
-        for (Element pe : getTopLevelElementChildren(element, "parsers", "parser")) {
-            parsers.add(parserFromParserDomElement(pe, mimeTypes, loader));
-        }
-        
-        if (parsers.isEmpty()) {
-            // No parsers defined, create a DefaultParser
-            return getDefaultParser(mimeTypes, loader);
-        } else if (parsers.size() == 1 && parsers.get(0) instanceof CompositeParser) {
-            // Single Composite defined, use that
-            return (CompositeParser)parsers.get(0);
-        } else {
-            // Wrap the defined parsers up in a Composite
-            MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
-            return new CompositeParser(registry, parsers);
-        }
-    }
+//    private static CompositeParser parserFromDomElement(
+//            Element element, MimeTypes mimeTypes, ServiceLoader loader)
+//            throws TikaException, IOException {
+//        List<Parser> parsers = new ArrayList<Parser>();
+//        
+//        // Find the parser children of the parsers tag, if any
+//        for (Element pe : getTopLevelElementChildren(element, "parsers", "parser")) {
+//            parsers.add(parserFromParserDomElement(pe, mimeTypes, loader));
+//        }
+//        
+//        if (parsers.isEmpty()) {
+//            // No parsers defined, create a DefaultParser
+//            return getDefaultParser(mimeTypes, loader);
+//        } else if (parsers.size() == 1 && parsers.get(0) instanceof CompositeParser) {
+//            // Single Composite defined, use that
+//            return (CompositeParser)parsers.get(0);
+//        } else {
+//            // Wrap the defined parsers up in a Composite
+//            MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
+//            return new CompositeParser(registry, parsers);
+//        }
+//    }
     private static Parser parserFromParserDomElement(
             Element parserNode, MimeTypes mimeTypes, ServiceLoader loader)
             throws TikaException, IOException {
@@ -585,4 +589,79 @@ public class TikaConfig {
             return translators.get(0);
         }
     }
+    
+    private static abstract class XmlLoader<CT,T> { // CT: composite type returned; T: type of each loaded entry
+        abstract String getParentTagName(); // name of the tag wrapping the entries, eg parsers
+        abstract String getLoaderTagName(); // name of the tag for a single entry, eg parser
+        abstract boolean isComposite(T loaded); // true if loaded can itself be returned as the CT
+        abstract CT createDefault(MimeTypes mimeTypes, ServiceLoader loader); // used when no entries are defined
+        abstract CT createComposite(List<T> loaded, MimeTypes mimeTypes, ServiceLoader loader); // wraps the loaded entries
+        // Loads all configured entries and returns a default, the single composite entry, or a new composite
+        @SuppressWarnings("unchecked")
+        CT loadOverall(Element element, MimeTypes mimeTypes, 
+                ServiceLoader loader) throws TikaException, IOException {
+            List<T> loaded = new ArrayList<T>();
+            
+            // Find the children of the parent tag, if any
+            for (Element le : getTopLevelElementChildren(element, getParentTagName(), getLoaderTagName())) {
+                loaded.add(loadOne(le, mimeTypes, loader));
+            }
+            
+            // Build the classes, and wrap as needed
+            if (loaded.isEmpty()) {
+                // Nothing defined, create a Default
+                return createDefault(mimeTypes, loader);
+            } else if (loaded.size() == 1) {
+                T single = loaded.get(0);
+                if (isComposite(single)) {
+                    // Single Composite defined, use that
+                    return (CT)single; // unchecked cast, guarded by isComposite(single) above
+                }
+            }
+            // Wrap the defined parsers/detectors up in a Composite
+            return createComposite(loaded, mimeTypes, loader);
+        }
+        T loadOne(Element element, MimeTypes mimeTypes, 
+                ServiceLoader loader) throws TikaException, IOException {
+            // TODO Do this properly
+            // TODO This is a cheat for parsers only! It always builds a Parser, then casts it (unchecked) to T
+            return (T)parserFromParserDomElement(element, mimeTypes, loader);
+        }
+    }
+    private static class ParserXmlLoader extends XmlLoader<CompositeParser,Parser> { // XmlLoader for Parser entries
+        String getParentTagName() { return "parsers"; }
+        String getLoaderTagName() { return "parser"; }
+        // A loaded Parser counts as composite only when it is a CompositeParser
+        @Override
+        boolean isComposite(Parser loaded) {
+            return loaded instanceof CompositeParser;
+        }
+        @Override
+        CompositeParser createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return getDefaultParser(mimeTypes, loader);
+        }
+        @Override
+        CompositeParser createComposite(List<Parser> parsers, MimeTypes mimeTypes, ServiceLoader loader) {
+            MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry(); // registry comes from the supplied MimeTypes
+            return new CompositeParser(registry, parsers);
+        }        
+    }
+    private static class DetectorXmlLoader extends XmlLoader<CompositeDetector,Detector> { // XmlLoader for Detector entries
+        String getParentTagName() { return "detectors"; }
+        String getLoaderTagName() { return "detector"; }
+        // NOTE: inherited loadOne() still calls parserFromParserDomElement, so detector entries are not handled properly yet
+        @Override
+        boolean isComposite(Detector loaded) {
+            return loaded instanceof CompositeDetector;
+        }
+        @Override
+        CompositeDetector createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return getDefaultDetector(mimeTypes, loader);
+        }
+        @Override
+        CompositeDetector createComposite(List<Detector> detectors, MimeTypes mimeTypes, ServiceLoader loader) {
+            MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry(); // registry comes from the supplied MimeTypes
+            return new CompositeDetector(registry, detectors);
+        }        
+    }
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java?rev=1693721&r1=1693720&r2=1693721&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java Sat Aug  1 16:22:28 2015
@@ -37,7 +37,7 @@ import org.junit.Test;
  */
 public class TikaDetectorConfigTest extends AbstractTikaConfigTest {
     @Test
-    @Ignore // TODO Finish support
+    @Ignore // TODO Work on TIKA-1702 in progress
     public void testDetectorExcludeFromDefault() throws Exception {
         TikaConfig config = getConfig("TIKA-1702-detector-blacklist.xml");
         assertNotNull(config.getParser());