You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/08/01 16:53:36 UTC

svn commit: r1693713 - in /tika/trunk/tika-core/src: main/java/org/apache/tika/config/TikaConfig.java test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java

Author: nick
Date: Sat Aug  1 14:53:36 2015
New Revision: 1693713

URL: http://svn.apache.org/r1693713
Log:
TIKA-1702 Refactor some of the config parser loading to be more re-usable for detectors, and bring the method signature in line WRT Composite vs not (must always be composite)

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=1693713&r1=1693712&r2=1693713&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Sat Aug  1 14:53:36 2015
@@ -66,7 +66,7 @@ public class TikaConfig {
         return MimeTypes.getDefaultMimeTypes(loader);
     }
 
-    protected Detector getDefaultDetector(
+    protected CompositeDetector getDefaultDetector(
             MimeTypes types, ServiceLoader loader) {
         return new DefaultDetector(types, loader);
     }
@@ -80,7 +80,7 @@ public class TikaConfig {
         return new DefaultTranslator(loader);
     }
     private final CompositeParser parser;
-    private final Detector detector;
+    private final CompositeDetector detector;
     private final Translator translator;
 
     private final MimeTypes mimeTypes;
@@ -317,6 +317,33 @@ public class TikaConfig {
         }
         return null;
     }
+    private static List<Element> getTopLevelElementChildren(Element element, 
+            String parentName, String childrenName) throws TikaException {
+        // Expect zero or one <parentName> tag (e.g. <parsers>); getElementsByTagName matches at any depth
+        NodeList nodes = element.getElementsByTagName(parentName);
+        if (nodes.getLength() > 1) {
+            throw new TikaException("Properties may not contain multiple "+parentName+" entries");
+        }
+        else if (nodes.getLength() == 1) {
+            // Collect only the direct Element children of that tag whose tag name equals childrenName
+            Node parsersE = nodes.item(0);
+            nodes = parsersE.getChildNodes();
+            List<Element> elements = new ArrayList<Element>();
+            for (int i = 0; i < nodes.getLength(); i++) {
+                Node node = nodes.item(i);
+                if (node instanceof Element) {
+                    Element nodeE = (Element)node;
+                    if (childrenName.equals(nodeE.getTagName())) {
+                        elements.add(nodeE);
+                    }
+                }
+            }
+            return elements;
+        } else {
+            // No <parentName> tag was found, so there are no children to return
+            return Collections.emptyList();
+        }
+    }
 
     private static MimeTypes typesFromDomElement(Element element)
             throws TikaException, IOException {
@@ -333,24 +360,9 @@ public class TikaConfig {
             throws TikaException, IOException {
         List<Parser> parsers = new ArrayList<Parser>();
         
-        // Should be only zero or one <parsers> tag
-        NodeList nodes = element.getElementsByTagName("parsers");
-        if (nodes.getLength() > 1) {
-            throw new TikaException("Properties may not contain multiple Parsers entries");
-        }
-        else if (nodes.getLength() == 1) {
-            // Find only the direct child parser objects
-            Node parsersE = nodes.item(0);
-            nodes = parsersE.getChildNodes();
-            for (int i = 0; i < nodes.getLength(); i++) {
-                Node node = nodes.item(i);
-                if (node instanceof Element) {
-                    Element nodeE = (Element)node;
-                    if ("parser".equals(nodeE.getTagName())) {
-                        parsers.add(parserFromParserDomElement(nodeE, mimeTypes, loader));
-                    }
-                }
-            }
+        // Find the parser children of the parsers tag, if any
+        for (Element pe : getTopLevelElementChildren(element, "parsers", "parser")) {
+            parsers.add(parserFromParserDomElement(pe, mimeTypes, loader));
         }
         
         if (parsers.isEmpty()) {
@@ -500,7 +512,7 @@ public class TikaConfig {
         return Collections.emptySet();
     }
 
-    private static Detector detectorFromDomElement(
+    private static CompositeDetector detectorFromDomElement(
           Element element, MimeTypes mimeTypes, ServiceLoader loader)
           throws TikaException, IOException {
        List<Detector> detectors = new ArrayList<Detector>();

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java?rev=1693713&r1=1693712&r2=1693713&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java Sat Aug  1 14:53:36 2015
@@ -29,14 +29,10 @@ import java.nio.charset.Charset;
 import org.apache.tika.Tika;
 import org.apache.tika.config.ServiceLoader;
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.CompositeDetector;
 import org.apache.tika.detect.DefaultProbDetector;
-import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.mime.MediaTypeRegistry;
-import org.apache.tika.mime.MimeTypes;
-import org.apache.tika.mime.ProbabilisticMimeDetectionSelector;
 import org.apache.tika.mime.ProbabilisticMimeDetectionSelector.Builder;
 import org.junit.Before;
 import org.junit.Test;
@@ -54,7 +50,7 @@ public class ProbabilisticMimeDetectionT
             registry = MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry();
             tika = new Tika(new TikaConfig() {
                 @Override
-                protected Detector getDefaultDetector(MimeTypes types,
+                protected CompositeDetector getDefaultDetector(MimeTypes types,
                         ServiceLoader loader) {
                     /*
                      * here is an example with the use of the builder to