You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/08/01 19:53:54 UTC

svn commit: r1693747 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/config/ tika-core/src/main/java/org/apache/tika/language/translate/ tika-parsers/src/test/java/org/apache/tika/config/ tika-parsers/src/test/resources/org/apache/tika/config/

Author: nick
Date: Sat Aug  1 17:53:53 2015
New Revision: 1693747

URL: http://svn.apache.org/r1693747
Log:
Convert Translator config to the new pattern for TIKA-1702, and add unit tests for Translator xml config

Added:
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
    tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml
    tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml
    tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml
Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=1693747&r1=1693746&r2=1693747&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Sat Aug  1 17:53:53 2015
@@ -127,11 +127,12 @@ public class TikaConfig {
             throws TikaException, IOException {
         ParserXmlLoader parserLoader = new ParserXmlLoader();
         DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
+        TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
         
         this.mimeTypes = typesFromDomElement(element);
         this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
         this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
-        this.translator = translatorFromDomElement(element, loader);
+        this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
     }
 
     /**
@@ -210,11 +211,12 @@ public class TikaConfig {
                 Element element = getBuilder().parse(stream).getDocumentElement();
                 ParserXmlLoader parserLoader = new ParserXmlLoader();
                 DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
+                TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
                 
                 this.mimeTypes = typesFromDomElement(element);
                 this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
                 this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
-                this.translator = translatorFromDomElement(element, loader);
+                this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
             } catch (SAXException e) {
                 throw new TikaException(
                         "Specified Tika configuration has syntax errors: "
@@ -322,15 +324,24 @@ public class TikaConfig {
     }
     private static List<Element> getTopLevelElementChildren(Element element, 
             String parentName, String childrenName) throws TikaException {
-        // Should be only zero or one <parsers> / <detectors> etc tag
-        NodeList nodes = element.getElementsByTagName(parentName);
-        if (nodes.getLength() > 1) {
-            throw new TikaException("Properties may not contain multiple "+parentName+" entries");
+        Node parentNode = null;
+        if (parentName != null) {
+            // Should be only zero or one <parsers> / <detectors> etc tag
+            NodeList nodes = element.getElementsByTagName(parentName);
+            if (nodes.getLength() > 1) {
+                throw new TikaException("Properties may not contain multiple "+parentName+" entries");
+            }
+            else if (nodes.getLength() == 1) {
+                parentNode = nodes.item(0);
+            }
+        } else {
+            // All children directly on the master element
+            parentNode = element;
         }
-        else if (nodes.getLength() == 1) {
+        
+        if (parentNode != null) {
             // Find only the direct child parser/detector objects
-            Node parsersE = nodes.item(0);
-            nodes = parsersE.getChildNodes();
+            NodeList nodes = parentNode.getChildNodes();
             List<Element> elements = new ArrayList<Element>();
             for (int i = 0; i < nodes.getLength(); i++) {
                 Node node = nodes.item(i);
@@ -383,39 +394,9 @@ public class TikaConfig {
         if (types != null) return types;
         return Collections.emptySet();
     }
-
-    private static Translator translatorFromDomElement(
-            Element element, ServiceLoader loader)
-            throws TikaException, IOException {
-        List<Translator> translators = new ArrayList<Translator>();
-        NodeList nodes = element.getElementsByTagName("translator");
-        for (int i = 0; i < nodes.getLength(); i++) {
-            Element node = (Element) nodes.item(i);
-            String name = node.getAttribute("class");
-
-            try {
-                Class<? extends Translator> translatorClass =
-                        loader.getServiceClass(Translator.class, name);
-                translators.add(translatorClass.newInstance());
-            } catch (ClassNotFoundException e) {
-                throw new TikaException(
-                        "Unable to find a translator class: " + name, e);
-            } catch (IllegalAccessException e) {
-                throw new TikaException(
-                        "Unable to access a translator class: " + name, e);
-            } catch (InstantiationException e) {
-                throw new TikaException(
-                        "Unable to instantiate a translator class: " + name, e);
-            }
-        }
-        if (translators.isEmpty()) {
-            return getDefaultTranslator(loader);
-        } else {
-            return translators.get(0);
-        }
-    }
     
     private static abstract class XmlLoader<CT,T> {
+        abstract boolean supportsComposite();
         abstract String getParentTagName(); // eg parsers
         abstract String getLoaderTagName(); // eg parser
         abstract Class<? extends T> getLoaderClass(); // Generics workaround
@@ -450,6 +431,9 @@ public class TikaConfig {
                     // Single Composite defined, use that
                     return (CT)single;
                 }
+            } else if (! supportsComposite()) {
+                // No composite support, just return the first one
+                return (CT)loaded.get(0);
             }
             // Wrap the defined parsers/detectors up in a Composite
             return createComposite(loaded, mimeTypes, loader);
@@ -529,6 +513,7 @@ public class TikaConfig {
             }        }
     }
     private static class ParserXmlLoader extends XmlLoader<CompositeParser,Parser> {
+        boolean supportsComposite() { return true; }
         String getParentTagName() { return "parsers"; }
         String getLoaderTagName() { return "parser"; }
         
@@ -566,7 +551,7 @@ public class TikaConfig {
             Constructor<? extends Parser> c = null;
             MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
             
-            // Try the possible parser constructors
+            // Try the possible default and composite parser constructors
             if (parser == null) {
                 try {
                     c = parserClass.getConstructor(MediaTypeRegistry.class, ServiceLoader.class, Collection.class);
@@ -623,6 +608,7 @@ public class TikaConfig {
         }
     }
     private static class DetectorXmlLoader extends XmlLoader<CompositeDetector,Detector> {
+        boolean supportsComposite() { return true; }
         String getParentTagName() { return "detectors"; }
         String getLoaderTagName() { return "detector"; }
         
@@ -658,7 +644,7 @@ public class TikaConfig {
             Constructor<? extends Detector> c = null;
             MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
             
-            // Try the possible composite detector constructors
+            // Try the possible default and composite detector constructors
             if (detector == null) {
                 try {
                     c = detectorClass.getConstructor(MimeTypes.class, ServiceLoader.class, Collection.class);
@@ -692,4 +678,40 @@ public class TikaConfig {
             return created; // No decoration of Detectors
         }
     }
+    private static class TranslatorXmlLoader extends XmlLoader<Translator,Translator> {
+        boolean supportsComposite() { return false; }
+        String getParentTagName() { return null; }
+        String getLoaderTagName() { return "translator"; }
+        
+        @Override
+        Class<? extends Translator> getLoaderClass() {
+            return Translator.class;
+        }
+        @Override
+        boolean isComposite(Translator loaded) { return false; }
+        @Override
+        boolean isComposite(Class<? extends Translator> loadedClass) { return false; }
+        @Override
+        Translator createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return getDefaultTranslator(loader);
+        }
+        @Override
+        Translator createComposite(List<Translator> loaded,
+                MimeTypes mimeTypes, ServiceLoader loader) {
+            return loaded.get(0);
+        }
+        @Override
+        Translator createComposite(Class<? extends Translator> compositeClass,
+                List<Translator> children,
+                Set<Class<? extends Translator>> excludeChildren,
+                MimeTypes mimeTypes, ServiceLoader loader)
+                throws InvocationTargetException, IllegalAccessException,
+                InstantiationException {
+            throw new InstantiationException("Only one translator supported");
+        }
+        @Override
+        Translator decorate(Translator created, Element element) {
+            return created; // No decoration of Translators
+        }        
+    }
 }

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java?rev=1693747&r1=1693746&r2=1693747&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java Sat Aug  1 17:53:53 2015
@@ -38,6 +38,9 @@ public class DefaultTranslator implement
     public DefaultTranslator(ServiceLoader loader) {
         this.loader = loader;
     }
+    public DefaultTranslator() {
+        this(new ServiceLoader());
+    }
 
     /**
      * Finds all statically loadable translators and sort the list by name,

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java?rev=1693747&r1=1693746&r2=1693747&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java Sat Aug  1 17:53:53 2015
@@ -31,7 +31,7 @@ import org.junit.Test;
 
 /**
  * Junit test class for {@link TikaConfig}, which cover things
- *  that {@link AbstractTikaConfigTest} can't do due to a need for the
+ *  that {@link TikaConfigTest} can't do due to a need for the
  *  full set of detectors
  */
 public class TikaDetectorConfigTest extends AbstractTikaConfigTest {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java?rev=1693747&r1=1693746&r2=1693747&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java Sat Aug  1 17:53:53 2015
@@ -37,7 +37,7 @@ import org.junit.Test;
 
 /**
  * Junit test class for {@link TikaConfig}, which cover things
- *  that {@link AbstractTikaConfigTest} can't do due to a need for the
+ *  that {@link TikaConfigTest} can't do due to a need for the
  *  full set of parsers
  */
 public class TikaParserConfigTest extends AbstractTikaConfigTest {

Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java?rev=1693747&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java (added)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java Sat Aug  1 17:53:53 2015
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.language.translate.DefaultTranslator;
+import org.apache.tika.language.translate.EmptyTranslator;
+import org.junit.Test;
+
+/**
+ * JUnit test class for {@link TikaConfig}, which covers things
+ *  that {@link TikaConfigTest} can't do due to a need for the
+ *  full set of translators
+ */
+public class TikaTranslatorConfigTest extends AbstractTikaConfigTest {
+    @Test
+    public void testDefaultBehaviour() throws Exception {
+        TikaConfig config = TikaConfig.getDefaultConfig();
+        assertNotNull(config.getTranslator());
+        assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
+    }
+    
+    @Test
+    public void testRequestsDefault() throws Exception {
+        TikaConfig config = getConfig("TIKA-1702-translator-default.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        assertNotNull(config.getTranslator());
+        
+        assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
+    }
+    
+    @Test
+    public void testRequestsEmpty() throws Exception {
+        TikaConfig config = getConfig("TIKA-1702-translator-empty.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        assertNotNull(config.getTranslator());
+        
+        assertEquals(EmptyTranslator.class, config.getTranslator().getClass());
+    }
+    
+    /**
+     * Currently, Translators don't support Composites, so
+     *  if multiple translators are given, only the first wins
+     */
+    @Test
+    public void testRequestsMultiple() throws Exception {
+        TikaConfig config = getConfig("TIKA-1702-translator-empty-default.xml");
+        assertNotNull(config.getParser());
+        assertNotNull(config.getDetector());
+        assertNotNull(config.getTranslator());
+        
+        assertEquals(EmptyTranslator.class, config.getTranslator().getClass());
+    }
+}

Added: tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml?rev=1693747&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml (added)
+++ tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml Sat Aug  1 17:53:53 2015
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <!-- Explicitly request default parsers and detectors -->
+  <parsers/>
+  <detectors/>
+  <!-- Explicitly request the default Translator -->
+  <translator class="org.apache.tika.language.translate.DefaultTranslator"/>
+</properties>

Added: tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml?rev=1693747&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml (added)
+++ tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml Sat Aug  1 17:53:53 2015
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <!-- As Translators don't support Composites, only the first one listed (Empty) is used -->
+  <translator class="org.apache.tika.language.translate.EmptyTranslator"/>
+  <translator class="org.apache.tika.language.translate.DefaultTranslator"/>
+</properties>

Added: tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml?rev=1693747&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml (added)
+++ tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml Sat Aug  1 17:53:53 2015
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <translator class="org.apache.tika.language.translate.EmptyTranslator"/>
+</properties>