You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2015/08/01 19:53:54 UTC
svn commit: r1693747 - in /tika/trunk:
tika-core/src/main/java/org/apache/tika/config/
tika-core/src/main/java/org/apache/tika/language/translate/
tika-parsers/src/test/java/org/apache/tika/config/
tika-parsers/src/test/resources/org/apache/tika/config/
Author: nick
Date: Sat Aug 1 17:53:53 2015
New Revision: 1693747
URL: http://svn.apache.org/r1693747
Log:
Convert Translator config to the new pattern for TIKA-1702, and add unit tests for Translator xml config
Added:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=1693747&r1=1693746&r2=1693747&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Sat Aug 1 17:53:53 2015
@@ -127,11 +127,12 @@ public class TikaConfig {
throws TikaException, IOException {
ParserXmlLoader parserLoader = new ParserXmlLoader();
DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
+ TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
this.mimeTypes = typesFromDomElement(element);
this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
- this.translator = translatorFromDomElement(element, loader);
+ this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
}
/**
@@ -210,11 +211,12 @@ public class TikaConfig {
Element element = getBuilder().parse(stream).getDocumentElement();
ParserXmlLoader parserLoader = new ParserXmlLoader();
DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
+ TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
this.mimeTypes = typesFromDomElement(element);
this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
- this.translator = translatorFromDomElement(element, loader);
+ this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
} catch (SAXException e) {
throw new TikaException(
"Specified Tika configuration has syntax errors: "
@@ -322,15 +324,24 @@ public class TikaConfig {
}
private static List<Element> getTopLevelElementChildren(Element element,
String parentName, String childrenName) throws TikaException {
- // Should be only zero or one <parsers> / <detectors> etc tag
- NodeList nodes = element.getElementsByTagName(parentName);
- if (nodes.getLength() > 1) {
- throw new TikaException("Properties may not contain multiple "+parentName+" entries");
+ Node parentNode = null;
+ if (parentName != null) {
+ // Should be only zero or one <parsers> / <detectors> etc tag
+ NodeList nodes = element.getElementsByTagName(parentName);
+ if (nodes.getLength() > 1) {
+ throw new TikaException("Properties may not contain multiple "+parentName+" entries");
+ }
+ else if (nodes.getLength() == 1) {
+ parentNode = nodes.item(0);
+ }
+ } else {
+ // All children directly on the master element
+ parentNode = element;
}
- else if (nodes.getLength() == 1) {
+
+ if (parentNode != null) {
// Find only the direct child parser/detector objects
- Node parsersE = nodes.item(0);
- nodes = parsersE.getChildNodes();
+ NodeList nodes = parentNode.getChildNodes();
List<Element> elements = new ArrayList<Element>();
for (int i = 0; i < nodes.getLength(); i++) {
Node node = nodes.item(i);
@@ -383,39 +394,9 @@ public class TikaConfig {
if (types != null) return types;
return Collections.emptySet();
}
-
- private static Translator translatorFromDomElement(
- Element element, ServiceLoader loader)
- throws TikaException, IOException {
- List<Translator> translators = new ArrayList<Translator>();
- NodeList nodes = element.getElementsByTagName("translator");
- for (int i = 0; i < nodes.getLength(); i++) {
- Element node = (Element) nodes.item(i);
- String name = node.getAttribute("class");
-
- try {
- Class<? extends Translator> translatorClass =
- loader.getServiceClass(Translator.class, name);
- translators.add(translatorClass.newInstance());
- } catch (ClassNotFoundException e) {
- throw new TikaException(
- "Unable to find a translator class: " + name, e);
- } catch (IllegalAccessException e) {
- throw new TikaException(
- "Unable to access a translator class: " + name, e);
- } catch (InstantiationException e) {
- throw new TikaException(
- "Unable to instantiate a translator class: " + name, e);
- }
- }
- if (translators.isEmpty()) {
- return getDefaultTranslator(loader);
- } else {
- return translators.get(0);
- }
- }
private static abstract class XmlLoader<CT,T> {
+ abstract boolean supportsComposite();
abstract String getParentTagName(); // eg parsers
abstract String getLoaderTagName(); // eg parser
abstract Class<? extends T> getLoaderClass(); // Generics workaround
@@ -450,6 +431,9 @@ public class TikaConfig {
// Single Composite defined, use that
return (CT)single;
}
+ } else if (! supportsComposite()) {
+ // No composite support, just return the first one
+ return (CT)loaded.get(0);
}
// Wrap the defined parsers/detectors up in a Composite
return createComposite(loaded, mimeTypes, loader);
@@ -529,6 +513,7 @@ public class TikaConfig {
} }
}
private static class ParserXmlLoader extends XmlLoader<CompositeParser,Parser> {
+ boolean supportsComposite() { return true; }
String getParentTagName() { return "parsers"; }
String getLoaderTagName() { return "parser"; }
@@ -566,7 +551,7 @@ public class TikaConfig {
Constructor<? extends Parser> c = null;
MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
- // Try the possible parser constructors
+ // Try the possible default and composite parser constructors
if (parser == null) {
try {
c = parserClass.getConstructor(MediaTypeRegistry.class, ServiceLoader.class, Collection.class);
@@ -623,6 +608,7 @@ public class TikaConfig {
}
}
private static class DetectorXmlLoader extends XmlLoader<CompositeDetector,Detector> {
+ boolean supportsComposite() { return true; }
String getParentTagName() { return "detectors"; }
String getLoaderTagName() { return "detector"; }
@@ -658,7 +644,7 @@ public class TikaConfig {
Constructor<? extends Detector> c = null;
MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
- // Try the possible composite detector constructors
+ // Try the possible default and composite detector constructors
if (detector == null) {
try {
c = detectorClass.getConstructor(MimeTypes.class, ServiceLoader.class, Collection.class);
@@ -692,4 +678,40 @@ public class TikaConfig {
return created; // No decoration of Detectors
}
}
+ private static class TranslatorXmlLoader extends XmlLoader<Translator,Translator> { // XmlLoader specialisation for <translator> config entries
+ boolean supportsComposite() { return false; } // Translators have no composite form, so the loader just uses the first one loaded
+ String getParentTagName() { return null; } // null: <translator> tags sit directly under the root element, with no wrapper tag
+ String getLoaderTagName() { return "translator"; }
+
+ @Override
+ Class<? extends Translator> getLoaderClass() {
+ return Translator.class;
+ }
+ @Override
+ boolean isComposite(Translator loaded) { return false; } // No Translator instance is ever treated as a composite
+ @Override
+ boolean isComposite(Class<? extends Translator> loadedClass) { return false; } // Nor is any Translator class
+ @Override
+ Translator createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+ return getDefaultTranslator(loader); // mimeTypes is unused here; delegates to getDefaultTranslator
+ }
+ @Override
+ Translator createComposite(List<Translator> loaded,
+ MimeTypes mimeTypes, ServiceLoader loader) {
+ return loaded.get(0); // Nothing to wrap them in, so the first translator wins
+ }
+ @Override
+ Translator createComposite(Class<? extends Translator> compositeClass,
+ List<Translator> children,
+ Set<Class<? extends Translator>> excludeChildren,
+ MimeTypes mimeTypes, ServiceLoader loader)
+ throws InvocationTargetException, IllegalAccessException,
+ InstantiationException {
+ throw new InstantiationException("Only one translator supported"); // Always rejected: there is no composite translator class to build
+ }
+ @Override
+ Translator decorate(Translator created, Element element) {
+ return created; // No decoration of Translators
+ }
+ }
}
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java?rev=1693747&r1=1693746&r2=1693747&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/language/translate/DefaultTranslator.java Sat Aug 1 17:53:53 2015
@@ -38,6 +38,9 @@ public class DefaultTranslator implement
public DefaultTranslator(ServiceLoader loader) {
this.loader = loader;
}
+ public DefaultTranslator() {
+ this(new ServiceLoader());
+ }
/**
* Finds all statically loadable translators and sort the list by name,
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java?rev=1693747&r1=1693746&r2=1693747&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java Sat Aug 1 17:53:53 2015
@@ -31,7 +31,7 @@ import org.junit.Test;
/**
* Junit test class for {@link TikaConfig}, which cover things
- * that {@link AbstractTikaConfigTest} can't do due to a need for the
+ * that {@link TikaConfigTest} can't do due to a need for the
* full set of detectors
*/
public class TikaDetectorConfigTest extends AbstractTikaConfigTest {
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java?rev=1693747&r1=1693746&r2=1693747&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java Sat Aug 1 17:53:53 2015
@@ -37,7 +37,7 @@ import org.junit.Test;
/**
* Junit test class for {@link TikaConfig}, which cover things
- * that {@link AbstractTikaConfigTest} can't do due to a need for the
+ * that {@link TikaConfigTest} can't do due to a need for the
* full set of parsers
*/
public class TikaParserConfigTest extends AbstractTikaConfigTest {
Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java?rev=1693747&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java (added)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaTranslatorConfigTest.java Sat Aug 1 17:53:53 2015
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.apache.tika.language.translate.DefaultTranslator;
+import org.apache.tika.language.translate.EmptyTranslator;
+import org.junit.Test;
+
+/**
+ * Junit test class for {@link TikaConfig}, which covers things
+ * that {@link TikaConfigTest} can't do due to a need for the
+ * full set of translators
+ */
+public class TikaTranslatorConfigTest extends AbstractTikaConfigTest {
+ @Test
+ public void testDefaultBehaviour() throws Exception {
+ TikaConfig config = TikaConfig.getDefaultConfig(); // No explicit config file is supplied
+ assertNotNull(config.getTranslator());
+ assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
+ }
+
+ @Test
+ public void testRequestsDefault() throws Exception {
+ TikaConfig config = getConfig("TIKA-1702-translator-default.xml"); // Config names DefaultTranslator explicitly
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ assertNotNull(config.getTranslator());
+
+ assertEquals(DefaultTranslator.class, config.getTranslator().getClass());
+ }
+
+ @Test
+ public void testRequestsEmpty() throws Exception {
+ TikaConfig config = getConfig("TIKA-1702-translator-empty.xml"); // Config names only EmptyTranslator
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ assertNotNull(config.getTranslator());
+
+ assertEquals(EmptyTranslator.class, config.getTranslator().getClass());
+ }
+
+ /**
+ * Currently, Translators don't support Composites, so
+ * if multiple translators are given, only the first wins
+ */
+ @Test
+ public void testRequestsMultiple() throws Exception {
+ TikaConfig config = getConfig("TIKA-1702-translator-empty-default.xml"); // Config lists Empty first, then Default
+ assertNotNull(config.getParser());
+ assertNotNull(config.getDetector());
+ assertNotNull(config.getTranslator());
+
+ assertEquals(EmptyTranslator.class, config.getTranslator().getClass()); // The first listed translator is the one used
+ }
+}
Added: tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml?rev=1693747&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml (added)
+++ tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml Sat Aug 1 17:53:53 2015
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <!-- Explicitly request default parsers and detectors -->
+ <parsers/>
+ <detectors/>
+ <!-- Explicitly request the default Translator -->
+ <translator class="org.apache.tika.language.translate.DefaultTranslator"/>
+</properties>
Added: tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml?rev=1693747&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml (added)
+++ tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml Sat Aug 1 17:53:53 2015
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <!-- As Translators don't support Composites, only the first one listed (Empty) is used -->
+ <translator class="org.apache.tika.language.translate.EmptyTranslator"/>
+ <translator class="org.apache.tika.language.translate.DefaultTranslator"/>
+</properties>
Added: tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml?rev=1693747&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml (added)
+++ tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml Sat Aug 1 17:53:53 2015
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <translator class="org.apache.tika.language.translate.EmptyTranslator"/>
+</properties>