You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by th...@apache.org on 2016/06/02 03:16:06 UTC
[07/12] tika git commit: Added support for type for runtime parameters
Added support for type for runtime parameters
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/01869923
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/01869923
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/01869923
Branch: refs/heads/TIKA-1508
Commit: 01869923533b330ec7728995e3ee5feceee1b90e
Parents: b64612d
Author: Thamme Gowda <tg...@gmail.com>
Authored: Wed May 25 17:18:25 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Wed May 25 17:18:25 2016 -0700
----------------------------------------------------------------------
.../java/org/apache/tika/base/Configurable.java | 3 +-
.../main/java/org/apache/tika/config/Param.java | 191 +++++++++++++++++++
.../java/org/apache/tika/config/TikaConfig.java | 12 +-
.../org/apache/tika/parser/AbstractParser.java | 3 +-
.../org/apache/tika/parser/ParseContext.java | 7 +-
.../java/org/apache/tika/config/ParamTest.java | 71 +++++++
.../tika/parser/DummyConfigurableParser.java | 5 +-
.../tika/config/TIKA-1508-configurable.xml | 2 +-
8 files changed, 283 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/base/Configurable.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/base/Configurable.java b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
index 4e6418d..f1eb91a 100644
--- a/tika-core/src/main/java/org/apache/tika/base/Configurable.java
+++ b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
@@ -16,6 +16,7 @@
*/
package org.apache.tika.base;
+import org.apache.tika.config.Param;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.parser.ParseContext;
@@ -39,5 +40,5 @@ public interface Configurable {
* Gets parameters of this configurable instance
* @return parameters in the form of a map of key value pairs
*/
- Map<String, String> getParams();
+ Map<String, Param<?>> getParams();
}
http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/config/Param.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/Param.java b/tika-core/src/main/java/org/apache/tika/config/Param.java
new file mode 100644
index 0000000..b54f6be
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/config/Param.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import org.w3c.dom.Node;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Marshaller;
+import javax.xml.bind.Unmarshaller;
+import javax.xml.bind.annotation.*;
+import javax.xml.bind.helpers.DefaultValidationEventHandler;
+import java.io.File;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Serializable;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * JAXB-serializable model class for a named, typed parameter read from a configuration file.
+ * The value is kept in string form and converted on demand through the type's String constructor.
+ * @param <T> value type. Should be serializable to string and have a constructor with string param
+ * @since Apache Tika 1.14
+ */
+@XmlRootElement()
+@XmlAccessorType(XmlAccessType.NONE) // NONE: only explicitly annotated members are bound to XML
+public class Param<T> implements Serializable {
+
+ private static final JAXBContext JAXB_CTX;
+ private static final Marshaller MARSHALLER; // NOTE(review): one instance shared by every save() call; JAXB does not document Marshaller as thread-safe — confirm usage is single-threaded
+ private static final Unmarshaller UNMARSHALLER; // NOTE(review): same concern as MARSHALLER, shared by every load() call
+ private static final Map<Class<?>, String> map = new HashMap<>(); // class -> short alias emitted by getTypeString()
+ private static final Map<String, Class<?>> reverseMap = new HashMap<>(); // short alias -> class, consulted by setTypeString()
+
+ static {
+ map.put(Boolean.class, "bool"); // short aliases for the built-in value types
+ map.put(String.class, "string");
+ map.put(Byte.class, "byte");
+ map.put(Short.class, "short");
+ map.put(Integer.class, "int");
+ map.put(Long.class, "long");
+ map.put(BigInteger.class, "bigint");
+ map.put(Float.class, "float");
+ map.put(Double.class, "double");
+ map.put(File.class, "file");
+ map.put(URI.class, "uri");
+ map.put(URL.class, "url");
+ for (Map.Entry<Class<?>, String> entry : map.entrySet()) { // derive the inverse (alias -> class) lookup
+ reverseMap.put(entry.getValue(), entry.getKey());
+ }
+ try {
+ JAXB_CTX = JAXBContext.newInstance(Param.class);
+ MARSHALLER = JAXB_CTX.createMarshaller();
+ MARSHALLER.setEventHandler(new DefaultValidationEventHandler());
+ UNMARSHALLER = JAXB_CTX.createUnmarshaller();
+ UNMARSHALLER.setEventHandler(new DefaultValidationEventHandler());
+ } catch (JAXBException e) {
+ throw new RuntimeException(e); // thrown from the static initializer, so class initialization fails
+ }
+ }
+
+ @XmlTransient
+ private Class<T> type; // not bound directly; exposed to XML through getTypeString()/setTypeString()
+
+ @XmlAttribute(name = "name")
+ private String name;
+
+ @XmlValue()
+ private String value; // string form of the value; the only representation that is stored
+
+ @XmlTransient
+ private T actualValue; // cache of the converted value, filled lazily by getValue()
+
+ public Param(){ // no-arg constructor; NOTE(review): presumably needed by JAXB for unmarshalling — confirm
+ }
+
+ public Param(String name, Class<T> type, T value){
+ this.name = name;
+ this.type = type;
+ this.value = value.toString(); // keeps only the string form; a null value throws NullPointerException here
+ }
+
+ public Param(String name, T value){
+ this(name, (Class<T>) value.getClass(), value); // type is taken from the runtime class of value
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ @XmlTransient
+ public Class<T> getType() {
+ return type;
+ }
+
+ public void setType(Class<T> type) {
+ this.type = type; // NOTE(review): unlike setTypeString, this does not clear the cached actualValue
+ }
+
+ @XmlAttribute(name = "type")
+ public String getTypeString(){ // type in the form used for the "type" attribute
+ if (type == null) {
+ return null;
+ }
+ if (map.containsKey(type)){
+ return map.get(type); // known type: use its short alias
+ }
+ return type.getName(); // otherwise fall back to the class name from Class.getName()
+ }
+
+ public void setTypeString(String type){
+ if (type == null || type.isEmpty()){
+ return; // nothing to set; the current type is left unchanged
+ }
+ if (reverseMap.containsKey(type)){
+ this.type = (Class<T>) reverseMap.get(type); // short alias
+ } else try {
+ this.type = (Class<T>) Class.forName(type); // otherwise treat the string as a class name
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ this.actualValue = null; // drop the cached value so getValue() converts again with the new type
+ }
+
+ @XmlTransient
+ public T getValue(){ // converts the stored string on first use and caches the result
+ if (actualValue == null) {
+ try {
+ Constructor<T> constructor = type.getConstructor(String.class); // NOTE(review): type is still null if it was never set — this would throw NullPointerException
+ constructor.setAccessible(true);
+ this.actualValue = constructor.newInstance(value);
+ } catch (NoSuchMethodException e) {
+ throw new RuntimeException(type + " doesnt have a constructor that takes String arg", e);
+ } catch (IllegalAccessException | InstantiationException | InvocationTargetException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return actualValue;
+ }
+
+ @Override
+ public String toString() { // note: the type is not part of the output
+ return "Param{" +
+ "name='" + name + '\'' +
+ ", value='" + value + '\'' +
+ ", actualValue=" + actualValue +
+ '}';
+ }
+
+ public void save(OutputStream stream) throws JAXBException { // marshals this param as XML to the stream
+ MARSHALLER.marshal(this, stream);
+ }
+
+ public void save(Node node) throws JAXBException { // marshals this param into the given DOM node
+ MARSHALLER.marshal(this, node);
+ }
+
+ public static <T> Param<T> load(InputStream stream) throws JAXBException { // unmarshals a param from XML; the cast to Param<T> is unchecked
+ return (Param<T>) UNMARSHALLER.unmarshal(stream);
+ }
+
+ public static <T> Param<T> load(Node node) throws JAXBException { // unmarshals a param from a DOM node; the cast to Param<T> is unchecked
+ return (Param<T>) UNMARSHALLER.unmarshal(node);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 896b51b..17b735e 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -17,6 +17,7 @@
package org.apache.tika.config;
import javax.imageio.spi.ServiceRegistry;
+import javax.xml.bind.JAXBException;
import javax.xml.parsers.DocumentBuilder;
import java.io.File;
import java.io.IOException;
@@ -598,8 +599,8 @@ public class TikaConfig {
* @param el xml node which has {@link #PARAMS_TAG_NAME} child
* @return Map of key values read from xml
*/
- Map<String, String> getParams(Element el){
- Map<String, String> params = new HashMap<>();
+ Map<String, Param<?>> getParams(Element el){
+ Map<String, Param<?>> params = new HashMap<>();
for (Node child = el.getFirstChild(); child != null;
child = child.getNextSibling()){
if (PARAMS_TAG_NAME.equals(child.getNodeName())){ //found the node
@@ -608,7 +609,12 @@ public class TikaConfig {
for (int i = 0; i < childNodes.getLength(); i++) {
Node item = childNodes.item(i);
if (item.getNodeType() == Node.ELEMENT_NODE){
- params.put(item.getNodeName().trim(), item.getTextContent().trim());
+ try {
+ Param<?> param = Param.load(item);
+ params.put(param.getName(), param);
+ } catch (JAXBException e) {
+ throw new RuntimeException(e);
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
index 00fac7b..5c045db 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
@@ -21,6 +21,7 @@ import java.io.InputStream;
import java.util.Map;
import java.util.Properties;
+import org.apache.tika.config.Param;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -80,7 +81,7 @@ public abstract class AbstractParser implements ConfigurableParser {
* @since Apache Tika 1.14
*/
@Override
- public Map<String, String> getParams() {
+ public Map<String, Param<?>> getParams() {
return this.context.getParams();
}
}
http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index e58f5c8..c47bbec 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -32,6 +32,7 @@ import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.Map;
+import org.apache.tika.config.Param;
import org.apache.tika.exception.TikaException;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
@@ -57,7 +58,7 @@ public class ParseContext implements Serializable {
/**
* Map of configurable arguments
*/
- private final Map<String, String> params = new HashMap<>();
+ private final Map<String, Param<?>> params = new HashMap<>();
private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() {
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
@@ -205,7 +206,7 @@ public class ParseContext implements Serializable {
* @param key parameter name
* @param value value
*/
- public void setParam(String key, String value){
+ public void setParam(String key, Param<?> value){
this.params.put(key, value);
}
@@ -221,7 +222,7 @@ public class ParseContext implements Serializable {
* Gets all the params
* @return map of key values
*/
- public Map<String, String> getParams() {
+ public Map<String, Param<?>> getParams() {
return params;
}
http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/java/org/apache/tika/config/ParamTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/config/ParamTest.java b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java
new file mode 100644
index 0000000..7c9007e
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import static org.junit.Assert.*;
+
+public class ParamTest { // round-trip test for Param save/load
+
+ @Test
+ public void testSaveAndLoad() throws Exception {
+
+ Object objects [] = { // one sample value per value type exercised by the round trip
+ Integer.MAX_VALUE,
+ 2.5f,
+ 4000.57576,
+ true,
+ false,
+ Long.MAX_VALUE,
+ "Hello this is a boring string",
+ new URL("http://apache.org"),
+ new URI("tika://org.apache.tika.ner.parser?impl=xyz"),
+ new BigInteger(Long.MAX_VALUE + "").add(new BigInteger(Long.MAX_VALUE + "")), // 2 * Long.MAX_VALUE, i.e. outside the long range
+ new File("."),
+ };
+
+ for (Object object : objects) {
+ String name = "name" + System.currentTimeMillis(); // NOTE(review): names can repeat across iterations within the same millisecond; harmless here since each param is checked on its own
+ Param<?> param = new Param<>(name, object);
+ ByteArrayOutputStream stream = new ByteArrayOutputStream();
+ param.save(stream); // serialize to an in-memory buffer
+ ByteArrayInputStream inStream = new ByteArrayInputStream(stream.toByteArray());
+ stream.close();
+ inStream.close(); // NOTE(review): closed before load() reads it; this relies on ByteArrayInputStream.close() having no effect
+ Param<?> loaded = Param.load(inStream);
+ assertEquals(param.getName(), loaded.getName()); // loaded param must match the original param...
+ assertEquals(param.getTypeString(), loaded.getTypeString());
+ assertEquals(param.getType(), loaded.getType());
+ assertEquals(param.getValue(), loaded.getValue());
+
+ assertEquals(loaded.getValue(), object); // ...and the raw inputs; NOTE(review): arguments look swapped relative to JUnit's (expected, actual) order
+ assertEquals(loaded.getName(), name);
+ assertEquals(loaded.getType(), object.getClass());
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
index b8775e0..5a874ac 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
@@ -16,6 +16,7 @@
*/
package org.apache.tika.parser;
+import org.apache.tika.config.Param;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
@@ -56,8 +57,8 @@ public class DummyConfigurableParser extends AbstractParser {
public void parse(InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
- for (Map.Entry<String, String> entry : getParams().entrySet()) {
- metadata.add(entry.getKey(), entry.getValue());
+ for (Map.Entry<String, Param<?>> entry : getParams().entrySet()) {
+ metadata.add(entry.getKey(), entry.getValue().getValue().toString());
}
}
http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
index 999cb45..37c71c9 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
@@ -19,7 +19,7 @@
<parsers>
<parser class="org.apache.tika.parser.DummyConfigurableParser">
<params>
- <testparam>testparamval</testparam>
+ <param name="testparam" type="string">testparamval</param>
</params>
</parser>