You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by th...@apache.org on 2016/06/02 03:16:06 UTC

[07/12] tika git commit: Added support for type for runtime parameters

Added support for type for runtime parameters

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/01869923
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/01869923
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/01869923

Branch: refs/heads/TIKA-1508
Commit: 01869923533b330ec7728995e3ee5feceee1b90e
Parents: b64612d
Author: Thamme Gowda <tg...@gmail.com>
Authored: Wed May 25 17:18:25 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Wed May 25 17:18:25 2016 -0700

----------------------------------------------------------------------
 .../java/org/apache/tika/base/Configurable.java |   3 +-
 .../main/java/org/apache/tika/config/Param.java | 191 +++++++++++++++++++
 .../java/org/apache/tika/config/TikaConfig.java |  12 +-
 .../org/apache/tika/parser/AbstractParser.java  |   3 +-
 .../org/apache/tika/parser/ParseContext.java    |   7 +-
 .../java/org/apache/tika/config/ParamTest.java  |  71 +++++++
 .../tika/parser/DummyConfigurableParser.java    |   5 +-
 .../tika/config/TIKA-1508-configurable.xml      |   2 +-
 8 files changed, 283 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/base/Configurable.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/base/Configurable.java b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
index 4e6418d..f1eb91a 100644
--- a/tika-core/src/main/java/org/apache/tika/base/Configurable.java
+++ b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.base;
 
+import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.parser.ParseContext;
 
@@ -39,5 +40,5 @@ public interface Configurable {
      * Gets parameters of this configurable instance
      * @return parameters in the form  of a map of key value pairs
      */
-    Map<String, String> getParams();
+    Map<String, Param<?>> getParams();
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/config/Param.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/Param.java b/tika-core/src/main/java/org/apache/tika/config/Param.java
new file mode 100644
index 0000000..b54f6be
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/config/Param.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import org.w3c.dom.Node;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Marshaller;
+import javax.xml.bind.Unmarshaller;
+import javax.xml.bind.annotation.*;
+import javax.xml.bind.helpers.DefaultValidationEventHandler;
+import java.io.File;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Serializable;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * This is a JAXB serializable model class for parameters from configuration file.
+ *
+ * @param <T> value type. Should be serializable to string and have a constructor with string param
+ * @since Apache Tika 1.14
+ */
+@XmlRootElement()
+@XmlAccessorType(XmlAccessType.NONE)
+public class Param<T> implements Serializable {
+
+    private static final JAXBContext JAXB_CTX;
+    private static final Marshaller MARSHALLER;
+    private static final Unmarshaller UNMARSHALLER;
+    private static final Map<Class<?>, String> map = new HashMap<>();
+    private static final Map<String, Class<?>> reverseMap = new HashMap<>();
+
+    static {
+        map.put(Boolean.class, "bool");
+        map.put(String.class, "string");
+        map.put(Byte.class, "byte");
+        map.put(Short.class, "short");
+        map.put(Integer.class, "int");
+        map.put(Long.class, "long");
+        map.put(BigInteger.class, "bigint");
+        map.put(Float.class, "float");
+        map.put(Double.class, "double");
+        map.put(File.class, "file");
+        map.put(URI.class, "uri");
+        map.put(URL.class, "url");
+        for (Map.Entry<Class<?>, String> entry : map.entrySet()) {
+            reverseMap.put(entry.getValue(), entry.getKey());
+        }
+        try {
+            JAXB_CTX = JAXBContext.newInstance(Param.class);
+            MARSHALLER = JAXB_CTX.createMarshaller();
+            MARSHALLER.setEventHandler(new DefaultValidationEventHandler());
+            UNMARSHALLER = JAXB_CTX.createUnmarshaller();
+            UNMARSHALLER.setEventHandler(new DefaultValidationEventHandler());
+        } catch (JAXBException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @XmlTransient
+    private Class<T> type;
+
+    @XmlAttribute(name = "name")
+    private String name;
+
+    @XmlValue()
+    private String value;
+
+    @XmlTransient
+    private T actualValue;
+
+    public Param(){
+    }
+
+    public Param(String name, Class<T> type, T value){
+        this.name = name;
+        this.type = type;
+        this.value = value.toString();
+    }
+
+    public Param(String name, T value){
+        this(name, (Class<T>) value.getClass(), value);
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    @XmlTransient
+    public Class<T> getType() {
+        return type;
+    }
+
+    public void setType(Class<T> type) {
+        this.type = type;
+    }
+
+    @XmlAttribute(name = "type")
+    public String getTypeString(){
+        if (type == null) {
+            return null;
+        }
+        if (map.containsKey(type)){
+            return map.get(type);
+        }
+        return type.getName();
+    }
+
+    public void setTypeString(String type){
+        if (type == null || type.isEmpty()){
+            return;
+        }
+        if (reverseMap.containsKey(type)){
+            this.type = (Class<T>) reverseMap.get(type);
+        } else try {
+            this.type = (Class<T>) Class.forName(type);
+        } catch (ClassNotFoundException e) {
+            throw new RuntimeException(e);
+        }
+        this.actualValue = null;
+    }
+
+    @XmlTransient
+    public T getValue(){
+        if (actualValue == null) {
+            try {
+                Constructor<T> constructor = type.getConstructor(String.class);
+                constructor.setAccessible(true);
+                this.actualValue = constructor.newInstance(value);
+            } catch (NoSuchMethodException e) {
+                throw new RuntimeException(type + " doesnt have a constructor that takes String arg", e);
+            } catch (IllegalAccessException | InstantiationException | InvocationTargetException e) {
+                throw new RuntimeException(e);
+            }
+        }
+        return actualValue;
+    }
+
+    @Override
+    public String toString() {
+        return "Param{" +
+                "name='" + name + '\'' +
+                ", value='" + value + '\'' +
+                ", actualValue=" + actualValue +
+                '}';
+    }
+
+    public void save(OutputStream stream) throws JAXBException {
+        MARSHALLER.marshal(this, stream);
+    }
+
+    public void save(Node node) throws JAXBException {
+        MARSHALLER.marshal(this, node);
+    }
+
+    public static <T> Param<T> load(InputStream stream) throws JAXBException {
+        return (Param<T>) UNMARSHALLER.unmarshal(stream);
+    }
+
+    public static <T> Param<T> load(Node node) throws JAXBException {
+        return (Param<T>) UNMARSHALLER.unmarshal(node);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 896b51b..17b735e 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -17,6 +17,7 @@
 package org.apache.tika.config;
 
 import javax.imageio.spi.ServiceRegistry;
+import javax.xml.bind.JAXBException;
 import javax.xml.parsers.DocumentBuilder;
 import java.io.File;
 import java.io.IOException;
@@ -598,8 +599,8 @@ public class TikaConfig {
          * @param el xml node which has {@link #PARAMS_TAG_NAME} child
          * @return Map of key values read from xml
          */
-        Map<String, String>  getParams(Element el){
-            Map<String, String> params = new HashMap<>();
+        Map<String, Param<?>>  getParams(Element el){
+            Map<String, Param<?>> params = new HashMap<>();
             for (Node child = el.getFirstChild(); child != null;
                  child = child.getNextSibling()){
                 if (PARAMS_TAG_NAME.equals(child.getNodeName())){ //found the node
@@ -608,7 +609,12 @@ public class TikaConfig {
                         for (int i = 0; i < childNodes.getLength(); i++) {
                             Node item = childNodes.item(i);
                             if (item.getNodeType() == Node.ELEMENT_NODE){
-                                params.put(item.getNodeName().trim(), item.getTextContent().trim());
+                                try {
+                                    Param<?> param = Param.load(item);
+                                    params.put(param.getName(), param);
+                                } catch (JAXBException e) {
+                                    throw new RuntimeException(e);
+                                }
                             }
                         }
                     }

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
index 00fac7b..5c045db 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
@@ -21,6 +21,7 @@ import java.io.InputStream;
 import java.util.Map;
 import java.util.Properties;
 
+import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -80,7 +81,7 @@ public abstract class AbstractParser implements ConfigurableParser {
      * @since Apache Tika 1.14
      */
     @Override
-    public Map<String, String> getParams() {
+    public Map<String, Param<?>> getParams() {
         return this.context.getParams();
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index e58f5c8..c47bbec 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -32,6 +32,7 @@ import java.lang.reflect.Method;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaException;
 import org.xml.sax.EntityResolver;
 import org.xml.sax.InputSource;
@@ -57,7 +58,7 @@ public class ParseContext implements Serializable {
     /**
      * Map of configurable arguments
      */
-    private final Map<String, String> params = new HashMap<>();
+    private final Map<String, Param<?>> params = new HashMap<>();
 
     private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() {
         public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
@@ -205,7 +206,7 @@ public class ParseContext implements Serializable {
      * @param key parameter name
      * @param value value
      */
-    public void setParam(String key, String value){
+    public void setParam(String key, Param<?> value){
         this.params.put(key, value);
     }
 
@@ -221,7 +222,7 @@ public class ParseContext implements Serializable {
      * Gets all the params
      * @return map of key values
      */
-    public Map<String, String> getParams() {
+    public Map<String, Param<?>> getParams() {
         return params;
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/java/org/apache/tika/config/ParamTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/config/ParamTest.java b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java
new file mode 100644
index 0000000..7c9007e
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import static org.junit.Assert.*;
+
+public class ParamTest {
+
+    @Test
+    public void testSaveAndLoad() throws Exception {
+
+        Object objects [] =  {
+                Integer.MAX_VALUE,
+                2.5f,
+                4000.57576,
+                true,
+                false,
+                Long.MAX_VALUE,
+                "Hello this is a boring string",
+                new URL("http://apache.org"),
+                new URI("tika://org.apache.tika.ner.parser?impl=xyz"),
+                new BigInteger(Long.MAX_VALUE + "").add(new BigInteger(Long.MAX_VALUE + "")),
+                new File("."),
+        };
+
+        for (Object object : objects) {
+            String name = "name" + System.currentTimeMillis();
+            Param<?> param = new Param<>(name, object);
+            ByteArrayOutputStream stream = new ByteArrayOutputStream();
+            param.save(stream);
+            ByteArrayInputStream inStream = new ByteArrayInputStream(stream.toByteArray());
+            stream.close();
+            inStream.close();
+            Param<?> loaded = Param.load(inStream);
+            assertEquals(param.getName(), loaded.getName());
+            assertEquals(param.getTypeString(), loaded.getTypeString());
+            assertEquals(param.getType(), loaded.getType());
+            assertEquals(param.getValue(), loaded.getValue());
+
+            assertEquals(loaded.getValue(), object);
+            assertEquals(loaded.getName(), name);
+            assertEquals(loaded.getType(), object.getClass());
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
index b8775e0..5a874ac 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.parser;
 
+import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -56,8 +57,8 @@ public class DummyConfigurableParser extends AbstractParser {
     public void parse(InputStream stream, ContentHandler handler,
                       Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
-        for (Map.Entry<String, String> entry : getParams().entrySet()) {
-            metadata.add(entry.getKey(), entry.getValue());
+        for (Map.Entry<String, Param<?>> entry : getParams().entrySet()) {
+            metadata.add(entry.getKey(), entry.getValue().getValue().toString());
         }
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
index 999cb45..37c71c9 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
@@ -19,7 +19,7 @@
     <parsers>
         <parser class="org.apache.tika.parser.DummyConfigurableParser">
             <params>
-                <testparam>testparamval</testparam>
+                <param name="testparam" type="string">testparamval</param>
             </params>
         </parser>