You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by th...@apache.org on 2016/06/02 03:16:00 UTC

[01/12] tika git commit: Add uniformity to parser parameter configuration.

Repository: tika
Updated Branches:
  refs/heads/TIKA-1508 a20c46cc7 -> ea47b716e


Add uniformity to parser parameter configuration.

1. Added Configurable interface.
 This can be used for all services like Parser, Detector which can take
  configurable parameters.

2. Added ConfigurableParser interface which extends Parser interface.
   I didn't add new method to existing Parser because
    that will break the compatibility.

3. AbstractParser implements ConfigurableParser and has a
  default implementation of the configure() contract.
  I think it is safe to do so and it doesn't break anything.
  In addition, all parsers which extend AbstractParser can easily
  access config from TikaConfig if they want to.

4. Added a TODO to TikaConfig:
 a follow-up change should allow multiple instances of the same parser with
 different runtime configurations.

5. TikaConfig is modified to detect if an instance can be configured;
  if so, it checks whether params are available in the XML file, parses the
  params and invokes the configure(ctx) method with these params.

6. Added DummyConfigurableParser that simply copies parameters to
 metadata for the sake of testing.

7. Added a sample XML config file for testing.
Added ConfigurableParserTest that performs an end to end test of all
the above.


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/b2cf2317
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/b2cf2317
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/b2cf2317

Branch: refs/heads/TIKA-1508
Commit: b2cf23178ede925b0ef23f88ebf1aff95c8c157c
Parents: 1caa4fb
Author: Thamme Gowda <tg...@gmail.com>
Authored: Tue Mar 8 18:23:19 2016 -0800
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Tue Mar 8 18:23:19 2016 -0800

----------------------------------------------------------------------
 .../java/org/apache/tika/base/Configurable.java | 19 ++++++
 .../java/org/apache/tika/config/TikaConfig.java | 41 ++++++++++-
 .../org/apache/tika/parser/AbstractParser.java  | 18 ++++-
 .../apache/tika/parser/ConfigurableParser.java  | 30 ++++++++
 .../org/apache/tika/parser/ParseContext.java    | 39 ++++++++++-
 .../java/org/apache/tika/parser/Parser.java     |  1 +
 .../tika/parser/ConfigurableParserTest.java     | 44 ++++++++++++
 .../tika/parser/DummyConfigurableParser.java    | 72 ++++++++++++++++++++
 .../tika/config/TIKA-1508-configurable.xml      | 27 ++++++++
 9 files changed, 288 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/b2cf2317/tika-core/src/main/java/org/apache/tika/base/Configurable.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/base/Configurable.java b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
new file mode 100644
index 0000000..8ae1b30
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
@@ -0,0 +1,19 @@
+package org.apache.tika.base;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.ParseContext;
+
+/**
+ * Defines contract for configurable services
+ * @since Apache Tika 1.13
+ */
+public interface Configurable {
+
+    /**
+     * Configure an instance with Tika Context
+     * @param context configuration instance in the form of context
+     * @throws TikaException when an instance fails to work at the given context
+     * @since Apache Tika 1.13
+     */
+    void configure(ParseContext context) throws TikaException;
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/b2cf2317/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 17f36e0..a4dedae 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -28,8 +28,10 @@ import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
 
@@ -38,6 +40,7 @@ import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 
+import org.apache.tika.base.Configurable;
 import org.apache.tika.concurrent.ConfigurableThreadPoolExecutor;
 import org.apache.tika.concurrent.SimpleThreadPoolExecutor;
 import org.apache.tika.detect.CompositeDetector;
@@ -54,6 +57,7 @@ import org.apache.tika.mime.MimeTypesFactory;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.DefaultParser;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 import org.w3c.dom.Document;
@@ -465,6 +469,7 @@ public class TikaConfig {
     }
     
     private static abstract class XmlLoader<CT,T> {
+        protected static final String PARAMS_TAG_NAME = "params";
         abstract boolean supportsComposite();
         abstract String getParentTagName(); // eg parsers
         abstract String getLoaderTagName(); // eg parser
@@ -510,6 +515,7 @@ public class TikaConfig {
             // Wrap the defined parsers/detectors up in a Composite
             return createComposite(loaded, mimeTypes, loader);
         }
+
         T loadOne(Element element, MimeTypes mimeTypes, ServiceLoader loader) 
                 throws TikaException, IOException {
             String name = element.getAttribute("class");
@@ -520,6 +526,7 @@ public class TikaConfig {
                         loader.getServiceClass(getLoaderClass(), name);
 
                 // Do pre-load checks and short-circuits
+                //TODO : allow duplicate instances with different configurations
                 loaded = preLoadOne(loadedClass, name, mimeTypes);
                 if (loaded != null) return loaded;
                 
@@ -563,7 +570,12 @@ public class TikaConfig {
                 
                 // Have any decoration performed, eg explicit mimetypes
                 loaded = decorate(loaded, element);
-                
+                //if the instance is configurable, then call configure()
+                if (loaded instanceof Configurable){
+                    ParseContext context = new ParseContext();
+                    context.getParams().putAll(getParams(element));
+                    ((Configurable) loaded).configure(context); // initialize here
+                }
                 // All done with setup
                 return loaded;
             } catch (ClassNotFoundException e) {
@@ -586,6 +598,33 @@ public class TikaConfig {
                         "Unable to instantiate a "+getLoaderTagName()+" class: " + name, e);
             }
         }
+
+        /**
+         * Gets parameters from a given XML element
+         * @param el xml node which has {@link #PARAMS_TAG_NAME} child
+         * @return Map of key values read from xml
+         */
+        Map<String, String>  getParams(Element el){
+            //TODO: move this constant to static final
+            Map<String, String> params = new HashMap<>();
+            for (Node child = el.getFirstChild(); child != null;
+                 child = child.getNextSibling()){
+                if (PARAMS_TAG_NAME.equals(child.getNodeName())){ //found the node
+                    if (child.hasChildNodes()) {//it has children
+                        NodeList childNodes = child.getChildNodes();
+                        for (int i = 0; i < childNodes.getLength(); i++) {
+                            Node item = childNodes.item(i);
+                            if (item.getNodeType() == Node.ELEMENT_NODE){
+                                params.put(item.getNodeName().trim(), item.getTextContent().trim());
+                            }
+                        }
+                    }
+                    break; //only the first one is used
+                }
+            }
+            return params;
+        }
+
     }
     private static class ParserXmlLoader extends XmlLoader<CompositeParser,Parser> {
         boolean supportsComposite() { return true; }

http://git-wip-us.apache.org/repos/asf/tika/blob/b2cf2317/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
index 2411f05..10f731e 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
@@ -18,6 +18,7 @@ package org.apache.tika.parser;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Properties;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -30,7 +31,12 @@ import org.xml.sax.SAXException;
  *
  * @since Apache Tika 0.10
  */
-public abstract class AbstractParser implements Parser {
+public abstract class AbstractParser implements ConfigurableParser {
+
+    /**
+     * Configuration supplied at runtime
+     */
+    protected ParseContext context;
 
     /**
      * Serial version UID.
@@ -53,4 +59,14 @@ public abstract class AbstractParser implements Parser {
         parse(stream, handler, metadata, new ParseContext());
     }
 
+    /**
+     * called by the framework to supply runtime parameters which may be
+     * required for initialization
+     * @param context the parser context at runtime
+     * @since Apache Tika 1.13
+     */
+    @Override
+    public void configure(ParseContext context) throws TikaException {
+        this.context = context;
+    }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/b2cf2317/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java b/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java
new file mode 100644
index 0000000..3eabc02
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import org.apache.tika.base.Configurable;
+
+import java.io.Serializable;
+
+/**
+ * Extension of {@link Parser} with {@link Configurable} contract.
+ * This interface shall be implemented to create parsers which accept runtime parameters
+ * from tika configuration file
+ */
+public interface ConfigurableParser extends Parser,
+        Configurable, Serializable {
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/b2cf2317/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index 48a7841..20607d9 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -43,7 +43,11 @@ public class ParseContext implements Serializable {
 
     /** Map of objects in this context */
     private final Map<String, Object> context = new HashMap<String, Object>();
- 
+    /**
+     * Map of configurable arguments
+     */
+    private final Map<String, String> params = new HashMap<>();
+
     /**
      * Adds the given value to the context as an implementation of the given
      * interface.
@@ -145,4 +149,37 @@ public class ParseContext implements Serializable {
         return factory;
     }
 
+    /**
+     * Stores a key=value parameter
+     * @param key parameter name
+     * @param value value
+     */
+    public void setParam(String key, String value){
+        this.params.put(key, value);
+    }
+
+    /**
+     * Gets the value associated with given parameter
+     * @param key parameter name
+     */
+    public void getParam(String key){
+        this.params.get(key);
+    }
+
+    /**
+     * Gets all the params
+     * @return map of key values
+     */
+    public Map<String, String> getParams() {
+        return params;
+    }
+
+    /**
+     * Checks if parameter is available
+     * @param key parameter name
+     * @return true if parameter is available, false otherwise
+     */
+    public boolean hasParam(String key){
+       return params.containsKey(key);
+    }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/b2cf2317/tika-core/src/main/java/org/apache/tika/parser/Parser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/Parser.java b/tika-core/src/main/java/org/apache/tika/parser/Parser.java
index 3ac2d1f..352b8d3 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/Parser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/Parser.java
@@ -29,6 +29,7 @@ import org.xml.sax.SAXException;
 
 /**
  * Tika parser interface.
+ * @see ConfigurableParser for parsers which adapt to runtime params
  */
 public interface Parser extends Serializable {
 

http://git-wip-us.apache.org/repos/asf/tika/blob/b2cf2317/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
new file mode 100644
index 0000000..f91a2b0
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.net.URL;
+
+public class ConfigurableParserTest {
+
+    public static final String TIKA_CFG_FILE = "org/apache/tika/config/TIKA-1508-configurable.xml";
+    public static final String TEST_PARAM = "testparam";
+    public static final String TEST_PARAM_VAL = "testparamval";
+
+    @Test
+    public void testConfigurableParser() throws Exception {
+        URL configFileUrl = getClass().getClassLoader().getResource(TIKA_CFG_FILE);
+        assert configFileUrl != null;
+        TikaConfig config = new TikaConfig(configFileUrl);
+        Tika tika = new Tika(config);
+        Metadata md = new Metadata();
+        tika.parse(configFileUrl.openStream(), md);
+        Assert.assertEquals(TEST_PARAM_VAL, md.get(TEST_PARAM));
+        //assert that param from configuration file is read, given to parser and it copied to metadata
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/b2cf2317/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
new file mode 100644
index 0000000..4bbeac9
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ *
+ * This Parser is created to test runtime configuration to parser.
+ * This parser simply copies parameters to metadata so that a test
+ * suite can be developed to test that:
+ * 1. Parameters were parsed from configuration file
+ * 2. parameters were supplied to parser via configure(ctx) method
+ * 3. parameters were available at parse
+ *
+ */
+public class DummyConfigurableParser extends AbstractParser {
+
+    private static Set<MediaType> MIMES = new HashSet<>();
+    static {
+        MIMES.add(MediaType.TEXT_PLAIN);
+        MIMES.add(MediaType.TEXT_HTML);
+        MIMES.add(MediaType.OCTET_STREAM);
+    }
+
+    private Map<String, String> params;
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return MIMES;
+    }
+
+    @Override
+    public void configure(ParseContext context) throws TikaException {
+        super.configure(context);
+        this.params = context.getParams();
+        // initialize here
+    }
+
+    @Override
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        for (Map.Entry<String, String> entry : this.params.entrySet()) {
+            metadata.add(entry.getKey(), entry.getValue());
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/b2cf2317/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
new file mode 100644
index 0000000..999cb45
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.DummyConfigurableParser">
+            <params>
+                <testparam>testparamval</testparam>
+            </params>
+        </parser>
+
+    </parsers>
+</properties>


[09/12] tika git commit: FIX: return value typo

Posted by th...@apache.org.
FIX: return value typo

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c6eefbde
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c6eefbde
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c6eefbde

Branch: refs/heads/TIKA-1508
Commit: c6eefbde650dd3be88103416b45ca5e92a336f92
Parents: 9e08a6b
Author: Thamme Gowda <tg...@gmail.com>
Authored: Fri May 27 10:33:25 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Fri May 27 10:33:25 2016 -0700

----------------------------------------------------------------------
 .../src/main/java/org/apache/tika/parser/ParseContext.java      | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/c6eefbde/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index c47bbec..dc03099 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -213,9 +213,10 @@ public class ParseContext implements Serializable {
     /**
      * Gets the value associated with given parameter
      * @param key parameter name
+     * @return param value
      */
-    public void getParam(String key){
-        this.params.get(key);
+    public Param<?> getParam(String key){
+        return this.params.get(key);
     }
 
     /**


[04/12] tika git commit: Test Case updated with newer exception and getter

Posted by th...@apache.org.
Test Case updated with newer exception and getter


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0d69ca75
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0d69ca75
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0d69ca75

Branch: refs/heads/TIKA-1508
Commit: 0d69ca7540b4350e043c5b9ed34d14a46bd70cf7
Parents: 64db961
Author: Thamme Gowda <tg...@gmail.com>
Authored: Sat Mar 12 06:51:14 2016 -0800
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Sat Mar 12 06:51:14 2016 -0800

----------------------------------------------------------------------
 .../org/apache/tika/parser/DummyConfigurableParser.java  | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/0d69ca75/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
index 4bbeac9..e9466ca 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.parser;
 
+import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -47,24 +48,16 @@ public class DummyConfigurableParser extends AbstractParser {
         MIMES.add(MediaType.OCTET_STREAM);
     }
 
-    private Map<String, String> params;
     @Override
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         return MIMES;
     }
 
     @Override
-    public void configure(ParseContext context) throws TikaException {
-        super.configure(context);
-        this.params = context.getParams();
-        // initialize here
-    }
-
-    @Override
     public void parse(InputStream stream, ContentHandler handler,
                       Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
-        for (Map.Entry<String, String> entry : this.params.entrySet()) {
+        for (Map.Entry<String, String> entry : getParams().entrySet()) {
             metadata.add(entry.getKey(), entry.getValue());
         }
     }


[11/12] tika git commit: Using TikaConfigException instead of RuntimeException

Posted by th...@apache.org.
Using TikaConfigException instead of RuntimeException

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/67941a63
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/67941a63
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/67941a63

Branch: refs/heads/TIKA-1508
Commit: 67941a63949280289c1a7b5430a86de0aa3838a9
Parents: aad23d9
Author: Thamme Gowda <tg...@gmail.com>
Authored: Wed Jun 1 16:36:27 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Wed Jun 1 16:36:27 2016 -0700

----------------------------------------------------------------------
 .../src/main/java/org/apache/tika/config/ParamField.java | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/67941a63/tika-core/src/main/java/org/apache/tika/config/ParamField.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/ParamField.java b/tika-core/src/main/java/org/apache/tika/config/ParamField.java
index 96063dc..281dd98 100644
--- a/tika-core/src/main/java/org/apache/tika/config/ParamField.java
+++ b/tika-core/src/main/java/org/apache/tika/config/ParamField.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.config;
 
+import org.apache.tika.exception.TikaConfigException;
+
 import java.lang.reflect.*;
 import java.util.HashMap;
 import java.util.Locale;
@@ -54,7 +56,7 @@ public class ParamField {
      * Creates a ParamField object
      * @param member a field or method which has {@link Field} annotation
      */
-    public ParamField(AccessibleObject member){
+    public ParamField(AccessibleObject member) throws TikaConfigException {
         if (member instanceof java.lang.reflect.Field) {
             field = (java.lang.reflect.Field) member;
         } else {
@@ -113,20 +115,19 @@ public class ParamField {
         }
     }
 
-    private Class retrieveType() {
+    private Class retrieveType() throws TikaConfigException {
         Class type;
         if (field != null) {
             type = field.getType();
         } else {
             Class[] params = setter.getParameterTypes();
             if (params.length != 1) {
-                //todo:Tika config exception
                 String msg = "Invalid setter method. Must have one and only one parameter. ";
                 if (setter.getName().startsWith("get")) {
                     msg += "Perhaps the annotation is misplaced on " +
-                            setter.getName() +" while a set'X' is expected?";
+                            setter.getName() + " while a set'X' is expected?";
                 }
-                throw new RuntimeException(msg);
+                throw new TikaConfigException(msg);
             }
             type = params[0];
         }


[02/12] tika git commit: remove unwanted TODO:

Posted by th...@apache.org.
remove unwanted TODO:


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/ae51417d
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/ae51417d
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/ae51417d

Branch: refs/heads/TIKA-1508
Commit: ae51417d8881dd90b921f02c2677a7d5bfd69a30
Parents: b2cf231
Author: Thamme Gowda <tg...@gmail.com>
Authored: Tue Mar 8 19:23:47 2016 -0800
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Tue Mar 8 19:23:47 2016 -0800

----------------------------------------------------------------------
 tika-core/src/main/java/org/apache/tika/config/TikaConfig.java | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/ae51417d/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index a4dedae..caa916a 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -605,7 +605,6 @@ public class TikaConfig {
          * @return Map of key values read from xml
          */
         Map<String, String>  getParams(Element el){
-            //TODO: move this constant to static final
             Map<String, String> params = new HashMap<>();
             for (Node child = el.getFirstChild(); child != null;
                  child = child.getNextSibling()){


[12/12] tika git commit: Merge branch 'TIKA-1508' of https://git-wip-us.apache.org/repos/asf/tika into TIKA-1508

Posted by th...@apache.org.
Merge branch 'TIKA-1508' of https://git-wip-us.apache.org/repos/asf/tika into TIKA-1508


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/ea47b716
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/ea47b716
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/ea47b716

Branch: refs/heads/TIKA-1508
Commit: ea47b716e5a7a1a1637b1cef0dc439b140590711
Parents: 67941a6 a20c46c
Author: Thamme Gowda <tg...@gmail.com>
Authored: Wed Jun 1 20:14:08 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Wed Jun 1 20:14:08 2016 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   2 +
 .../org/apache/tika/mime/tika-mimetypes.xml     |   9 +
 .../org/apache/tika/parser/dbf/DBFCell.java     | 147 +++++++++++++
 .../apache/tika/parser/dbf/DBFColumnHeader.java |  97 +++++++++
 .../apache/tika/parser/dbf/DBFFileHeader.java   | 144 +++++++++++++
 .../org/apache/tika/parser/dbf/DBFParser.java   | 155 ++++++++++++++
 .../org/apache/tika/parser/dbf/DBFReader.java   | 207 +++++++++++++++++++
 .../java/org/apache/tika/parser/dbf/DBFRow.java |  62 ++++++
 .../apache/tika/parser/geo/topic/GeoParser.java |  14 +-
 .../tika/parser/html/HtmlEncodingDetector.java  |  16 +-
 .../org/apache/tika/parser/pdf/PDF2XHTML.java   |  26 ++-
 .../services/org.apache.tika.parser.Parser      |   3 +-
 .../apache/tika/parser/dbf/DBFParserTest.java   | 158 ++++++++++++++
 .../apache/tika/parser/html/HtmlParserTest.java |  22 +-
 .../apache/tika/parser/pdf/PDFParserTest.java   |  18 ++
 .../test/resources/test-documents/testDBF.dbf   | Bin 0 -> 890 bytes
 .../test-documents/testDBF_gb18030.dbf          | Bin 0 -> 144 bytes
 17 files changed, 1061 insertions(+), 19 deletions(-)
----------------------------------------------------------------------



[05/12] tika git commit: merged upstream changes and resolved conflicts

Posted by th...@apache.org.
merged upstream changes and resolved conflicts


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/e780d566
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/e780d566
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/e780d566

Branch: refs/heads/TIKA-1508
Commit: e780d56652d48dd0f50b4e62a58153e95f055022
Parents: 0d69ca7 bb46c0e
Author: Thamme Gowda <tg...@gmail.com>
Authored: Mon May 23 11:30:13 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Mon May 23 11:30:13 2016 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   85 +-
 pom.xml                                         |    3 +-
 tika-app/pom.xml                                |    7 +-
 .../main/appended-resources/META-INF/LICENSE    |  227 -
 .../tika/cli/BatchCommandLineBuilder.java       |    7 -
 .../main/java/org/apache/tika/cli/TikaCLI.java  |   50 +-
 .../main/resources/tika-app-batch-config.xml    |   10 +-
 .../tika/cli/TikaCLIBatchCommandLineTest.java   |    1 -
 .../java/org/apache/tika/cli/TikaCLITest.java   |   16 -
 tika-batch/pom.xml                              |    4 +-
 .../batch/builders/BatchProcessBuilder.java     |   15 +-
 .../builders/CommandLineParserBuilder.java      |   16 +-
 .../apache/tika/batch/fs/FSBatchProcessCLI.java |    4 +-
 .../builders/BasicTikaFSConsumersBuilder.java   |   51 +-
 .../tika/batch/fs/default-tika-batch-config.xml |   50 +-
 .../apache/tika/batch/fs/BatchProcessTest.java  |   19 +-
 .../tika/batch/fs/HandlerBuilderTest.java       |    4 -
 .../tika-batch-config-MockConsumersBuilder.xml  |    2 +-
 .../test/resources/tika-batch-config-broken.xml |    2 +-
 .../tika-batch-config-test-suffix-override.xml  |  112 +
 .../test/resources/tika-batch-config-test.xml   |    2 +-
 tika-bundle/pom.xml                             |    6 +-
 .../main/appended-resources/META-INF/LICENSE    |  226 -
 tika-core/pom.xml                               |    7 +-
 .../java/org/apache/tika/config/TikaConfig.java |   26 +-
 .../tika/config/TikaConfigSerializer.java       |    4 +-
 .../org/apache/tika/detect/NameDetector.java    |   15 +-
 .../tika/detect/ZeroSizeFileDetector.java       |   45 +
 .../java/org/apache/tika/fork/ForkClient.java   |   10 +-
 .../tika/language/LanguageIdentifier.java       |    7 +-
 .../apache/tika/language/LanguageProfile.java   |    2 +
 .../tika/language/LanguageProfilerBuilder.java  |    9 +-
 .../apache/tika/language/ProfilingHandler.java  |    3 +-
 .../apache/tika/language/ProfilingWriter.java   |    2 +
 .../language/detect/LanguageConfidence.java     |   25 +
 .../tika/language/detect/LanguageDetector.java  |  239 +
 .../tika/language/detect/LanguageHandler.java   |   66 +
 .../tika/language/detect/LanguageNames.java     |   86 +
 .../tika/language/detect/LanguageResult.java    |   98 +
 .../tika/language/detect/LanguageWriter.java    |   78 +
 .../org/apache/tika/language/package-info.java  |   22 -
 .../tika/metadata/TikaCoreProperties.java       |    9 +
 .../java/org/apache/tika/mime/MediaType.java    |    3 +
 .../org/apache/tika/mime/MediaTypeRegistry.java |    2 +
 .../org/apache/tika/mime/MimeTypesReader.java   |   20 +-
 .../org/apache/tika/parser/NetworkParser.java   |    4 +-
 .../org/apache/tika/parser/ParseContext.java    |  169 +-
 .../org/apache/tika/parser/ParserDecorator.java |   35 +-
 .../tika/parser/external/ExternalParser.java    |    8 +-
 .../external/ExternalParsersConfigReader.java   |   11 +-
 .../tika/sax/BasicContentHandlerFactory.java    |    8 +
 .../src/main/java/org/apache/tika/sax/Link.java |    4 +
 .../java/org/apache/tika/sax/LinkBuilder.java   |    6 +-
 .../org/apache/tika/sax/LinkContentHandler.java |   18 +-
 .../resources/org/apache/tika/language/be.ngp   |    0
 .../resources/org/apache/tika/language/ca.ngp   |    0
 .../resources/org/apache/tika/language/eo.ngp   |    0
 .../resources/org/apache/tika/language/gl.ngp   |    0
 .../resources/org/apache/tika/language/ro.ngp   |    0
 .../resources/org/apache/tika/language/sk.ngp   |    0
 .../resources/org/apache/tika/language/sl.ngp   |    0
 .../resources/org/apache/tika/language/uk.ngp   |    0
 .../org/apache/tika/mime/tika-mimetypes.xml     |   93 +-
 .../src/test/java/org/apache/tika/TikaTest.java |   59 +-
 .../apache/tika/detect/NameDetectorTest.java    |   10 +
 .../tika/detect/ZeroSizeFileDetectorTest.java   |   64 +
 .../tika/language/LanguageIdentifierTest.java   |    1 +
 .../tika/language/LanguageProfileTest.java      |    7 +-
 .../language/LanguageProfilerBuilderTest.java   |    1 +
 .../tika/language/ProfilingWriterTest.java      |    5 +-
 .../tika/language/detect/LanguageNamesTest.java |   38 +
 .../org/apache/tika/parser/mock/MockParser.java |   12 +-
 .../apache/tika/sax/LinkContentHandlerTest.java |   36 +-
 .../tika/language/langbuilder/welsh_corpus.txt  | 5204 +++++++++---------
 tika-example/pom.xml                            |   16 +-
 .../java/org/apache/tika/example/Language.java  |   32 +-
 .../tika/example/LanguageDetectingParser.java   |   16 +-
 .../tika/example/LanguageDetectorExample.java   |   33 +
 .../tika/example/LanguageIdentifierExample.java |   27 -
 .../org/apache/tika/example/MyFirstTika.java    |   13 +-
 .../org/apache/tika/example/ParsingExample.java |   14 +-
 .../example/LanguageDetectorExampleTest.java    |   39 +
 .../example/LanguageIdentifierExampleTest.java  |   37 -
 tika-java7/pom.xml                              |    2 +-
 tika-langdetect/pom.xml                         |  171 +
 .../tika/langdetect/OptimaizeLangDetector.java  |  196 +
 .../tika/langdetect/TextLangDetector.java       |  146 +
 ...apache.tika.language.detect.LanguageDetector |   16 +
 .../tika/langdetect/LanguageDetectorTest.java   |   92 +
 .../langdetect/OptimaizeLangDetectorTest.java   |  265 +
 .../tika/langdetect/TextLangDetectorTest.java   |   59 +
 .../src/test/resources/log4j.properties         |   24 +
 .../apache/tika/langdetect/language-codes.txt   |  186 +
 .../tika/langdetect/language-tests/da.test      |  108 +
 .../tika/langdetect/language-tests/de.test      |  104 +
 .../tika/langdetect/language-tests/el.test      |  109 +
 .../tika/langdetect/language-tests/en.test      |  105 +
 .../tika/langdetect/language-tests/es.test      |  107 +
 .../tika/langdetect/language-tests/et.test      |   17 +
 .../tika/langdetect/language-tests/fi.test      |  106 +
 .../tika/langdetect/language-tests/fr.test      |  105 +
 .../tika/langdetect/language-tests/it.test      |  109 +
 .../tika/langdetect/language-tests/ja.test      |   78 +
 .../tika/langdetect/language-tests/lt.test      |   32 +
 .../tika/langdetect/language-tests/nl.test      |  105 +
 .../tika/langdetect/language-tests/pt.test      |  105 +
 .../tika/langdetect/language-tests/sv.test      |  108 +
 .../tika/langdetect/language-tests/th.test      |   28 +
 .../tika/langdetect/language-tests/zh.test      |   57 +
 .../org/apache/tika/langdetect/text-test.tsv    |   18 +
 .../org/apache/tika/langdetect/udhr-known.txt   |   11 +
 .../org/apache/tika/langdetect/udhr-unknown.txt |    4 +
 tika-parent/pom.xml                             |   29 +-
 tika-parsers/pom.xml                            |   53 +-
 .../tika/parser/code/SourceCodeParser.java      |  142 +-
 .../tika/parser/epub/EpubContentParser.java     |   33 +-
 .../org/apache/tika/parser/epub/EpubParser.java |    8 +-
 .../parser/executable/ExecutableParser.java     |    2 +-
 .../tika/parser/font/AdobeFontMetricParser.java |   16 +-
 .../apache/tika/parser/font/TrueTypeParser.java |    4 +-
 .../geoinfo/GeographicInformationParser.java    |   30 +-
 .../apache/tika/parser/html/HtmlHandler.java    |    3 +
 .../apache/tika/parser/image/ICNSParser.java    |  117 +
 .../org/apache/tika/parser/image/ICNSType.java  |  170 +
 .../parser/image/ImageMetadataExtractor.java    |   45 +-
 .../tika/parser/image/xmp/JempboxExtractor.java |   75 +-
 .../tika/parser/isatab/ISArchiveParser.java     |   62 +-
 .../tika/parser/jdbc/AbstractDBParser.java      |   13 +-
 .../tika/parser/jdbc/JDBCTableReader.java       |   68 +-
 .../tika/parser/jdbc/SQLite3DBParser.java       |   31 +-
 .../apache/tika/parser/jdbc/SQLite3Parser.java  |    6 +-
 .../tika/parser/jdbc/SQLite3TableReader.java    |   45 +-
 .../apache/tika/parser/journal/TEIParser.java   |    8 +-
 .../tika/parser/mail/MailContentHandler.java    |  110 +-
 .../org/apache/tika/parser/mat/MatParser.java   |   27 +-
 .../tika/parser/microsoft/HSLFExtractor.java    |   14 +
 .../tika/parser/microsoft/OfficeParser.java     |    3 +-
 .../microsoft/POIFSContainerDetector.java       |   21 +-
 .../tika/parser/microsoft/WordExtractor.java    |   11 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java |   22 +-
 .../ooxml/XSLFPowerPointExtractorDecorator.java |   58 +-
 .../ooxml/XSSFExcelExtractorDecorator.java      |  112 +-
 .../microsoft/xml/AbstractXML2003Parser.java    |   86 +
 .../parser/microsoft/xml/HyperlinkHandler.java  |   96 +
 .../microsoft/xml/SpreadsheetMLParser.java      |  161 +
 .../tika/parser/microsoft/xml/WordMLParser.java |  229 +
 .../parser/mp4/DirectFileReadDataSource.java    |   34 +-
 .../org/apache/tika/parser/mp4/MP4Parser.java   |  379 +-
 .../parser/ner/grobid/GrobidNERecogniser.java   |  240 +
 .../parser/ner/mitie/MITIENERecogniser.java     |  160 +
 .../tika/parser/ner/nltk/NLTKNERecogniser.java  |   19 +-
 .../apache/tika/parser/netcdf/NetCDFParser.java |   20 +-
 .../parser/odf/OpenDocumentContentParser.java   |   37 +-
 .../tika/parser/odf/OpenDocumentParser.java     |   62 +-
 .../org/apache/tika/parser/pdf/PDF2XHTML.java   |  244 +-
 .../parser/pdf/PDFEncodedStringDecoder.java     |   14 +-
 .../org/apache/tika/parser/pdf/PDFParser.java   |  143 +-
 .../apache/tika/parser/pdf/PDFParserConfig.java |   67 +-
 .../apache/tika/parser/pdf/XFAExtractor.java    |   30 +-
 .../tika/parser/pot/PooledTimeSeriesParser.java |  394 +-
 .../services/org.apache.tika.parser.Parser      |    5 +-
 .../parser/ner/grobid/GrobidServer.properties   |   17 +
 .../apache/tika/parser/pdf/PDFParser.properties |    4 +-
 .../org/apache/tika/mime/TestMimeTypes.java     |   21 +-
 .../parser/executable/ExecutableParserTest.java |   73 +-
 .../GeographicInformationParserTest.java        |   48 +-
 .../apache/tika/parser/html/HtmlParserTest.java |   38 +
 .../tika/parser/image/ICNSParserTest.java       |   65 +
 .../tika/parser/image/ImageParserTest.java      |    5 +-
 .../tika/parser/jdbc/SQLite3ParserTest.java     |  106 +-
 .../apache/tika/parser/jpeg/JpegParserTest.java |   21 +-
 .../tika/parser/mail/RFC822ParserTest.java      |  115 +
 .../AbstractPOIContainerExtractionTest.java     |    4 +-
 .../tika/parser/microsoft/ExcelParserTest.java  |   18 +-
 .../microsoft/POIContainerExtractionTest.java   |   35 +-
 .../parser/microsoft/PowerPointParserTest.java  |    2 +-
 .../ooxml/OOXMLContainerExtractionTest.java     |   23 +-
 .../parser/microsoft/ooxml/OOXMLParserTest.java |   24 +-
 .../parser/microsoft/xml/XML2003ParserTest.java |   81 +
 .../apache/tika/parser/mp4/MP4ParserTest.java   |   12 +-
 .../apache/tika/parser/odf/ODFParserTest.java   |   10 +-
 .../apache/tika/parser/pdf/PDFParserTest.java   |  562 +-
 .../resources/test-documents/testEXCEL2003.xml  |  100 +
 .../test-documents/testEXCEL_hyperlinks.xls     |  Bin 0 -> 29696 bytes
 .../test-documents/testEXCEL_hyperlinks.xlsx    |  Bin 0 -> 10038 bytes
 .../test/resources/test-documents/testHFA.hfa   |  Bin 0 -> 1024 bytes
 .../test/resources/test-documents/testICNS.icns |  Bin 0 -> 2472 bytes
 .../test-documents/testICNS_basic.icns          |  Bin 0 -> 18199 bytes
 .../resources/test-documents/testKeynoteNew.key |  Bin 0 -> 274397 bytes
 .../test/resources/test-documents/testMIF.mif   |  Bin 0 -> 10240 bytes
 .../test-documents/testMP4_truncated.m4a        |  Bin 0 -> 74 bytes
 .../testMSChart-govdocs-428996.ppt              |  Bin 0 -> 41472 bytes
 .../testMSChart-govdocs-428996.pptx             |  Bin 0 -> 56224 bytes
 .../testMSChart-govdocs-428996.xls              |  Bin 0 -> 35328 bytes
 .../testMSChart-govdocs-428996.xlsx             |  Bin 0 -> 17112 bytes
 .../test-documents/testNumbersNew.numbers       |  Bin 0 -> 179147 bytes
 .../resources/test-documents/testODTNoMeta.odt  |  Bin 0 -> 5847 bytes
 .../test-documents/testPDF_bad_page_303226.pdf  |  Bin 0 -> 138027 bytes
 .../resources/test-documents/testPagesNew.pages |  Bin 0 -> 237567 bytes
 .../test-documents/testRFC822_date_utf8         |    8 +
 .../resources/test-documents/testRFC822_eml     |   33 +
 .../resources/test-documents/testSqlite3b.db    |  Bin 27648 -> 27648 bytes
 .../resources/test-documents/testWORD2003.xml   | 2542 +++++++++
 tika-serialization/pom.xml                      |    4 +-
 tika-server/pom.xml                             |   45 +-
 .../tika/server/resource/LanguageResource.java  |   27 +-
 .../tika/server/resource/MetadataResource.java  |    9 +-
 .../resource/RecursiveMetadataResource.java     |    7 +-
 .../tika/server/resource/TranslateResource.java |   22 +-
 .../org/apache/tika/server/CXFTestBase.java     |   26 +-
 tika-translate/pom.xml                          |    9 +-
 .../language/translate/AbstractTranslator.java  |   32 +
 .../language/translate/CachedTranslator.java    |   20 +-
 .../language/translate/ExternalTranslator.java  |   13 +-
 .../language/translate/GoogleTranslator.java    |   20 +-
 .../language/translate/Lingo24Translator.java   |   20 +-
 .../language/translate/MosesTranslator.java     |    7 +-
 .../language/translate/YandexTranslator.java    |  175 +
 .../translate/translator.yandex.properties      |   24 +
 .../translate/YandexTranslatorTest.java         |  105 +
 tika-xmp/pom.xml                                |    2 +-
 221 files changed, 13467 insertions(+), 5115 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/e780d566/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --cc tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index caa916a,0e3acd9..896b51b
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@@ -35,12 -35,6 +37,8 @@@ import java.util.Map
  import java.util.Set;
  import java.util.concurrent.ExecutorService;
  
- import javax.imageio.spi.ServiceRegistry;
- import javax.xml.parsers.DocumentBuilder;
- import javax.xml.parsers.DocumentBuilderFactory;
- import javax.xml.parsers.ParserConfigurationException;
- 
 +import org.apache.tika.base.Configurable;
++
  import org.apache.tika.concurrent.ConfigurableThreadPoolExecutor;
  import org.apache.tika.concurrent.SimpleThreadPoolExecutor;
  import org.apache.tika.detect.CompositeDetector;

http://git-wip-us.apache.org/repos/asf/tika/blob/e780d566/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
----------------------------------------------------------------------
diff --cc tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index 20607d9,2521cc9..e58f5c8
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@@ -43,11 -53,22 +53,27 @@@ public class ParseContext implements Se
  
      /** Map of objects in this context */
      private final Map<String, Object> context = new HashMap<String, Object>();
+ 
 +    /**
 +     * Map of configurable arguments
 +     */
 +    private final Map<String, String> params = new HashMap<>();
 +
+     private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() {
+         public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
+             return new InputSource(new StringReader(""));
+         }
+     };
+ 
+     private static final XMLResolver IGNORING_STAX_ENTITY_RESOLVER =
+             new XMLResolver() {
+                 @Override
+                 public Object resolveEntity(String publicID, String systemID, String baseURI, String namespace) throws
+                         XMLStreamException {
+                     return "";
+                 }
+             };
+ 
      /**
       * Adds the given value to the context as an implementation of the given
       * interface.
@@@ -150,36 -196,111 +201,144 @@@
      }
  
      /**
 +     * Stores a key=value parameter
 +     * @param key parameter name
 +     * @param value value
 +     */
 +    public void setParam(String key, String value){
 +        this.params.put(key, value);
 +    }
 +
 +    /**
 +     * Gets the value associated with given parameter
 +     * @param key parameter name
 +     * @return the value stored for the key, or {@code null} if there is none
 +     */
 +    public String getParam(String key){
 +        // previously declared void, which discarded the looked-up value
 +        return this.params.get(key);
 +    }
 +
 +    /**
 +     * Gets all the params
 +     * @return map of key values
 +     */
 +    public Map<String, String> getParams() {
 +        return params;
 +    }
 +
 +    /**
 +     * Checks if parameter is available
 +     * @param key parameter name
 +     * @return true if parameter is available, false otherwise
 +     */
 +    public boolean hasParam(String key){
 +       return params.containsKey(key);
 +    }
++    /**
+      * Returns the DOM builder factory specified in this parsing context.
+      * If a factory is not explicitly specified, then a default factory
+      * instance is created and returned. The default factory instance is
+      * configured to be namespace-aware and to apply reasonable security
+      * features.
+      *
+      * @since Apache Tika 1.13
+      * @return DOM parser factory
+      */
+     private DocumentBuilderFactory getDocumentBuilderFactory() {
+         //borrowed from Apache POI
+         DocumentBuilderFactory documentBuilderFactory = get(DocumentBuilderFactory.class);
+         if (documentBuilderFactory != null) {
+             return documentBuilderFactory;
+         }
+         documentBuilderFactory = DocumentBuilderFactory.newInstance();
+         documentBuilderFactory.setNamespaceAware(true);
+         documentBuilderFactory.setValidating(false);
+         tryToSetSAXFeatureOnDOMFactory(documentBuilderFactory,
+             XMLConstants.FEATURE_SECURE_PROCESSING, true);
+         tryToSetXercesManager(documentBuilderFactory);
+         return documentBuilderFactory;
+     }
+ 
+     /**
+      * Returns the DOM builder specified in this parsing context.
+      * If a builder is not explicitly specified, then a builder
+      * instance is created and returned. The builder instance is
+      * configured to apply an {@link #IGNORING_SAX_ENTITY_RESOLVER},
+      * and it sets the ErrorHandler to <code>null</code>.
+      *
+      * @since Apache Tika 1.13
+      * @return DOM Builder
+      */
+     public DocumentBuilder getDocumentBuilder() throws TikaException {
+         DocumentBuilder documentBuilder = get(DocumentBuilder.class);
+         if (documentBuilder != null) {
+             return documentBuilder;
+         }
+         try {
+             DocumentBuilderFactory documentBuilderFactory = getDocumentBuilderFactory();
+             documentBuilder = documentBuilderFactory.newDocumentBuilder();
+             documentBuilder.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
+             documentBuilder.setErrorHandler(null);
+             return documentBuilder;
+         } catch (ParserConfigurationException e) {
+             throw new TikaException("XML parser not available", e);
+         }
+     }
+ 
+     /**
+      * Returns the StAX input factory specified in this parsing context.
+      * If a factory is not explicitly specified, then a default factory
+      * instance is created and returned. The default factory instance is
+      * configured to be namespace-aware and to apply reasonable security
+      * using the {@link #IGNORING_STAX_ENTITY_RESOLVER}.
+      *
+      * @since Apache Tika 1.13
+      * @return StAX input factory
+      */
+     public XMLInputFactory getXMLInputFactory() {
+         XMLInputFactory factory = get(XMLInputFactory.class);
+         if (factory != null) {
+             return factory;
+         }
+         factory = XMLInputFactory.newFactory();
+ 
+         tryToSetStaxProperty(factory, XMLInputFactory.IS_NAMESPACE_AWARE, true);
+         tryToSetStaxProperty(factory, XMLInputFactory.IS_VALIDATING, false);
+ 
+         factory.setXMLResolver(IGNORING_STAX_ENTITY_RESOLVER);
+         return factory;
+     }
+ 
+     private static void tryToSetSAXFeatureOnDOMFactory(DocumentBuilderFactory dbf, String feature, boolean value) {
+         try {
+             dbf.setFeature(feature, value);
+         } catch (Exception|AbstractMethodError e) {
+         }
+     }
+ 
+     private static void tryToSetXercesManager(DocumentBuilderFactory dbf) {
+         // Try built-in JVM one first, standalone if not
+         for (String securityManagerClassName : new String[] {
+                 "com.sun.org.apache.xerces.internal.util.SecurityManager",
+                 "org.apache.xerces.util.SecurityManager"
+         }) {
+             try {
+                 Object mgr = Class.forName(securityManagerClassName).newInstance();
+                 Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
+                 setLimit.invoke(mgr, 4096);
+                 dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
+                 // Stop once one can be setup without error
+                 return;
+             } catch (Throwable t) {
+             }
+         }
+     }
+ 
+     private void tryToSetStaxProperty(XMLInputFactory factory, String key, boolean value) {
+         try {
+             factory.setProperty(key, value);
+         } catch (IllegalArgumentException e) {
+             //swallow
+         }
+     }
+ 
  }


[07/12] tika git commit: Added support for type for runtime parameters

Posted by th...@apache.org.
Added support for type for runtime parameters

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/01869923
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/01869923
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/01869923

Branch: refs/heads/TIKA-1508
Commit: 01869923533b330ec7728995e3ee5feceee1b90e
Parents: b64612d
Author: Thamme Gowda <tg...@gmail.com>
Authored: Wed May 25 17:18:25 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Wed May 25 17:18:25 2016 -0700

----------------------------------------------------------------------
 .../java/org/apache/tika/base/Configurable.java |   3 +-
 .../main/java/org/apache/tika/config/Param.java | 191 +++++++++++++++++++
 .../java/org/apache/tika/config/TikaConfig.java |  12 +-
 .../org/apache/tika/parser/AbstractParser.java  |   3 +-
 .../org/apache/tika/parser/ParseContext.java    |   7 +-
 .../java/org/apache/tika/config/ParamTest.java  |  71 +++++++
 .../tika/parser/DummyConfigurableParser.java    |   5 +-
 .../tika/config/TIKA-1508-configurable.xml      |   2 +-
 8 files changed, 283 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/base/Configurable.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/base/Configurable.java b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
index 4e6418d..f1eb91a 100644
--- a/tika-core/src/main/java/org/apache/tika/base/Configurable.java
+++ b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.base;
 
+import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.parser.ParseContext;
 
@@ -39,5 +40,5 @@ public interface Configurable {
      * Gets parameters of this configurable instance
      * @return parameters in the form  of a map of key value pairs
      */
-    Map<String, String> getParams();
+    Map<String, Param<?>> getParams();
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/config/Param.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/Param.java b/tika-core/src/main/java/org/apache/tika/config/Param.java
new file mode 100644
index 0000000..b54f6be
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/config/Param.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import org.w3c.dom.Node;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Marshaller;
+import javax.xml.bind.Unmarshaller;
+import javax.xml.bind.annotation.*;
+import javax.xml.bind.helpers.DefaultValidationEventHandler;
+import java.io.File;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Serializable;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * This is a JAXB serializable model class for parameters from configuration file.
+ *
+ * @param <T> value type. Should be serializable to string and have a constructor with string param
+ * @since Apache Tika 1.14
+ */
+@XmlRootElement()
+@XmlAccessorType(XmlAccessType.NONE)
+public class Param<T> implements Serializable {
+
+    private static final JAXBContext JAXB_CTX;
+    private static final Marshaller MARSHALLER;
+    private static final Unmarshaller UNMARSHALLER;
+    private static final Map<Class<?>, String> map = new HashMap<>();
+    private static final Map<String, Class<?>> reverseMap = new HashMap<>();
+
+    static {
+        map.put(Boolean.class, "bool");
+        map.put(String.class, "string");
+        map.put(Byte.class, "byte");
+        map.put(Short.class, "short");
+        map.put(Integer.class, "int");
+        map.put(Long.class, "long");
+        map.put(BigInteger.class, "bigint");
+        map.put(Float.class, "float");
+        map.put(Double.class, "double");
+        map.put(File.class, "file");
+        map.put(URI.class, "uri");
+        map.put(URL.class, "url");
+        for (Map.Entry<Class<?>, String> entry : map.entrySet()) {
+            reverseMap.put(entry.getValue(), entry.getKey());
+        }
+        try {
+            JAXB_CTX = JAXBContext.newInstance(Param.class);
+            MARSHALLER = JAXB_CTX.createMarshaller();
+            MARSHALLER.setEventHandler(new DefaultValidationEventHandler());
+            UNMARSHALLER = JAXB_CTX.createUnmarshaller();
+            UNMARSHALLER.setEventHandler(new DefaultValidationEventHandler());
+        } catch (JAXBException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @XmlTransient
+    private Class<T> type;
+
+    @XmlAttribute(name = "name")
+    private String name;
+
+    @XmlValue()
+    private String value;
+
+    @XmlTransient
+    private T actualValue;
+
+    public Param(){
+    }
+
+    public Param(String name, Class<T> type, T value){
+        this.name = name;
+        this.type = type;
+        this.value = value.toString();
+    }
+
+    public Param(String name, T value){
+        this(name, (Class<T>) value.getClass(), value);
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    @XmlTransient
+    public Class<T> getType() {
+        return type;
+    }
+
+    public void setType(Class<T> type) {
+        this.type = type;
+    }
+
+    @XmlAttribute(name = "type")
+    public String getTypeString(){
+        if (type == null) {
+            return null;
+        }
+        if (map.containsKey(type)){
+            return map.get(type);
+        }
+        return type.getName();
+    }
+
+    public void setTypeString(String type){
+        if (type == null || type.isEmpty()){
+            return;
+        }
+        if (reverseMap.containsKey(type)){
+            this.type = (Class<T>) reverseMap.get(type);
+        } else try {
+            this.type = (Class<T>) Class.forName(type);
+        } catch (ClassNotFoundException e) {
+            throw new RuntimeException(e);
+        }
+        this.actualValue = null;
+    }
+
+    /**
+     * Returns the typed value of this parameter. The raw string value is
+     * converted on first use by invoking the constructor of the declared
+     * type that takes a single {@code String} argument; the result is cached.
+     *
+     * @return the value converted to the declared type
+     * @throws IllegalStateException if no type has been set on this parameter
+     * @throws RuntimeException if the type has no String constructor or
+     *         the constructor could not be invoked
+     */
+    @XmlTransient
+    public T getValue(){
+        if (actualValue == null) {
+            if (type == null) {
+                // fail with a descriptive message instead of a bare NullPointerException
+                throw new IllegalStateException("Type is not set for param '" + name + "'");
+            }
+            try {
+                Constructor<T> constructor = type.getConstructor(String.class);
+                constructor.setAccessible(true);
+                this.actualValue = constructor.newInstance(value);
+            } catch (NoSuchMethodException e) {
+                throw new RuntimeException(type + " doesnt have a constructor that takes String arg", e);
+            } catch (IllegalAccessException | InstantiationException | InvocationTargetException e) {
+                throw new RuntimeException(e);
+            }
+        }
+        return actualValue;
+    }
+
+    @Override
+    public String toString() {
+        return "Param{" +
+                "name='" + name + '\'' +
+                ", value='" + value + '\'' +
+                ", actualValue=" + actualValue +
+                '}';
+    }
+
+    public void save(OutputStream stream) throws JAXBException {
+        MARSHALLER.marshal(this, stream);
+    }
+
+    public void save(Node node) throws JAXBException {
+        MARSHALLER.marshal(this, node);
+    }
+
+    public static <T> Param<T> load(InputStream stream) throws JAXBException {
+        return (Param<T>) UNMARSHALLER.unmarshal(stream);
+    }
+
+    public static <T> Param<T> load(Node node) throws JAXBException {
+        return (Param<T>) UNMARSHALLER.unmarshal(node);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 896b51b..17b735e 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -17,6 +17,7 @@
 package org.apache.tika.config;
 
 import javax.imageio.spi.ServiceRegistry;
+import javax.xml.bind.JAXBException;
 import javax.xml.parsers.DocumentBuilder;
 import java.io.File;
 import java.io.IOException;
@@ -598,8 +599,8 @@ public class TikaConfig {
          * @param el xml node which has {@link #PARAMS_TAG_NAME} child
          * @return Map of key values read from xml
          */
-        Map<String, String>  getParams(Element el){
-            Map<String, String> params = new HashMap<>();
+        Map<String, Param<?>>  getParams(Element el){
+            Map<String, Param<?>> params = new HashMap<>();
             for (Node child = el.getFirstChild(); child != null;
                  child = child.getNextSibling()){
                 if (PARAMS_TAG_NAME.equals(child.getNodeName())){ //found the node
@@ -608,7 +609,12 @@ public class TikaConfig {
                         for (int i = 0; i < childNodes.getLength(); i++) {
                             Node item = childNodes.item(i);
                             if (item.getNodeType() == Node.ELEMENT_NODE){
-                                params.put(item.getNodeName().trim(), item.getTextContent().trim());
+                                try {
+                                    Param<?> param = Param.load(item);
+                                    params.put(param.getName(), param);
+                                } catch (JAXBException e) {
+                                    throw new RuntimeException(e);
+                                }
                             }
                         }
                     }

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
index 00fac7b..5c045db 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
@@ -21,6 +21,7 @@ import java.io.InputStream;
 import java.util.Map;
 import java.util.Properties;
 
+import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -80,7 +81,7 @@ public abstract class AbstractParser implements ConfigurableParser {
      * @since Apache Tika 1.14
      */
     @Override
-    public Map<String, String> getParams() {
+    public Map<String, Param<?>> getParams() {
         return this.context.getParams();
     }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index e58f5c8..c47bbec 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -32,6 +32,7 @@ import java.lang.reflect.Method;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaException;
 import org.xml.sax.EntityResolver;
 import org.xml.sax.InputSource;
@@ -57,7 +58,7 @@ public class ParseContext implements Serializable {
     /**
      * Map of configurable arguments
      */
-    private final Map<String, String> params = new HashMap<>();
+    private final Map<String, Param<?>> params = new HashMap<>();
 
     private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() {
         public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
@@ -205,7 +206,7 @@ public class ParseContext implements Serializable {
      * @param key parameter name
      * @param value value
      */
-    public void setParam(String key, String value){
+    public void setParam(String key, Param<?> value){
         this.params.put(key, value);
     }
 
@@ -221,7 +222,7 @@ public class ParseContext implements Serializable {
      * Gets all the params
      * @return map of key values
      */
-    public Map<String, String> getParams() {
+    public Map<String, Param<?>> getParams() {
         return params;
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/java/org/apache/tika/config/ParamTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/config/ParamTest.java b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java
new file mode 100644
index 0000000..7c9007e
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import static org.junit.Assert.*;
+
+public class ParamTest {
+
+    @Test
+    public void testSaveAndLoad() throws Exception {
+
+        Object objects [] =  {
+                Integer.MAX_VALUE,
+                2.5f,
+                4000.57576,
+                true,
+                false,
+                Long.MAX_VALUE,
+                "Hello this is a boring string",
+                new URL("http://apache.org"),
+                new URI("tika://org.apache.tika.ner.parser?impl=xyz"),
+                new BigInteger(Long.MAX_VALUE + "").add(new BigInteger(Long.MAX_VALUE + "")),
+                new File("."),
+        };
+
+        for (Object object : objects) {
+            String name = "name" + System.currentTimeMillis();
+            Param<?> param = new Param<>(name, object);
+            ByteArrayOutputStream stream = new ByteArrayOutputStream();
+            param.save(stream);
+            ByteArrayInputStream inStream = new ByteArrayInputStream(stream.toByteArray());
+            stream.close();
+            inStream.close();
+            Param<?> loaded = Param.load(inStream);
+            assertEquals(param.getName(), loaded.getName());
+            assertEquals(param.getTypeString(), loaded.getTypeString());
+            assertEquals(param.getType(), loaded.getType());
+            assertEquals(param.getValue(), loaded.getValue());
+
+            assertEquals(loaded.getValue(), object);
+            assertEquals(loaded.getName(), name);
+            assertEquals(loaded.getType(), object.getClass());
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
index b8775e0..5a874ac 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.parser;
 
+import org.apache.tika.config.Param;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -56,8 +57,8 @@ public class DummyConfigurableParser extends AbstractParser {
     public void parse(InputStream stream, ContentHandler handler,
                       Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
-        for (Map.Entry<String, String> entry : getParams().entrySet()) {
-            metadata.add(entry.getKey(), entry.getValue());
+        for (Map.Entry<String, Param<?>> entry : getParams().entrySet()) {
+            metadata.add(entry.getKey(), entry.getValue().getValue().toString());
         }
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
index 999cb45..37c71c9 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
@@ -19,7 +19,7 @@
     <parsers>
         <parser class="org.apache.tika.parser.DummyConfigurableParser">
             <params>
-                <testparam>testparamval</testparam>
+                <param name="testparam" type="string">testparamval</param>
             </params>
         </parser>
 


[08/12] tika git commit: Updated test case with type checking

Posted by th...@apache.org.
Updated test case with type checking

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/9e08a6bc
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/9e08a6bc
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/9e08a6bc

Branch: refs/heads/TIKA-1508
Commit: 9e08a6bc0a2b2ffad12e4b6f90725b2201d0a69b
Parents: 0186992
Author: Thamme Gowda <tg...@gmail.com>
Authored: Wed May 25 17:50:49 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Wed May 25 17:50:49 2016 -0700

----------------------------------------------------------------------
 .../tika/parser/ConfigurableParserTest.java     | 32 ++++++++++++++++++++
 .../tika/parser/DummyConfigurableParser.java    |  4 ++-
 .../tika/config/TIKA-1508-configurable.xml      | 10 ++++++
 3 files changed, 45 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/9e08a6bc/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
index f91a2b0..c059626 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
@@ -22,7 +22,13 @@ import org.apache.tika.metadata.Metadata;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.io.File;
+import java.math.BigInteger;
+import java.net.URI;
 import java.net.URL;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
 
 public class ConfigurableParserTest {
 
@@ -41,4 +47,30 @@ public class ConfigurableParserTest {
         Assert.assertEquals(TEST_PARAM_VAL, md.get(TEST_PARAM));
         //assert that param from configuration file is read, given to parser and it copied to metadata
     }
+
+    /**
+     * Parses with the test configuration and checks the "-type" metadata
+     * entry of each typed param against the expected Java class name.
+     */
+    @Test
+    public void testConfigurableParserTypes() throws Exception {
+        URL configFileUrl = getClass().getClassLoader().getResource(TIKA_CFG_FILE);
+        // JUnit assertion instead of the assert keyword, which is skipped
+        // when the JVM runs with assertions disabled
+        Assert.assertNotNull("Config file not found on classpath: " + TIKA_CFG_FILE, configFileUrl);
+        TikaConfig config = new TikaConfig(configFileUrl);
+        Tika tika = new Tika(config);
+        Metadata md = new Metadata();
+        tika.parse(configFileUrl.openStream(), md);
+
+        Map<String, Class<?>> expct = new HashMap<>();
+        expct.put("xint", Integer.class);
+        expct.put("xfile", File.class);
+        expct.put("xlong", Long.class);
+        expct.put("xshort", Short.class);
+        expct.put("xfloat", Float.class);
+        expct.put("xdouble", Double.class);
+        expct.put("xbigint", BigInteger.class);
+        expct.put("xurl", URL.class);
+        expct.put("xuri", URI.class);
+        expct.put("xbool", Boolean.class);
+
+        for (Map.Entry<String, Class<?>> entry : expct.entrySet()) {
+            Assert.assertEquals("Unexpected type for param " + entry.getKey(),
+                    entry.getValue().getName(), md.get(entry.getKey()+"-type"));
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/9e08a6bc/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
index 5a874ac..3914b01 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
@@ -58,7 +58,9 @@ public class DummyConfigurableParser extends AbstractParser {
                       Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
         for (Map.Entry<String, Param<?>> entry : getParams().entrySet()) {
-            metadata.add(entry.getKey(), entry.getValue().getValue().toString());
+            Param<?> param = entry.getValue();
+            metadata.add(entry.getKey(), param.getValue().toString());
+            metadata.add(entry.getKey()+"-type", param.getValue().getClass().getName());
         }
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/9e08a6bc/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
index 37c71c9..006d6fa 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml
@@ -20,6 +20,16 @@
         <parser class="org.apache.tika.parser.DummyConfigurableParser">
             <params>
                 <param name="testparam" type="string">testparamval</param>
+                <param name="xshort" type="short">1000</param>
+                <param name="xint" type="int">999999999</param>
+                <param name="xlong" type="long">9999999999999</param>
+                <param name="xbigint" type="bigint">99999999999999999999999999999999999999999999999</param>
+                <param name="xfloat" type="float">10.2</param>
+                <param name="xbool" type="bool">true</param>
+                <param name="xdouble" type="double">4.6</param>
+                <param name="xurl" type="url">http://apache.org</param>
+                <param name="xfile" type="file">/</param>
+                <param name="xuri" type="uri">tika://customuri?param=value</param>
             </params>
         </parser>
 


[06/12] tika git commit: Update javadoc with @since

Posted by th...@apache.org.
Update javadoc with @since

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/b64612dc
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/b64612dc
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/b64612dc

Branch: refs/heads/TIKA-1508
Commit: b64612dcdb021fbb8b3fbf31d70a02f1bb7736cb
Parents: e780d56
Author: Thamme Gowda <tg...@gmail.com>
Authored: Mon May 23 11:52:55 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Mon May 23 11:52:55 2016 -0700

----------------------------------------------------------------------
 tika-core/src/main/java/org/apache/tika/base/Configurable.java   | 4 ++--
 .../main/java/org/apache/tika/exception/TikaConfigException.java | 2 +-
 .../src/main/java/org/apache/tika/parser/AbstractParser.java     | 4 +++-
 .../src/main/java/org/apache/tika/parser/ConfigurableParser.java | 2 ++
 .../java/org/apache/tika/parser/DummyConfigurableParser.java     | 1 -
 5 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/b64612dc/tika-core/src/main/java/org/apache/tika/base/Configurable.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/base/Configurable.java b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
index 6f1c405..4e6418d 100644
--- a/tika-core/src/main/java/org/apache/tika/base/Configurable.java
+++ b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
@@ -23,7 +23,7 @@ import java.util.Map;
 
 /**
  * Defines contract for configurable services
- * @since Apache Tika 1.13
+ * @since Apache Tika 1.14
  */
 public interface Configurable {
 
@@ -31,7 +31,7 @@ public interface Configurable {
      * Configure an instance with Tika Context
      * @param context configuration instance in the form of context
      * @throws TikaConfigException when an instance fails to work at the given context
-     * @since Apache Tika 1.13
+     * @since Apache Tika 1.14
      */
     void configure(ParseContext context) throws TikaConfigException;
 

http://git-wip-us.apache.org/repos/asf/tika/blob/b64612dc/tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java b/tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java
index 1c01fb6..6b83f1f 100644
--- a/tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java
+++ b/tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java
@@ -21,7 +21,7 @@ package org.apache.tika.exception;
  * in Tika config file and/or one or more of the parsers failed to initialize
  * from that erroneous config.
  *
- * @since Apache Tika 1.13
+ * @since Apache Tika 1.14
  */
 public class TikaConfigException extends TikaException {
 

http://git-wip-us.apache.org/repos/asf/tika/blob/b64612dc/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
index 72c8bbd..00fac7b 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
@@ -65,7 +65,7 @@ public abstract class AbstractParser implements ConfigurableParser {
      * called by the framework to supply runtime parameters which may be
      * required for initialization
      * @param context the parser context at runtime
-     * @since Apache Tika 1.13
+     * @since Apache Tika 1.14
      */
     @Override
     public void configure(ParseContext context) throws TikaConfigException {
@@ -76,6 +76,8 @@ public abstract class AbstractParser implements ConfigurableParser {
     /**
      * Gets Parameters of this configurable instance
      * @return a map of key value pairs
+     *
+     * @since Apache Tika 1.14
      */
     @Override
     public Map<String, String> getParams() {

http://git-wip-us.apache.org/repos/asf/tika/blob/b64612dc/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java b/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java
index 3eabc02..47feefa 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java
@@ -24,6 +24,8 @@ import java.io.Serializable;
  * Extension of {@link Parser} with {@link Configurable} contract.
  * This interface shall be implemented to create parsers which accepts runtime parameters
  * from tika configuration file
+ *
+ * @since Tika 1.14
  */
 public interface ConfigurableParser extends Parser,
         Configurable, Serializable {

http://git-wip-us.apache.org/repos/asf/tika/blob/b64612dc/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
index e9466ca..b8775e0 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
@@ -16,7 +16,6 @@
  */
 package org.apache.tika.parser;
 
-import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;


[10/12] tika git commit: Added @Field Annotation to support auto initialize params from config

Posted by th...@apache.org.
Added @Field Annotation to support auto initialize params from config


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/aad23d9e
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/aad23d9e
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/aad23d9e

Branch: refs/heads/TIKA-1508
Commit: aad23d9e848b8d8e9a5a7a1f4359ef4a2dbdba24
Parents: c6eefbd
Author: Thamme Gowda <tg...@gmail.com>
Authored: Wed Jun 1 16:28:23 2016 -0700
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Wed Jun 1 16:28:23 2016 -0700

----------------------------------------------------------------------
 .../main/java/org/apache/tika/config/Field.java |  43 +++++
 .../java/org/apache/tika/config/ParamField.java | 167 ++++++++++++++++
 .../java/org/apache/tika/config/TikaConfig.java |   7 +-
 .../org/apache/tika/utils/AnnotationUtils.java  | 131 +++++++++++++
 .../tika/parser/DummyParametrizedParser.java    |  97 ++++++++++
 .../tika/parser/ParametrizedParserTest.java     |  67 +++++++
 .../apache/tika/utils/AnnotationUtilsTest.java  | 190 +++++++++++++++++++
 .../tika/config/TIKA-1986-parametrized.xml      |  37 ++++
 8 files changed, 738 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/aad23d9e/tika-core/src/main/java/org/apache/tika/config/Field.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/Field.java b/tika-core/src/main/java/org/apache/tika/config/Field.java
new file mode 100644
index 0000000..f4fe3f2
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/config/Field.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Field annotation is a contract for binding {@link Param} value from
+ * Tika Configuration to any instance of {@link org.apache.tika.base.Configurable}
+ * services.
+ * It is retained at runtime and may be placed on a field or on a method.
+ *
+ * @since Apache Tika 1.14
+ */
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.FIELD, ElementType.METHOD})
+public @interface Field{
+    /**
+     * Name of the param bound to the annotated member.
+     * The default is the sentinel {@link ParamField#DEFAULT}; for that value
+     * {@link ParamField} uses the field's own name, or for a method named
+     * {@code setXyz} the name {@code xyz} (the leading "set" is dropped and
+     * the following character is lower-cased).
+     *
+     * @return name of the Field
+     */
+    String name() default ParamField.DEFAULT;
+
+    /**
+     * @return whether this field is required or not; {@code false} by default
+     */
+    boolean required() default false;
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aad23d9e/tika-core/src/main/java/org/apache/tika/config/ParamField.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/ParamField.java b/tika-core/src/main/java/org/apache/tika/config/ParamField.java
new file mode 100644
index 0000000..96063dc
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/config/ParamField.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import java.lang.reflect.*;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * This class stores metdata for {@link Field} annotation are used to map them
+ * to {@link Param} at runtime
+ *
+ * @since Apache Tika 1.14
+ */
+public class ParamField {
+
+    public static final String DEFAULT = "#default";
+
+    //NOTE: since (primitive type) is NOT AssignableFrom (BoxedType),
+    // we just use boxed type for everything!
+    // Example : short.class.isAssignableFrom(Short.class) ? false
+    private static final Map<Class<?>, Class<?>> PRIMITIVE_MAP
+            = new HashMap<Class<?>, Class<?>>(){{
+        put(int.class, Integer.class);
+        put(short.class, Short.class);
+        put(boolean.class, Boolean.class);
+        put(long.class, Long.class);
+        put(float.class, Float.class);
+        put(double.class, Double.class);
+    }};
+
+    private java.lang.reflect.Field field;
+    private Method setter;
+    private String name;
+    private Class<?> type;
+    private boolean required;
+
+    /**
+     * Creates a ParamField object
+     * @param member a field or method which has {@link Field} annotation
+     */
+    public ParamField(AccessibleObject member){
+        if (member instanceof java.lang.reflect.Field) {
+            field = (java.lang.reflect.Field) member;
+        } else {
+            setter = (Method) member;
+        }
+
+        Field annotation = member.getAnnotation(Field.class);
+        required = annotation.required();
+        if (annotation.name().equals(DEFAULT)) {
+            if (field != null){
+                name = field.getName();
+            } else {
+                String funcName = setter.getName();
+                if (funcName.startsWith("set")) {
+                    name = funcName.replaceFirst("^set", "");
+                }
+            }
+        }
+        name = retrieveParamName(annotation);
+        type = retrieveType();
+    }
+
+    public java.lang.reflect.Field getField() {
+        return field;
+    }
+
+    public Method getSetter() {
+        return setter;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public Class<?> getType() {
+        return type;
+    }
+
+    public boolean isRequired() {
+        return required;
+    }
+
+    /**
+     * Sets given value to the annotated field of bean
+     * @param bean bean with annotation for field
+     * @param value value of field
+     * @throws IllegalAccessException when it occurs
+     * @throws InvocationTargetException when it occurs
+     */
+    public void assignValue(Object bean, Object value)
+            throws IllegalAccessException, InvocationTargetException {
+        if (field != null) {
+            field.set(bean, value);
+        } else {
+            setter.invoke(bean, value);
+        }
+    }
+
+    private Class retrieveType() {
+        Class type;
+        if (field != null) {
+            type = field.getType();
+        } else {
+            Class[] params = setter.getParameterTypes();
+            if (params.length != 1) {
+                //todo:Tika config exception
+                String msg = "Invalid setter method. Must have one and only one parameter. ";
+                if (setter.getName().startsWith("get")) {
+                    msg += "Perhaps the annotation is misplaced on " +
+                            setter.getName() +" while a set'X' is expected?";
+                }
+                throw new RuntimeException(msg);
+            }
+            type = params[0];
+        }
+        if (type.isPrimitive() && PRIMITIVE_MAP.containsKey(type)){
+            type = PRIMITIVE_MAP.get(type); //primitive types have hard time
+        }
+        return type;
+    }
+
+    private String retrieveParamName(Field annotation) {
+        String name;
+        if (annotation.name().equals(DEFAULT)) {
+            if (field != null) {
+                name = field.getName();
+            } else {
+                String setterName = setter.getName();
+                if (setterName.startsWith("set") && setterName.length() > 3) {
+                    name = setterName.substring(3, 4).toLowerCase(Locale.ROOT)
+                            + setterName.substring(4);
+                } else {
+                    name = setter.getName();
+                }
+            }
+        } else {
+            name = annotation.name();
+        }
+        return name;
+    }
+
+    @Override
+    public String toString() {
+        return "ParamField{" +
+                "name='" + name + '\'' +
+                ", type=" + type +
+                ", required=" + required +
+                '}';
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aad23d9e/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 17b735e..853cdf0 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -59,6 +59,7 @@ import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.utils.AnnotationUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -567,8 +568,12 @@ public class TikaConfig {
                 loaded = decorate(loaded, element);
                 //if the instance is configurable, then call configure()
                 if (loaded instanceof Configurable){
+                    Map<String, Param<?>> params = getParams(element);
+                    //Assigning the params to bean fields/setters
+                    AnnotationUtils.assignFieldParams(loaded, params);
+                    //invoking the configure() hook
                     ParseContext context = new ParseContext();
-                    context.getParams().putAll(getParams(element));
+                    context.getParams().putAll(params);
                     ((Configurable) loaded).configure(context); // initialize here
                 }
                 // All done with setup

http://git-wip-us.apache.org/repos/asf/tika/blob/aad23d9e/tika-core/src/main/java/org/apache/tika/utils/AnnotationUtils.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/utils/AnnotationUtils.java b/tika-core/src/main/java/org/apache/tika/utils/AnnotationUtils.java
new file mode 100644
index 0000000..08e004b
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/utils/AnnotationUtils.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.utils;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Param;
+import org.apache.tika.config.ParamField;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaConfigException;
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.AccessibleObject;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This class contains utilities for dealing with tika annotations
+ * @since Apache Tika 1.14
+ */
+public class AnnotationUtils {
+
+    /**
+     * Cache of {@link Field}-annotated members per bean class. Lookups and
+     * inserts may come from several threads, hence the concurrent map.
+     */
+    private static final Map<Class<?>, List<ParamField>> PARAM_INFO =
+            new java.util.concurrent.ConcurrentHashMap<>();
+
+    /**
+     * Collects all the fields and methods carrying an annotation, looking at
+     * the class and its superclasses (up to but excluding {@link Object}).
+     * Every returned member has been made accessible.
+     * @param clazz bean class with annotations
+     * @param annotation annotation class
+     * @return list of accessible objects such as fields and methods
+     */
+    private static List<AccessibleObject> collectInfo(
+            Class<?> clazz, Class<? extends Annotation> annotation) {
+
+        List<AccessibleObject> members = new ArrayList<>();
+        //walk through the inheritance chain
+        for (Class<?> c = clazz; c != null && c != Object.class; c = c.getSuperclass()) {
+            members.addAll(Arrays.asList(c.getDeclaredFields()));
+            members.addAll(Arrays.asList(c.getDeclaredMethods()));
+        }
+
+        List<AccessibleObject> annotatedMembers = new ArrayList<>();
+        for (final AccessibleObject member : members) {
+            if (member.isAnnotationPresent(annotation)) {
+                AccessController.doPrivileged(new PrivilegedAction<Void>(){
+                    @Override
+                    public Void run() {
+                        member.setAccessible(true);
+                        return null;
+                    }
+                });
+                annotatedMembers.add(member);
+            }
+        }
+        return annotatedMembers;
+    }
+
+    /**
+     * Assigns the param values to the {@link Field}-annotated members of a bean.
+     * @param bean object whose annotated fields/setters receive the values
+     * @param params params keyed by name
+     * @throws TikaConfigException when a required param is missing, a param's
+     *  type is not assignable to the member's type, or the assignment fails
+     */
+    public static void assignFieldParams(Object bean, Map<String, Param<?>> params) throws TikaConfigException {
+        Class<?> beanClass = bean.getClass();
+        List<ParamField> fields = PARAM_INFO.get(beanClass);
+        if (fields == null) {
+            //cache miss: racing threads build equivalent lists, so no lock is needed
+            List<AccessibleObject> aObjs = collectInfo(beanClass,
+                    org.apache.tika.config.Field.class);
+            fields = new ArrayList<>(aObjs.size());
+            for (AccessibleObject aObj : aObjs) {
+                fields.add(new ParamField(aObj));
+            }
+            PARAM_INFO.put(beanClass, fields);
+        }
+
+        for (ParamField field : fields) {
+            Param<?> param = params.get(field.getName());
+            if (param != null){
+                if (field.getType().isAssignableFrom(param.getType())) {
+                    try {
+                        field.assignValue(bean, param.getValue());
+                    } catch (Exception e) {
+                        throw new TikaConfigException(e.getMessage(), e);
+                    }
+                } else {
+                    //report the declared param type (the one checked above);
+                    //calling getClass() on the value would fail for a null value
+                    String msg = String.format("Value '%s' of type '%s' cant be" +
+                            " assigned to field '%s' of defined type '%s'",
+                            param.getValue(), param.getType(),
+                            field.getName(), field.getType());
+                    throw new TikaConfigException(msg);
+                }
+            } else if (field.isRequired()){
+                //param not supplied but field is declared as required?
+                String msg = String.format("Param %s is required for %s," +
+                        " but it is not given in config.", field.getName(),
+                        bean.getClass().getName());
+                throw new TikaConfigException(msg);
+            }
+            //else: optional param not supplied, the member keeps its current value
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aad23d9e/tika-core/src/test/java/org/apache/tika/parser/DummyParametrizedParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyParametrizedParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyParametrizedParser.java
new file mode 100644
index 0000000..383a80b
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyParametrizedParser.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.math.BigInteger;
+import java.net.URI;
+import java.net.URL;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A test parser used to exercise {@link Field}
+ * @since Apache Tika 1.14
+ */
+public class DummyParametrizedParser extends AbstractParser
+        implements ConfigurableParser {
+
+    /** Media types reported by {@link #getSupportedTypes(ParseContext)}. */
+    private static Set<MediaType> MIMES = new HashSet<>();
+    static {
+        MIMES.add(MediaType.TEXT_PLAIN);
+        MIMES.add(MediaType.TEXT_HTML);
+        MIMES.add(MediaType.APPLICATION_XML);
+        MIMES.add(MediaType.OCTET_STREAM);
+    }
+
+    // One annotated member per param type exercised by the test config
+    @Field(name = "testparam") private String testParam;
+    @Field private short xshort;
+    @Field private int xint;
+    @Field private long xlong;
+    @Field(name = "xbigint") private BigInteger xbigInt;
+    @Field private float xfloat;
+    @Field private double xdouble;
+    @Field private boolean xbool;
+    @Field private URL xurl;
+    @Field private URI xuri;
+    // Annotated, but no param of this name is expected to be supplied
+    @Field private String missing = "default";
+    // Not annotated at all
+    private String inner = "inner";
+    private File xfile; // populated through the annotated setter below
+
+    @Field
+    public void setXfile(File xfile){
+        this.xfile = xfile;
+    }
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return MIMES;
+    }
+
+    /** Copies the current value of every member into the metadata, as text. */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        metadata.add("testparam", testParam);
+        metadata.add("xshort", String.valueOf(xshort));
+        metadata.add("xint", String.valueOf(xint));
+        metadata.add("xlong", String.valueOf(xlong));
+        metadata.add("xbigint", String.valueOf(xbigInt));
+        metadata.add("xfloat", String.valueOf(xfloat));
+        metadata.add("xdouble", String.valueOf(xdouble));
+        metadata.add("xbool", String.valueOf(xbool));
+        metadata.add("xuri", String.valueOf(xuri));
+        metadata.add("xurl", String.valueOf(xurl));
+        metadata.add("xfile", String.valueOf(xfile));
+        metadata.add("inner", String.valueOf(inner));
+        metadata.add("missing", String.valueOf(missing));
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aad23d9e/tika-core/src/test/java/org/apache/tika/parser/ParametrizedParserTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ParametrizedParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/ParametrizedParserTest.java
new file mode 100644
index 0000000..290f819
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/ParametrizedParserTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+public class ParametrizedParserTest {
+
+    private static final String TIKA_CFG_FILE = "org/apache/tika/config/TIKA-1986-parametrized.xml";
+    /** Metadata values the dummy parser is expected to emit for the config above. */
+    private static final Map<String, String> EXPECTED = new HashMap<>();
+    static {
+        EXPECTED.put("testparam", "testparamval");
+        EXPECTED.put("xshort", "1000");
+        EXPECTED.put("xint", "999999999");
+        EXPECTED.put("xlong", "9999999999999");
+        EXPECTED.put("xbigint", "99999999999999999999999999999999999999999999999");
+        EXPECTED.put("xfloat", "10.2");
+        EXPECTED.put("xbool", "true");
+        EXPECTED.put("xdouble", "4.6");
+        EXPECTED.put("xurl", "http://apache.org");
+        EXPECTED.put("xfile", "/");
+        EXPECTED.put("xuri", "tika://customuri?param=value");
+
+        EXPECTED.put("inner", "inner");
+        EXPECTED.put("missing", "default");
+    }
+
+    /** Parses a stream with the parametrized config and checks each param value surfaces in the metadata. */
+    @Test
+    public void testConfigurableParserTypes() throws Exception {
+        URL configFileUrl = getClass().getClassLoader().getResource(TIKA_CFG_FILE);
+        //JUnit assertion: the 'assert' keyword is skipped unless the JVM runs with -ea
+        Assert.assertNotNull("Test config not found: " + TIKA_CFG_FILE, configFileUrl);
+        TikaConfig config = new TikaConfig(configFileUrl);
+        Tika tika = new Tika(config);
+        Metadata md = new Metadata();
+        tika.parse(configFileUrl.openStream(), md);
+
+        for (Map.Entry<String, String> entry : EXPECTED.entrySet()) {
+            Assert.assertEquals("mismatch for " + entry.getKey(), entry.getValue(), md.get(entry.getKey()));
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/aad23d9e/tika-core/src/test/java/org/apache/tika/utils/AnnotationUtilsTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/utils/AnnotationUtilsTest.java b/tika-core/src/test/java/org/apache/tika/utils/AnnotationUtilsTest.java
new file mode 100644
index 0000000..eaa3549
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/utils/AnnotationUtilsTest.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.utils;
+
+import aQute.bnd.annotation.metatype.Configurable;
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * @since Apache Tika 1.14
+ */
+public class AnnotationUtilsTest {
+
+    /**
+     * A param whose type matches the annotated field is assigned, while a
+     * param of an incompatible type is rejected.
+     */
+    @Test
+    public void testMisMatchType() throws TikaConfigException {
+
+        class MyParser {
+            @Field(required = true) int config;
+        }
+
+        Map<String, Param<?>> params = new HashMap<>();
+        params.put("config", new Param<>("config", 1));
+
+        //an unexpected TikaConfigException propagates and fails the test with its cause
+        MyParser bean = new MyParser();
+        AnnotationUtils.assignFieldParams(bean, params);
+        Assert.assertEquals(1, bean.config);
+
+        params.clear();
+        try {
+            params.put("config", new Param<>("config", "a string value"));
+            AnnotationUtils.assignFieldParams(new MyParser(), params);
+            Assert.fail("Exception expected");
+        } catch (TikaConfigException e) {
+            //expected
+        }
+    }
+
+    /**
+     * A primitive field and a boxed field bound to the same param name both
+     * receive the value.
+     */
+    @Test
+    public void testPrimitiveAndBoxedTypes() throws TikaConfigException {
+
+        class MyParser {
+            @Field(required = true) int config;
+            @Field(required = true, name = "config") Integer config2;
+        }
+
+        int val = 100;
+        Map<String, Param<?>> params = new HashMap<>();
+        params.put("config", new Param<>("config", val));
+
+        MyParser bean = new MyParser();
+        AnnotationUtils.assignFieldParams(bean, params);
+        //compare each field against the expected value so a failure names the culprit
+        Assert.assertEquals(val, bean.config);
+        Assert.assertEquals(Integer.valueOf(val), bean.config2);
+    }
+
+    /**
+     * A required param is assigned when supplied, and its absence is reported
+     * as a configuration error.
+     */
+    @Test
+    public void testRequiredParam() throws TikaConfigException {
+
+        class MyParser {
+            @Field(required = true) String config;
+        }
+
+        Map<String, Param<?>> params = new HashMap<>();
+        String someval = "someval";
+        params.put("config", new Param<>("config", someval));
+
+        MyParser bean = new MyParser();
+        AnnotationUtils.assignFieldParams(bean, params);
+        Assert.assertEquals(someval, bean.config);
+
+        params.clear();
+        try {
+            AnnotationUtils.assignFieldParams(new MyParser(), params);
+            Assert.fail("Exception expected");
+        } catch (TikaConfigException e) {
+            //expected
+        }
+    }
+
+
+    /**
+     * Annotated fields declared on a superclass are populated too, and a
+     * missing required superclass param is reported.
+     */
+    @Test
+    public void testParserInheritance() throws TikaConfigException {
+
+        class Parent {
+            @Field(required = true) int overridden;
+            @Field(required = true) int parentField;
+
+        }
+
+        class Child extends Parent {
+            @Field(required = true) int overridden;
+            @Field(required = true) int childField;
+        }
+
+        int val = 1;
+        Map<String, Param<?>> params = new HashMap<>();
+        params.put("overridden", new Param<>("overridden", val));
+        params.put("parentField", new Param<>("parentField", val));
+        params.put("childField", new Param<>("childField", val));
+
+        Child child = new Child();
+        AnnotationUtils.assignFieldParams(child, params);
+        Assert.assertEquals(val, child.overridden);
+        Assert.assertEquals(val, child.parentField);
+        Assert.assertEquals(val, child.childField);
+
+        //without the parent's required param the assignment must fail
+        try {
+            params.remove("parentField");
+            AnnotationUtils.assignFieldParams(new Child(), params);
+            Assert.fail("Exception expected, parent class field not set");
+        } catch (TikaConfigException e) {
+            //expected
+        }
+    }
+
+
+
+    /**
+     * A param value is accepted when its type is assignable to the field type
+     * (String to CharSequence) and rejected otherwise (Date to CharSequence).
+     */
+    @Test
+    public void testParamValueInheritance() throws TikaConfigException {
+
+        class Bean {
+            @Field(required = true) CharSequence field;
+        }
+
+        Bean parser = new Bean();
+        Map<String, Param<?>> params = new HashMap<>();
+
+        //String is a CharSequence, so this assignment must succeed
+        String val = "someval";
+        params.put("field", new Param<String>("field", String.class, val));
+        AnnotationUtils.assignFieldParams(parser, params);
+        Assert.assertEquals(val, parser.field);
+
+        //Date is not a CharSequence, so this one must be rejected
+        try {
+            Date date = new Date();
+            params.put("field", new Param<Date>("field", Date.class, date));
+            AnnotationUtils.assignFieldParams(parser, params);
+            Assert.fail("Exception expected, Date is not assignable to CharSequence.");
+        } catch (TikaConfigException e){
+            //expected
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aad23d9e/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-parametrized.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-parametrized.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-parametrized.xml
new file mode 100644
index 0000000..6689a19
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-parametrized.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.DummyParametrizedParser">
+            <params>
+                <param name="testparam" type="string">testparamval</param>
+                <param name="xshort" type="short">1000</param>
+                <param name="xint" type="int">999999999</param>
+                <param name="xlong" type="long">9999999999999</param>
+                <param name="xbigint" type="bigint">99999999999999999999999999999999999999999999999</param>
+                <param name="xfloat" type="float">10.2</param>
+                <param name="xbool" type="bool">true</param>
+                <param name="xdouble" type="double">4.6</param>
+                <param name="xurl" type="url">http://apache.org</param>
+                <param name="xfile" type="file">/</param>
+                <param name="xuri" type="uri">tika://customuri?param=value</param>
+            </params>
+        </parser>
+
+    </parsers>
+</properties>


[03/12] tika git commit: Added a TikaConfigException, params getter

Posted by th...@apache.org.
Added a TikaConfigException, params getter


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/64db9614
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/64db9614
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/64db9614

Branch: refs/heads/TIKA-1508
Commit: 64db9614cfaa3e873a9dc9efc6d201d887f6a4c5
Parents: ae51417
Author: Thamme Gowda <tg...@gmail.com>
Authored: Sat Mar 12 06:43:44 2016 -0800
Committer: Thamme Gowda <tg...@gmail.com>
Committed: Sat Mar 12 06:43:44 2016 -0800

----------------------------------------------------------------------
 .../java/org/apache/tika/base/Configurable.java | 32 ++++++++++++++--
 .../tika/exception/TikaConfigException.java     | 39 ++++++++++++++++++++
 .../org/apache/tika/parser/AbstractParser.java  | 15 +++++++-
 3 files changed, 81 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/64db9614/tika-core/src/main/java/org/apache/tika/base/Configurable.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/base/Configurable.java b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
index 8ae1b30..6f1c405 100644
--- a/tika-core/src/main/java/org/apache/tika/base/Configurable.java
+++ b/tika-core/src/main/java/org/apache/tika/base/Configurable.java
@@ -1,8 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.tika.base;
 
-import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.parser.ParseContext;
 
+import java.util.Map;
+
 /**
  * Defines contract for configurable services
  * @since Apache Tika 1.13
@@ -10,10 +28,16 @@ import org.apache.tika.parser.ParseContext;
 public interface Configurable {
 
     /**
-     * Confure an instance with Tika Context
+     * Configure an instance with Tika Context
      * @param context configuration instance in the form of context
-     * @throws TikaException when an instance fails to work at the given context
+     * @throws TikaConfigException when an instance fails to work at the given context
      * @since Apache Tika 1.13
      */
-    void configure(ParseContext context) throws TikaException;
+    void configure(ParseContext context) throws TikaConfigException;
+
+    /**
+     * Gets parameters of this configurable instance
+     * @return parameters in the form  of a map of key value pairs
+     */
+    Map<String, String> getParams();
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/64db9614/tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java b/tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java
new file mode 100644
index 0000000..1c01fb6
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.exception;
+
+/**
+ * Tika Config Exception is thrown when there is an error in the Tika config
+ * file and/or when one or more of the parsers fail to initialize from that
+ * erroneous config.
+ *
+ * @since Apache Tika 1.13
+ */
+public class TikaConfigException extends TikaException {
+
+    /** Creates an exception described by {@code msg}. */
+    public TikaConfigException(String msg) {
+        super(msg);
+    }
+
+    /**
+     * Creates an exception described by {@code msg}, passing {@code cause} on to the superclass.
+     */
+    public TikaConfigException(String msg, Throwable cause) {
+        super(msg, cause);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/64db9614/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
index 10f731e..72c8bbd 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
@@ -18,8 +18,10 @@ package org.apache.tika.parser;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Map;
 import java.util.Properties;
 
+import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.xml.sax.ContentHandler;
@@ -66,7 +68,18 @@ public abstract class AbstractParser implements ConfigurableParser {
      * @since Apache Tika 1.13
      */
     @Override
-    public void configure(ParseContext context) throws TikaException {
+    public void configure(ParseContext context) throws TikaConfigException {
         this.context = context;
     }
+
+
+    /**
+     * Gets Parameters of this configurable instance
+     * @return a map of key value pairs
+     */
+    @Override
+    public Map<String, String> getParams() {
+        return this.context.getParams();
+    }
 }
+