Posted to commits@tika.apache.org by ta...@apache.org on 2020/09/01 15:23:47 UTC

[tika] 02/03: Git add files...no idea how this mv failed...

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3179
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 057186813a46b2d2a90a16945c1f5604d350caf5
Author: tallison <ta...@apache.org>
AuthorDate: Tue Sep 1 11:22:42 2020 -0400

    Git add files...no idea how this mv failed...
---
 .../tika/dl/imagerec/dl4j-inception3-config.xml    |  35 +
 .../apache/tika/dl/imagerec/dl4j-vgg16-config.xml  |  32 +
 .../parser/recognition/AgeRecogniserConfig.java    |  69 ++
 .../apache/tika/parser/captioning/tf/im2txtapi.py  | 266 +++++++
 .../parser/recognition/tika-config-tflow-rest.xml  |  33 +
 .../recognition/ObjectRecognitionParserTest.java   | 199 +++++
 .../tf/TensorflowVideoRecParserTest.java           |  55 ++
 .../parser/ctakes/CTAKESAnnotationProperty.java    |  46 ++
 .../tika/parser/geo/NameEntityExtractor.java       | 122 +++
 .../tika/parser/journal/GrobidRESTParser.java      | 116 +++
 .../apache/tika/parser/journal/TEIDOMParser.java   | 882 +++++++++++++++++++++
 .../tika/parser/ner/nltk/NLTKNERecogniser.java     | 147 ++++
 .../services/org.apache.tika.parser.Parser         |  18 +
 .../tika/parser/ner/grobid/GrobidServer.properties |  17 +
 .../org/apache/tika/parser/journal/TEITest.java    |  69 ++
 .../tika/parser/ner/NamedEntityParserTest.java     |  91 +++
 .../tika/parser/ner/nltk/NLTKNERecogniserTest.java |  49 ++
 .../apache/tika/parser/ner/opennlp/get-models.sh   |  26 +
 .../apache/tika/parser/ner/regex/tika-config.xml   |  27 +
 19 files changed, 2299 insertions(+)

diff --git a/tika-parsers-advanced/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-inception3-config.xml b/tika-parsers-advanced/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-inception3-config.xml
new file mode 100644
index 0000000..2728063
--- /dev/null
+++ b/tika-parsers-advanced/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-inception3-config.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<properties>
+  <parsers>
+    <parser class="org.apache.tika.parser.recognition.ObjectRecognitionParser">
+      <mime>image/jpeg</mime>
+      <params>
+        <param name="modelWeightsPath" type="string">https://github.com/USCDataScience/tika-dockers/releases/download/v0.2/inception_v3_keras_2.h5</param>
+        <param name="labelFile" type="string">https://github.com/USCDataScience/tika-dockers/releases/download/v0.2/imagenet_class_index.json</param>
+        <param name="topN" type="int">10</param>
+        <param name="minConfidence" type="double">0.015</param>
+        <param name="class" type="string">org.apache.tika.dl.imagerec.DL4JInceptionV3Net</param>
+      </params>
+    </parser>
+  </parsers>
+</properties>
diff --git a/tika-parsers-advanced/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-vgg16-config.xml b/tika-parsers-advanced/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-vgg16-config.xml
new file mode 100644
index 0000000..940a4b6
--- /dev/null
+++ b/tika-parsers-advanced/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-vgg16-config.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.recognition.ObjectRecognitionParser">
+            <mime>image/jpeg</mime>
+            <params>
+                <param name="topN" type="int">3</param>
+                <param name="minConfidence" type="double">0.015</param>
+                <param name="class" type="string">org.apache.tika.dl.imagerec.DL4JVGG16Net</param>
+                <param name="modelType" type="string">VGG16</param>
+                <param name="serialize" type="bool">true</param>
+            </params>
+        </parser>
+    </parsers>
+</properties>
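For reference: configs like the two above are consumed through TikaConfig, in the same
way the ObjectRecognitionParserTest added later in this commit does it. A minimal
sketch, assuming the config is on the classpath and using a placeholder image path;
this is illustration only, not part of the patch:

    import java.io.InputStream;
    import java.io.Reader;

    import org.apache.tika.Tika;
    import org.apache.tika.config.TikaConfig;
    import org.apache.tika.io.IOUtils;
    import org.apache.tika.metadata.Metadata;

    public class Dl4jConfigExample {
        public static void main(String[] args) throws Exception {
            ClassLoader loader = Dl4jConfigExample.class.getClassLoader();
            // load the ObjectRecognitionParser configuration shown above
            try (InputStream config = loader.getResourceAsStream(
                    "org/apache/tika/dl/imagerec/dl4j-vgg16-config.xml")) {
                Tika tika = new Tika(new TikaConfig(config));
                Metadata metadata = new Metadata();
                // any JPEG; recognised objects show up in the parsed text and metadata
                try (InputStream image = loader.getResourceAsStream("test-documents/testJPEG.jpg")) {
                    Reader reader = tika.parse(image, metadata);
                    System.out.println(IOUtils.toString(reader));
                }
            }
        }
    }
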
diff --git a/tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java b/tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
new file mode 100644
index 0000000..92427f4
--- /dev/null
+++ b/tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.recognition;
+
+import java.net.URL;
+import java.util.Map;
+
+import org.apache.tika.config.Param;
+
+
+/**
+ * Stores paths to the AgePredictor classification and regression models.
+ */
+public class AgeRecogniserConfig {
+
+	private String pathClassifyModel = null;
+	private String pathClassifyRegression = null;
+
+	public AgeRecogniserConfig(Map<String, Param> params) {
+
+		URL classifyUrl = AgeRecogniserConfig.class.getResource(
+				params.get("age.path.classify").getValue().toString());
+
+		if (classifyUrl != null) {
+			setPathClassifyModel(classifyUrl.getFile());
+		}
+
+		URL regressionUrl = AgeRecogniserConfig.class.getResource(
+				params.get("age.path.regression").getValue().toString());
+
+		if (regressionUrl != null) {
+			setPathClassifyRegression(regressionUrl.getFile());
+		}
+	}
+
+	public String getPathClassifyModel() {
+		return pathClassifyModel;
+	}
+
+	public void setPathClassifyModel(String pathClassifyModel) {
+		this.pathClassifyModel = pathClassifyModel;
+	}
+
+	public String getPathClassifyRegression() {
+		return pathClassifyRegression;
+	}
+
+	public void setPathClassifyRegression(String pathClassifyRegression) {
+		this.pathClassifyRegression = pathClassifyRegression;
+	}
+    
+    
+   
+}
\ No newline at end of file
diff --git a/tika-parsers-advanced/tika-parser-advancedmedia-module/src/main/resources/org/apache/tika/parser/captioning/tf/im2txtapi.py b/tika-parsers-advanced/tika-parser-advancedmedia-module/src/main/resources/org/apache/tika/parser/captioning/tf/im2txtapi.py
new file mode 100644
index 0000000..97f1f2a
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-advancedmedia-module/src/main/resources/org/apache/tika/parser/captioning/tf/im2txtapi.py
@@ -0,0 +1,266 @@
+#!/usr/bin/env python
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#    http://www.apache.org/licenses/LICENSE-2.0
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+
+"""
+    This script exposes an image captioning service over a REST API. The image captioning implementation is based on the paper,
+
+        "Show and Tell: A Neural Image Caption Generator"
+        Oriol Vinyals, Alexander Toshev, Samy Bengio, Dumitru Erhan
+
+    For more details, please visit :
+        http://arxiv.org/abs/1411.4555
+    Requirements :
+      Flask
+      tensorflow
+      numpy
+      requests
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import logging
+import math
+import requests
+import sys
+
+from flask import Flask, request, Response, jsonify
+from io import BytesIO
+from PIL import Image
+from time import time
+
+import tensorflow as tf
+import xml.etree.ElementTree as ET
+
+import model_wrapper
+import vocabulary
+import caption_generator
+
+# turning off the traceback by limiting its depth
+sys.tracebacklimit = 0
+
+# informative log messages for advanced users to troubleshoot errors when modifying model_info.xml
+try:
+    info = ET.parse('/usr/share/apache-tika/models/dl/image/caption/model_info.xml').getroot()
+except IOError:
+    logging.exception('model_info.xml is not found')
+    sys.exit(1)
+
+model_main = info.find('model_main')
+if model_main is None:
+    logging.exception('<model_main> tag in model_info.xml is not found')
+    sys.exit(1)
+
+checkpoint_path = model_main.find('checkpoint_path')
+if checkpoint_path is None:
+    logging.exception('<checkpoint_path> tag under <model_main> tag in model_info.xml is not found')
+    sys.exit(1)
+else:
+    checkpoint_path = checkpoint_path.text
+
+vocab_file = model_main.find('vocab_file')
+if vocab_file is None:
+    logging.exception('<vocab_file> tag under <model_main> tag in model_info.xml is not found')
+    sys.exit(1)
+else:
+    vocab_file = vocab_file.text
+
+port = info.get('port')
+if port is None:
+    logging.exception('port attribute in <service> tag in model_info.xml is not found')
+    sys.exit(1)
+
+# turning on the traceback by setting it to default
+sys.tracebacklimit = 1000
+
+FLAGS = tf.flags.FLAGS
+tf.flags.DEFINE_string("checkpoint_path", checkpoint_path, """Directory containing the model checkpoint file.""")
+tf.flags.DEFINE_string('vocab_file', vocab_file, """Text file containing the vocabulary.""")
+tf.flags.DEFINE_integer('port', port, """Server PORT, default:8764""")
+
+tf.logging.set_verbosity(tf.logging.INFO)
+
+
+class Initializer(Flask):
+    """
+        Class to initialize the REST API; it loads the model from the checkpoint path given in model_info.xml
+        and prepares a caption_generator object
+    """
+
+    def __init__(self, name):
+        super(Initializer, self).__init__(name)
+        # build the inference graph
+        g = tf.Graph()
+        with g.as_default():
+            model = model_wrapper.ModelWrapper()
+            restore_fn = model.build_graph(FLAGS.checkpoint_path)
+        g.finalize()
+        # make the model globally available
+        self.model = model
+        # create the vocabulary
+        self.vocab = vocabulary.Vocabulary(FLAGS.vocab_file)
+        self.sess = tf.Session(graph=g)
+        # load the model from checkpoint
+        restore_fn(self.sess)
+
+
+def current_time():
+    """Returns current time in milli seconds"""
+
+    return int(1000 * time())
+
+
+app = Initializer(__name__)
+
+
+def get_remote_file(url, success=200, timeout=10):
+    """
+        Given an HTTP URL, fetches its content and
+        returns a (Content-Type, image_content) tuple
+    """
+    try:
+        app.logger.info("GET: %s" % url)
+        auth = None
+        res = requests.get(url, stream=True, timeout=timeout, auth=auth)
+        if res.status_code == success:
+            return res.headers.get('Content-Type', 'application/octet-stream'), res.raw.data
+    except Exception:
+        pass  # any fetch error falls through to returning (None, None)
+    return None, None
+
+
+@app.route("/")
+def index():
+    """The index page which provide information about other API end points"""
+
+    return """
+    <div>
+    <h1> Image Captioning REST API </h1>
+    <h3> The following API end points are valid </h3>
+        <ul>
+            <h4> Inception V3 </h4>
+            <li> <code>/inception/v3/ping </code> - <br/>
+                <b> Description : </b> Checks availability of the service. Returns "pong" with status 200 when it is available
+            </li>
+            <li> <code>/inception/v3/caption/image</code> - <br/>
+                <table>
+                <tr><th align="left"> Description </th><td> This is a service that can caption images</td></tr>
+                <tr><th align="left"> How to supply Image Content </th></tr>
+                <tr><th align="left"> With HTTP GET : </th> <td>
+                    Include a query parameter <code>url</code> which is an HTTP URL of a JPEG image <br/>
+                    Example: <code> curl "localhost:8764/inception/v3/caption/image?url=http://xyz.com/example.jpg"</code>
+                </td></tr>
+                <tr><th align="left"> With HTTP POST :</th><td>
+                    POST JPEG image content as binary data in request body. <br/>
+                    Example: <code> curl -X POST "localhost:8764/inception/v3/caption/image" --data-binary @example.jpg </code>
+                </td></tr>
+                </table>
+            </li>
+        </ul>
+    </div>
+    """
+
+
+@app.route("/inception/v3/ping", methods=["GET"])
+def ping_pong():
+    """API to do health check. If this says status code 200, then healthy"""
+
+    return "pong"
+
+
+@app.route("/inception/v3/caption/image", methods=["GET", "POST"])
+def caption_image():
+    """API to caption images"""
+    image_format = "not jpeg"
+
+    st = current_time()
+    # get beam_size
+    beam_size = int(request.args.get("beam_size", "3"))
+    # get max_caption_length
+    max_caption_length = int(request.args.get("max_caption_length", "20"))
+    # get image_data
+    if request.method == 'POST':
+        image_data = request.get_data()
+    else:
+        url = request.args.get("url")
+        c_type, image_data = get_remote_file(url)
+        if not image_data:
+            return Response(status=400, response=jsonify(error="Could not HTTP GET %s" % url))
+        if 'image/jpeg' in c_type:
+            image_format = "jpeg"
+
+    # use c_type to find whether image_format is jpeg or not
+    # if jpeg, don't convert
+    if image_format == "jpeg":
+        jpg_image = image_data
+    # if not jpeg
+    else:
+        # open the image from raw bytes
+        image = Image.open(BytesIO(image_data))
+        # convert the image to RGB; saving as JPEG fails if the image isn't already RGB
+        rgb_image = image.convert("RGB")
+        # convert the RGB image to jpeg
+        image_bytes = BytesIO()
+        rgb_image.save(image_bytes, format="jpeg", quality=95)
+        jpg_image = image_bytes.getvalue()
+        image_bytes.close()
+
+    read_time = current_time() - st
+    # restart counter
+    st = current_time()
+
+    generator = caption_generator.CaptionGenerator(app.model,
+                                                   app.vocab,
+                                                   beam_size=beam_size,
+                                                   max_caption_length=max_caption_length)
+    captions = generator.beam_search(app.sess, jpg_image)
+
+    captioning_time = current_time() - st
+    app.logger.info("Captioning time : %d" % captioning_time)
+
+    array_captions = []
+    for caption in captions:
+        sentence = [app.vocab.id_to_word(w) for w in caption.sentence[1:-1]]
+        sentence = " ".join(sentence)
+        array_captions.append({
+            'sentence': sentence,
+            'confidence': math.exp(caption.logprob)
+        })
+
+    response = {
+        'beam_size': beam_size,
+        'max_caption_length': max_caption_length,
+        'captions': array_captions,
+        'time': {
+            'read': read_time,
+            'captioning': captioning_time,
+            'units': 'ms'
+        }
+    }
+    return Response(response=json.dumps(response), status=200, mimetype="application/json")
+
+
+def main(_):
+    if not app.debug:
+        print("Serving on port %d" % FLAGS.port)
+    app.run(host="0.0.0.0", port=FLAGS.port)
+
+
+if __name__ == '__main__':
+    tf.app.run()
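The HTML index above documents the REST endpoints im2txtapi.py serves on port 8764
(the default declared in the flags). A minimal Java client sketch, using the same CXF
WebClient the tests in this commit rely on; the image URL is a placeholder:

    import javax.ws.rs.core.Response;

    import org.apache.cxf.jaxrs.client.WebClient;

    public class Im2txtClientExample {
        public static void main(String[] args) {
            String base = "http://localhost:8764/inception/v3";

            // health check: the service answers "pong" with status 200 when up
            Response ping = WebClient.create(base + "/ping").get();
            System.out.println("ping status: " + ping.getStatus());

            // GET variant: pass the JPEG's HTTP URL via the "url" query parameter
            Response caption = WebClient.create(base + "/caption/image")
                    .query("url", "http://example.com/example.jpg")
                    .query("beam_size", 3)
                    .query("max_caption_length", 20)
                    .get();
            // the response is JSON with beam_size, max_caption_length, captions[] and timing
            System.out.println(caption.readEntity(String.class));
        }
    }
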
diff --git a/tika-parsers-advanced/tika-parser-advancedmedia-module/src/main/resources/org/apache/tika/parser/recognition/tika-config-tflow-rest.xml b/tika-parsers-advanced/tika-parser-advancedmedia-module/src/main/resources/org/apache/tika/parser/recognition/tika-config-tflow-rest.xml
new file mode 100644
index 0000000..69a65d0
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-advancedmedia-module/src/main/resources/org/apache/tika/parser/recognition/tika-config-tflow-rest.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.recognition.ObjectRecognitionParser">
+            <mime>image/jpeg</mime>
+            <mime>image/png</mime>
+            <mime>image/gif</mime>
+            <params>
+                <param name="apiBaseUri" type="uri">http://localhost:8764/inception/v4</param>
+                <param name="topN" type="int">2</param>
+                <param name="minConfidence" type="double">0.015</param>
+                <param name="class" type="string">org.apache.tika.parser.recognition.tf.TensorflowRESTRecogniser</param>
+            </params>
+        </parser>
+    </parsers>
+</properties>
diff --git a/tika-parsers-advanced/tika-parser-advancedmedia-module/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java b/tika-parsers-advanced/tika-parser-advancedmedia-module/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java
new file mode 100644
index 0000000..25520af
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-advancedmedia-module/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.recognition;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.recognition.tf.TensorflowImageRecParser;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.List;
+
+/**
+ * Testcases for Object Recognition Parser
+ */
+public class ObjectRecognitionParserTest {
+
+    // Config files
+    private static final String CONFIG_FILE_OBJ_REC = "org/apache/tika/parser/recognition/tika-config-tflow.xml";
+    private static final String CONFIG_REST_FILE_OBJ_REC = "org/apache/tika/parser/recognition/tika-config-tflow-rest.xml";
+    private static final String CONFIG_REST_FILE_IM2TXT = "org/apache/tika/parser/recognition/tika-config-tflow-im2txt-rest.xml";
+
+    // Test images
+    private static final String CAT_IMAGE_JPEG = "test-documents/testJPEG.jpg";
+    private static final String CAT_IMAGE_PNG = "test-documents/testPNG.png";
+    private static final String CAT_IMAGE_GIF = "test-documents/testGIF.gif";
+
+    private static final String BASEBALL_IMAGE_JPEG = "test-documents/baseball.jpg";
+    private static final String BASEBALL_IMAGE_PNG = "test-documents/baseball.png";
+    private static final String BASEBALL_IMAGE_GIF = "test-documents/baseball.gif";
+
+    private static final ClassLoader loader = ObjectRecognitionParserTest.class.getClassLoader();
+
+    private static final Logger LOG = LoggerFactory.getLogger(ObjectRecognitionParserTest.class);
+    
+    @Test
+    public void jpegTFObjRecTest() throws IOException, TikaException, SAXException {
+        TensorflowImageRecParser p = new TensorflowImageRecParser();
+        Assume.assumeTrue(p.isAvailable());
+        try (InputStream stream = loader.getResourceAsStream(CONFIG_FILE_OBJ_REC)) {
+            assert stream != null;
+            Tika tika = new Tika(new TikaConfig(stream));
+            Metadata metadata = new Metadata();
+            try (InputStream imageStream = loader.getResourceAsStream(CAT_IMAGE_JPEG)) {
+                Reader reader = tika.parse(imageStream, metadata);
+                List<String> lines = IOUtils.readLines(reader);
+                String text = StringUtils.join(lines, " ");
+                String[] expectedObjects = {"Egyptian cat", "tabby, tabby cat"};
+                String metaValues = StringUtils.join(metadata.getValues(ObjectRecognitionParser.MD_KEY_OBJ_REC), " ");
+                for (String expectedObject : expectedObjects) {
+                    String message = "'" + expectedObject + "' must have been detected";
+                    Assert.assertTrue(message, text.contains(expectedObject));
+                    Assert.assertTrue(message, metaValues.contains(expectedObject));
+                }
+            }
+        }
+    }
+
+    @Test
+    public void jpegRESTObjRecTest() throws Exception {
+        String apiUrl = "http://localhost:8764/inception/v4/ping";
+        boolean available = false;
+        int status = 500;
+        try{
+          status = WebClient.create(apiUrl).get().getStatus();
+          available = status == 200;
+        }
+        catch(Exception ignore){}
+        Assume.assumeTrue(available);
+        String[] expectedObjects = {"Egyptian cat", "tabby, tabby cat"};
+        doRecognize(CONFIG_REST_FILE_OBJ_REC, CAT_IMAGE_JPEG,
+                ObjectRecognitionParser.MD_KEY_OBJ_REC, expectedObjects);
+    }
+
+    @Test
+    public void pngRESTObjRecTest() throws Exception {
+        String apiUrl = "http://localhost:8764/inception/v4/ping";
+        boolean available = false;
+        int status = 500;
+        try{
+            status = WebClient.create(apiUrl).get().getStatus();
+            available = status == 200;
+        }
+        catch(Exception ignore){}
+        Assume.assumeTrue(available);
+        String[] expectedObjects = {"Egyptian cat", "tabby, tabby cat"};
+        doRecognize(CONFIG_REST_FILE_OBJ_REC, CAT_IMAGE_PNG,
+                ObjectRecognitionParser.MD_KEY_OBJ_REC, expectedObjects);
+    }
+
+    @Test
+    public void gifRESTObjRecTest() throws Exception {
+        String apiUrl = "http://localhost:8764/inception/v4/ping";
+        boolean available = false;
+        int status = 500;
+        try{
+            status = WebClient.create(apiUrl).get().getStatus();
+            available = status == 200;
+        }
+        catch(Exception ignore){}
+        Assume.assumeTrue(available);
+        String[] expectedObjects = {"Egyptian cat"};
+        doRecognize(CONFIG_REST_FILE_OBJ_REC, CAT_IMAGE_GIF,
+                ObjectRecognitionParser.MD_KEY_OBJ_REC, expectedObjects);
+    }
+
+    @Test
+    public void jpegRESTim2txtTest() throws Exception {
+        String apiUrl = "http://localhost:8764/inception/v3/ping";
+        boolean available = false;
+        int status = 500;
+        try{
+          status = WebClient.create(apiUrl).get().getStatus();
+          available = status == 200;
+        }
+        catch(Exception ignore){}
+        Assume.assumeTrue(available);   
+        String[] expectedCaption = {"a baseball player holding a bat on a field"};
+        doRecognize(CONFIG_REST_FILE_IM2TXT, BASEBALL_IMAGE_JPEG,
+                ObjectRecognitionParser.MD_KEY_IMG_CAP, expectedCaption);
+    }
+
+    @Test
+    public void pngRESTim2txtTest() throws Exception {
+        String apiUrl = "http://localhost:8764/inception/v3/ping";
+        boolean available = false;
+        int status = 500;
+        try{
+          status = WebClient.create(apiUrl).get().getStatus();
+          available = status == 200;
+        }
+        catch(Exception ignore){}
+        Assume.assumeTrue(available);  
+        String[] expectedCaption = {"a baseball player holding a bat on a field"};
+        doRecognize(CONFIG_REST_FILE_IM2TXT, BASEBALL_IMAGE_PNG,
+                ObjectRecognitionParser.MD_KEY_IMG_CAP, expectedCaption);
+    }
+
+    @Test
+    public void gifRESTim2txtTest() throws Exception {
+        String apiUrl = "http://localhost:8764/inception/v3/ping";
+        boolean available = false;
+        int status = 500;
+        try{
+          status = WebClient.create(apiUrl).get().getStatus();
+          available = status == 200;
+        }
+        catch(Exception ignore){}
+        Assume.assumeTrue(available);  
+        String[] expectedCaption = {"a baseball player pitching a ball on top of a field"};
+        doRecognize(CONFIG_REST_FILE_IM2TXT, BASEBALL_IMAGE_GIF,
+                ObjectRecognitionParser.MD_KEY_IMG_CAP, expectedCaption);
+    }
+
+    private void doRecognize(String configFile, String testImg, String mdKey, String[] expectedObjects) throws Exception {
+        try (InputStream stream = loader.getResourceAsStream(configFile)) {
+            assert stream != null;
+            Tika tika = new Tika(new TikaConfig(stream));
+            Metadata metadata = new Metadata();
+            try (InputStream imageStream = loader.getResourceAsStream(testImg)) {
+                Reader reader = tika.parse(imageStream, metadata);
+                String text = IOUtils.toString(reader);
+                String metaValues = StringUtils.join(metadata.getValues(mdKey), " ");
+                LOG.info("MetaValues = {}", metaValues);
+                for (String expectedObject : expectedObjects) {
+                    String message = "'" + expectedObject + "' must have been detected";
+                    Assert.assertTrue(message, text.contains(expectedObject));
+                    Assert.assertTrue(message, metaValues.contains(expectedObject));
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/tika-parsers-advanced/tika-parser-advancedmedia-module/src/test/java/org/apache/tika/parser/recognition/tf/TensorflowVideoRecParserTest.java b/tika-parsers-advanced/tika-parser-advancedmedia-module/src/test/java/org/apache/tika/parser/recognition/tf/TensorflowVideoRecParserTest.java
new file mode 100644
index 0000000..ded686a
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-advancedmedia-module/src/test/java/org/apache/tika/parser/recognition/tf/TensorflowVideoRecParserTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.recognition.tf;
+
+import org.apache.tika.config.Param;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.recognition.RecognisedObject;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+
+@Ignore
+public class TensorflowVideoRecParserTest {
+
+    @Test
+    public void recognise() throws Exception {
+        TensorflowRESTVideoRecogniser recogniser = new TensorflowRESTVideoRecogniser();
+        recogniser.initialize(new HashMap<String, Param>());
+        try (InputStream stream = getClass().getClassLoader().getResourceAsStream("test-documents/testVideoMp4.mp4")) {
+            List<RecognisedObject> objects = recogniser.recognise(stream, new DefaultHandler(), new Metadata(), new ParseContext());
+            
+            Assert.assertTrue(objects.size() > 0);
+            Set<String> objectLabels = new HashSet<>();
+            for (RecognisedObject object : objects) {
+                objectLabels.add(object.getLabel());
+            }
+            Assert.assertTrue(objectLabels.size() > 0);
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java
new file mode 100644
index 0000000..1c1be02
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ctakes/CTAKESAnnotationProperty.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.ctakes;
+
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+
+/**
+ * This enumeration includes the properties that an {@link IdentifiedAnnotation} object can provide.
+ *
+ */
+public enum CTAKESAnnotationProperty {
+    BEGIN("start"),
+    END("end"),
+    CONDITIONAL("conditional"),
+    CONFIDENCE("confidence"),
+    DISCOVERY_TECNIQUE("discoveryTechnique"),
+    GENERIC("generic"),
+    HISTORY_OF("historyOf"),
+    ID("id"),
+    ONTOLOGY_CONCEPT_ARR("ontologyConceptArr"),
+    POLARITY("polarity");
+
+    private String name;
+
+    CTAKESAnnotationProperty(String name) {
+        this.name = name;
+    }
+
+    public String getName() {
+        return name;
+    }
+}
\ No newline at end of file
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/geo/NameEntityExtractor.java b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/geo/NameEntityExtractor.java
new file mode 100644
index 0000000..c998e40
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/geo/NameEntityExtractor.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.geo;
+
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.io.IOUtils;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.util.Span;
+
+public class NameEntityExtractor {
+    ArrayList<String> locationNameEntities;
+    String bestNameEntity;
+    private HashMap<String, Integer> tf;
+    private final NameFinderME nameFinder;
+
+    public NameEntityExtractor(NameFinderME nameFinder) throws IOException {
+        this.locationNameEntities = new ArrayList<String>();
+        this.bestNameEntity = null;
+        this.nameFinder = nameFinder;
+        this.tf = new HashMap<String, Integer>();
+    }
+
+    /*
+     * Use OpenNLP to extract location names that appear in the stream.
+     * OpenNLP's default Name Finder accuracy is not very good; please refer to
+     * its documentation.
+     *
+     * @param stream the stream passed from this.parse()
+     */
+    public void getAllNameEntitiesfromInput(InputStream stream) throws IOException {
+        String[] in = IOUtils.toString(stream, UTF_8).split(" ");
+        Span nameE[];
+        
+        //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind
+        synchronized (nameFinder) {
+            nameE = nameFinder.find(in);
+            //the same name finder is reused, so clear adaptive data
+            nameFinder.clearAdaptiveData();
+        }
+
+        String spanNames = Arrays.toString(Span.spansToStrings(nameE, in));
+        spanNames = spanNames.substring(1, spanNames.length() - 1);
+        String[] tmp = spanNames.split(",");
+
+        for (String name : tmp) {
+            name = name.trim();
+            this.locationNameEntities.add(name);
+        }
+
+
+    }
+
+    /*
+     * Get the best location entity extracted from the input stream. Simply
+     * return the most frequent entity; if several entities share the highest
+     * frequency, pick one of them. May not be the optimal solution, but it works.
+     *
+     * Operates on locationNameEntities, the OpenNLP name finder's results,
+     * stored in an ArrayList.
+     */
+    public void getBestNameEntity() {
+        if (this.locationNameEntities.size() == 0)
+            return;
+
+        for (int i = 0; i < this.locationNameEntities.size(); ++i) {
+            if (tf.containsKey(this.locationNameEntities.get(i)))
+                tf.put(this.locationNameEntities.get(i),
+                        tf.get(this.locationNameEntities.get(i)) + 1);
+            else
+                tf.put(this.locationNameEntities.get(i), 1);
+        }
+        int max = 0;
+        List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(
+                tf.entrySet());
+        Collections.shuffle(list);
+        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
+            public int compare(Map.Entry<String, Integer> o1,
+                    Map.Entry<String, Integer> o2) {
+                // Descending Order
+                return o2.getValue().compareTo(o1.getValue());
+            }
+        });
+
+        this.locationNameEntities.clear();// update so that they are in
+                                          // descending order
+        for (Map.Entry<String, Integer> entry : list) {
+            this.locationNameEntities.add(entry.getKey());
+            if (entry.getValue() > max) {
+                max = entry.getValue();
+                this.bestNameEntity = entry.getKey();
+            }
+        }
+    }
+}
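NameEntityExtractor is driven by an OpenNLP NameFinderME supplied by the caller (the
geo parser in this module). A minimal sketch of standalone use, assuming an OpenNLP
location model such as en-ner-location.bin is on the classpath; the model name and
text resource are placeholders, and the class sits in the same package because the
result fields are package-private:

    package org.apache.tika.parser.geo;

    import java.io.InputStream;

    import opennlp.tools.namefind.NameFinderME;
    import opennlp.tools.namefind.TokenNameFinderModel;

    public class NameEntityExtractorExample {
        public static void main(String[] args) throws Exception {
            ClassLoader loader = NameEntityExtractorExample.class.getClassLoader();
            // load an OpenNLP location name-finder model (model name assumed here)
            try (InputStream model = loader.getResourceAsStream("en-ner-location.bin")) {
                NameFinderME nameFinder = new NameFinderME(new TokenNameFinderModel(model));
                NameEntityExtractor extractor = new NameEntityExtractor(nameFinder);
                // extract all location names, then pick the most frequent one
                try (InputStream text = loader.getResourceAsStream("sample.txt")) {
                    extractor.getAllNameEntitiesfromInput(text);
                    extractor.getBestNameEntity();
                    System.out.println("best location: " + extractor.bestNameEntity);
                    System.out.println("all locations: " + extractor.locationNameEntities);
                }
            }
        }
    }
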
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
new file mode 100644
index 0000000..110c504
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/GrobidRESTParser.java
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.journal;
+
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.ext.multipart.Attachment;
+import org.apache.cxf.jaxrs.ext.multipart.ContentDisposition;
+import org.apache.cxf.jaxrs.ext.multipart.MultipartBody;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.ContentHandler;
+
+public class GrobidRESTParser {
+
+    private static final Logger LOG = LoggerFactory.getLogger(GrobidRESTParser.class);
+
+
+    private static final String GROBID_REST_HOST = "http://localhost:8080";
+
+    private static final String GROBID_ISALIVE_PATH = "/grobid"; // the isalive
+    // endpoint doesn't work;
+    // the reason is not clear
+
+    private static final String GROBID_PROCESSHEADER_PATH = "/processHeaderDocument";
+
+    private String restHostUrlStr;
+
+    public GrobidRESTParser() {
+        String restHostUrlStr = null;
+        try {
+            restHostUrlStr = readRestUrl();
+        } catch (IOException e) {
+            LOG.warn("can't read rest url", e);
+        }
+
+        if (restHostUrlStr == null
+                || restHostUrlStr.equals("")) {
+            this.restHostUrlStr = GROBID_REST_HOST;
+        } else {
+            this.restHostUrlStr = restHostUrlStr;
+        }
+    }
+
+    public void parse(String filePath, ContentHandler handler, Metadata metadata,
+                      ParseContext context) throws FileNotFoundException {
+
+        File pdfFile = new File(filePath);
+        ContentDisposition cd = new ContentDisposition(
+                "form-data; name=\"input\"; filename=\"" + pdfFile.getName() + "\"");
+        Attachment att = new Attachment("input", new FileInputStream(pdfFile), cd);
+        MultipartBody body = new MultipartBody(att);
+
+        Response response = WebClient
+                .create(restHostUrlStr + GROBID_PROCESSHEADER_PATH)
+                .accept(MediaType.APPLICATION_XML).type(MediaType.MULTIPART_FORM_DATA)
+                .post(body);
+
+        try {
+            String resp = response.readEntity(String.class);
+            Metadata teiMet = new TEIDOMParser().parse(resp, context);
+            for (String key : teiMet.names()) {
+                metadata.add("grobid:header_" + key, teiMet.get(key));
+            }
+        } catch (Exception e) {
+            LOG.warn("Couldn't read response", e);
+        }
+    }
+
+    private static String readRestUrl() throws IOException {
+        Properties grobidProperties = new Properties();
+        grobidProperties.load(GrobidRESTParser.class
+                .getResourceAsStream("GrobidExtractor.properties"));
+
+        return grobidProperties.getProperty("grobid.server.url");
+    }
+
+    protected static boolean canRun() {
+        Response response = null;
+
+        try {
+            response = WebClient.create(readRestUrl() + GROBID_ISALIVE_PATH)
+                    .accept(MediaType.TEXT_HTML).get();
+            String resp = response.readEntity(String.class);
+            return resp != null && !resp.equals("") && resp.startsWith("<h4>");
+        } catch (Exception e) {
+            //swallow...can't run
+            return false;
+        }
+    }
+
+}
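GrobidRESTParser posts a PDF to a GROBID server's /processHeaderDocument endpoint and
copies the header fields of the TEI response into Tika metadata under a
"grobid:header_" prefix. A minimal sketch of calling it directly, assuming a GROBID
server reachable at the URL configured in GrobidExtractor.properties; the PDF path is
a placeholder:

    package org.apache.tika.parser.journal;

    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.sax.BodyContentHandler;

    public class GrobidRESTParserExample {
        public static void main(String[] args) throws Exception {
            // canRun() probes the configured GROBID server; bail out if it is not up
            if (!GrobidRESTParser.canRun()) {
                System.err.println("GROBID server not reachable, nothing to do");
                return;
            }
            Metadata metadata = new Metadata();
            new GrobidRESTParser().parse("/path/to/journal-article.pdf",
                    new BodyContentHandler(), metadata, new ParseContext());
            // header fields extracted from the TEI response carry a grobid:header_ prefix
            for (String name : metadata.names()) {
                System.out.println(name + " = " + metadata.get(name));
            }
        }
    }
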
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/TEIDOMParser.java b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/TEIDOMParser.java
new file mode 100644
index 0000000..b79ec93
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/journal/TEIDOMParser.java
@@ -0,0 +1,882 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.journal;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.XMLReaderUtils;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+public class TEIDOMParser {
+
+    public TEIDOMParser() {
+    }
+
+    public Metadata parse(String source, ParseContext parseContext) throws TikaException, SAXException, IOException {
+
+        Document root = XMLReaderUtils.buildDOM(
+                new ByteArrayInputStream(source.getBytes(StandardCharsets.UTF_8)), parseContext);
+
+        Metadata metadata = new Metadata();
+        createGrobidMetadata(source, root.getDocumentElement(), metadata);
+        return metadata;
+    }
+
+    private void createGrobidMetadata(String source, Element root,
+                                      Metadata metadata) {
+        if (root != null) {
+
+            Node text = getFirstChild(root.getChildNodes(), "text");
+            if (text != null) {
+                parseText(text, metadata);
+            }
+            Node teiHeader = getFirstChild(root.getChildNodes(), "teiHeader");
+            Node fileDesc = getFirstChild(teiHeader.getChildNodes(), "fileDesc");
+            if (fileDesc != null) {
+                parseFileDesc(fileDesc, metadata);
+
+            }
+            Node profileDesc = getFirstChild(teiHeader.getChildNodes(), "profileDesc");
+            if (profileDesc != null) {
+                parseProfileDesc(profileDesc, metadata);
+            }
+
+        }
+
+        addStaticMet(source, root, metadata);
+    }
+
+    private void addStaticMet(String source, Element obj, Metadata metadata) {
+        metadata.add("Class", Metadata.class.getName());
+        //no longer available after we got rid of json.org's and its .toJSONObject()
+//        metadata.add("TEIJSONSource", obj.toString());
+        metadata.add("TEIXMLSource", source);
+    }
+
+    private void parseText(Node text, Metadata metadata) {
+        String lang = getFirstAttribute(text, "xml", "lang");
+        if (lang != null) {
+            metadata.add("Language", lang);
+        }
+    }
+
+    private void parseFileDesc(Node fileDesc, Metadata metadata) {
+        Node titleStmt = getFirstChild(fileDesc.getChildNodes(), "titleStmt");
+
+        if (titleStmt != null) {
+            parseTitleStmt(titleStmt, metadata);
+        }
+
+        Node sourceDesc = getFirstChild(fileDesc.getChildNodes(), "sourceDesc");
+        if (sourceDesc != null) {
+            parseSourceDesc(sourceDesc, metadata);
+        }
+    }
+
+    private void parseTitleStmt(Node titleStmt, Metadata metadata) {
+        Node title = getFirstChild(titleStmt.getChildNodes(), "title");
+        if (title != null) {
+            String titleText = title.getTextContent();
+            if (titleText != null) {
+                metadata.add("Title", titleText);
+            }
+        }
+    }
+
+    private void parseSourceDesc(Node sourceDesc, Metadata metadata) {
+        Node biblStruct = getFirstChild(sourceDesc.getChildNodes(), "biblStruct");
+        if (biblStruct != null) {
+            parseBiblStruct(biblStruct, metadata);
+        }
+    }
+
+    private void parseBiblStruct(Node biblStruct, Metadata metadata) {
+
+        Node analytic = getFirstChild(biblStruct.getChildNodes(), "analytic");
+        if (analytic != null) {
+            List<Node> authorNodes = getChildNodes(analytic.getChildNodes(), "author");
+            List<Author> authorList = new ArrayList<>();
+            for (Node authorNode : authorNodes) {
+                parseAuthor(authorNode, authorList);
+            }
+
+            metadata.add("Address", getMetadataAddresses(authorList));
+            metadata.add("Affiliation", getMetadataAffiliations(authorList));
+            metadata.add("Authors", getMetadataAuthors(authorList));
+            metadata.add("FullAffiliations",
+                    getMetadataFullAffiliations(authorList));
+
+
+        } else {
+            metadata.add("Error", "Unable to parse: no analytic section in JSON");
+        }
+
+    }
+
+    private String getMetadataFullAffiliations(List<Author> authorList) {
+        List<Affiliation> unique = new ArrayList<Affiliation>();
+        StringBuilder metAffils = new StringBuilder();
+
+        for (Author a : authorList) {
+            for (Affiliation af : a.getAffiliations()) {
+                if (!unique.contains(af)) {
+                    unique.add(af);
+                }
+            }
+        }
+        metAffils.append("[");
+        for (Affiliation af : unique) {
+            metAffils.append(af.toString());
+            metAffils.append(",");
+        }
+        metAffils.deleteCharAt(metAffils.length() - 1); // drop the trailing comma
+        metAffils.append("]");
+        return metAffils.toString();
+    }
+
+    private String getMetadataAuthors(List<Author> authorList) {
+        // generates Chris A. Mattmann 1, 2 Daniel J. Crichton 1 Nenad Medvidovic 2
+        // Steve Hughes 1
+        List<Affiliation> unique = new ArrayList<Affiliation>();
+        StringBuilder metAuthors = new StringBuilder();
+
+        for (Author a : authorList) {
+            for (Affiliation af : a.getAffiliations()) {
+                if (!unique.contains(af)) {
+                    unique.add(af);
+                }
+            }
+        }
+
+        for (Author a : authorList) {
+            metAuthors.append(printOrBlank(a.getFirstName()));
+            metAuthors.append(printOrBlank(a.getMiddleName()));
+            metAuthors.append(printOrBlank(a.getSurName()));
+
+            StringBuilder affilBuilder = new StringBuilder();
+            for (int idx = 0; idx < unique.size(); idx++) {
+                Affiliation af = unique.get(idx);
+                if (a.getAffiliations().contains(af)) {
+                    affilBuilder.append((idx + 1));
+                    affilBuilder.append(",");
+                }
+            }
+
+            if (affilBuilder.length() > 0)
+                affilBuilder.deleteCharAt(affilBuilder.length() - 1);
+
+            metAuthors.append(affilBuilder.toString());
+            metAuthors.append(" ");
+        }
+
+        return metAuthors.toString();
+    }
+
+    private String getMetadataAffiliations(List<Author> authorList) {
+        // generates 1 Jet Propulsion Laboratory California Institute of Technology
+        // ; 2 Computer Science Department University of Southern California
+        List<Affiliation> unique = new ArrayList<Affiliation>();
+        StringBuilder metAffil = new StringBuilder();
+
+        for (Author a : authorList) {
+            for (Affiliation af : a.getAffiliations()) {
+                if (!unique.contains(af)) {
+                    unique.add(af);
+                }
+            }
+        }
+
+        int count = 1;
+        for (Affiliation a : unique) {
+            metAffil.append(count);
+            metAffil.append(" ");
+            metAffil.append(a.getOrgName().toString());
+            metAffil.deleteCharAt(metAffil.length() - 1);
+            metAffil.append("; ");
+            count++;
+        }
+
+        if (count > 1) {
+            metAffil.deleteCharAt(metAffil.length() - 1);
+            metAffil.deleteCharAt(metAffil.length() - 1);
+        }
+
+        return metAffil.toString();
+    }
+
+    private String getMetadataAddresses(List<Author> authorList) {
+        // generates: "Pasadena, CA 91109, USA Los Angeles, CA 90089, USA",
+        List<Address> unique = new ArrayList<Address>();
+        StringBuilder metAddress = new StringBuilder();
+
+        for (Author a : authorList) {
+            for (Affiliation af : a.getAffiliations()) {
+                if (!unique.contains(af.getAddress())) {
+                    unique.add(af.getAddress());
+                }
+            }
+        }
+
+        for (Address ad : unique) {
+            metAddress.append(ad.toString());
+            metAddress.append(" ");
+        }
+
+        return metAddress.toString();
+    }
+
+    private void parseAuthor(Node authorNode, List<Author> authorList) {
+        Author author = new Author();
+        Node persName = getFirstChild(authorNode.getChildNodes(), "persName");
+        if (persName != null) {
+            List<Node> forenames = getChildNodes(persName.getChildNodes(), "forename");
+            for (Node forenameNode : forenames) {
+                parseNamePart(forenameNode, author);
+            }
+            Node surnameNode = getFirstChild(persName.getChildNodes(), "surname");
+            if (surnameNode != null) {
+                String surnameContent = surnameNode.getTextContent();
+                if (surnameContent != null) {
+                    author.setSurName(surnameContent);
+                }
+            }
+        }
+        List<Node> affiliationNodes = getChildNodes(authorNode.getChildNodes(), "affiliation");
+        for (Node affiliationNode : affiliationNodes) {
+            parseOneAffiliation(affiliationNode, author);
+        }
+
+
+        authorList.add(author);
+    }
+
+    private void parseNamePart(Node namePart, Author author) {
+        String type = getFirstAttribute(namePart, null, "type");
+        String content = namePart.getTextContent();
+        if (type != null && content != null) {
+
+            if (type.equals("first")) {
+                author.setFirstName(content);
+            }
+
+            if (type.equals("middle")) {
+                author.setMiddleName(content);
+            }
+        }
+    }
+
+    private void parseOneAffiliation(Node affiliationNode, Author author) {
+
+        Affiliation affiliation = new Affiliation();
+        Node address = getFirstChild(affiliationNode.getChildNodes(), "address");
+        if (address != null) {
+            parseAddress(address, affiliation);
+        }
+
+        List<Node> orgNameNodes = getChildNodes(affiliationNode.getChildNodes(), "orgName");
+        OrgName orgName = new OrgName();
+        for (Node orgNameNode : orgNameNodes) {
+            parseOrgName(orgNameNode, orgName);
+        }
+        affiliation.setOrgName(orgName);
+
+        author.getAffiliations().add(affiliation);
+    }
+
+    private void parseAddress(Node addressNode, Affiliation affiliation) {
+        Address address = new Address();
+        Node region = getFirstChild(addressNode.getChildNodes(), "region");
+        if (region != null && region.getTextContent() != null) {
+            address.setRegion(region.getTextContent());
+        }
+        Node postCode = getFirstChild(addressNode.getChildNodes(), "postCode");
+        if (postCode != null && postCode.getTextContent() != null) {
+            address.setPostCode(postCode.getTextContent());
+        }
+        Node settlementNode = getFirstChild(addressNode.getChildNodes(), "settlement");
+        if (settlementNode != null && settlementNode.getTextContent() != null) {
+            address.setSettlment(settlementNode.getTextContent());
+        }
+
+        Node countryNode = getFirstChild(addressNode.getChildNodes(), "country");
+        if (countryNode != null) {
+            Country country = new Country();
+            String key = getFirstAttribute(countryNode, null, "key");
+            if (key != null) {
+                country.setKey(key);
+            }
+            String content = countryNode.getTextContent();
+            if (content != null) {
+                country.setContent(content);
+            }
+            address.setCountry(country);
+        }
+
+        affiliation.setAddress(address);
+    }
+
+    private void parseOrgName(Node orgNode, OrgName orgName) {
+        OrgTypeName typeName = new OrgTypeName();
+        String orgContent = orgNode.getTextContent();
+        if (orgContent != null) {
+            typeName.setName(orgContent);
+        }
+        String orgType = getFirstAttribute(orgNode, null, "type");
+        if (orgType != null) {
+            typeName.setType(orgType);
+        }
+
+        orgName.getTypeNames().add(typeName);
+    }
+
+    private void parseProfileDesc(Node profileDesc, Metadata metadata) {
+        Node abstractNode = getFirstChild(profileDesc.getChildNodes(), "abstract");
+        if (abstractNode != null) {
+            Node pNode = getFirstChild(abstractNode.getChildNodes(), "p");
+            if (pNode != null) {
+                metadata.add("Abstract", pNode.getTextContent());
+            }
+        }
+
+        Node textClassNode = getFirstChild(profileDesc.getChildNodes(), "textClass");
+        if (textClassNode != null) {
+            Node keywordsNode = getFirstChild(textClassNode.getChildNodes(), "keywords");
+            if (keywordsNode != null) {
+                List<Node> terms = getChildNodes(keywordsNode.getChildNodes(), "term");
+                if (terms.size() == 0) {
+                    // test AJ15.pdf
+                    metadata.add("Keyword", keywordsNode.getTextContent());
+                } else {
+                    for (Node term : terms) {
+                        metadata.add("Keyword", term.getTextContent());
+                    }
+                }
+
+            }
+        }
+
+    }
+
+    private String printOrBlank(String val) {
+        if (val != null && !val.equals("")) {
+            return val + " ";
+        }
+        return " ";
+    }
+
+    class Author {
+
+        private String surName;
+
+        private String middleName;
+
+        private String firstName;
+
+        private List<Affiliation> affiliations;
+
+        public Author() {
+            this.surName = null;
+            this.middleName = null;
+            this.firstName = null;
+            this.affiliations = new ArrayList<Affiliation>();
+        }
+
+        /**
+         * @return the surName
+         */
+        public String getSurName() {
+            return surName;
+        }
+
+        /**
+         * @param surName the surName to set
+         */
+        public void setSurName(String surName) {
+            this.surName = surName;
+        }
+
+        /**
+         * @return the middleName
+         */
+        public String getMiddleName() {
+            return middleName;
+        }
+
+        /**
+         * @param middleName the middleName to set
+         */
+        public void setMiddleName(String middleName) {
+            this.middleName = middleName;
+        }
+
+        /**
+         * @return the firstName
+         */
+        public String getFirstName() {
+            return firstName;
+        }
+
+        /**
+         * @param firstName the firstName to set
+         */
+        public void setFirstName(String firstName) {
+            this.firstName = firstName;
+        }
+
+        /**
+         * @return the affiliations
+         */
+        public List<Affiliation> getAffiliations() {
+            return affiliations;
+        }
+
+        /**
+         * @param affiliations the affiliations to set
+         */
+        public void setAffiliations(List<Affiliation> affiliations) {
+            this.affiliations = affiliations;
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#toString()
+         */
+        @Override
+        public String toString() {
+            return "Author [surName=" + surName + ", middleName=" + middleName != null ? middleName
+                    : "" + ", firstName=" + firstName + ", affiliations=" + affiliations
+                    + "]";
+        }
+
+    }
+
+    class Affiliation {
+
+        private OrgName orgName;
+
+        private Address address;
+
+        public Affiliation() {
+            this.orgName = new OrgName();
+            this.address = new Address();
+        }
+
+        /**
+         * @return the orgName
+         */
+        public OrgName getOrgName() {
+            return orgName;
+        }
+
+        /**
+         * @param orgName the orgName to set
+         */
+        public void setOrgName(OrgName orgName) {
+            this.orgName = orgName;
+        }
+
+        /**
+         * @return the address
+         */
+        public Address getAddress() {
+            return address;
+        }
+
+        /**
+         * @param address the address to set
+         */
+        public void setAddress(Address address) {
+            this.address = address;
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#equals(java.lang.Object)
+         */
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof Affiliation)) {
+                return false;
+            }
+            Affiliation otherA = (Affiliation) obj;
+            return this.getAddress().equals(otherA.getAddress())
+                    && this.getOrgName().equals(otherA.getOrgName());
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#toString()
+         */
+        @Override
+        public String toString() {
+            return "Affiliation {orgName=" + orgName + ", address=" + address + "}";
+        }
+
+    }
+
+    class OrgName {
+        private List<OrgTypeName> typeNames;
+
+        public OrgName() {
+            this.typeNames = new ArrayList<OrgTypeName>();
+        }
+
+        /**
+         * @return the typeNames
+         */
+        public List<OrgTypeName> getTypeNames() {
+            return typeNames;
+        }
+
+        /**
+         * @param typeNames the typeNames to set
+         */
+        public void setTypeNames(List<OrgTypeName> typeNames) {
+            this.typeNames = typeNames;
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#toString()
+         */
+        @Override
+        public String toString() {
+            StringBuilder builder = new StringBuilder();
+            for (OrgTypeName on : this.typeNames) {
+                builder.append(on.getName());
+                builder.append(" ");
+            }
+            return builder.toString();
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#equals(java.lang.Object)
+         */
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof OrgName)) {
+                return false;
+            }
+            OrgName otherA = (OrgName) obj;
+
+            if (otherA.getTypeNames() != null) {
+                if (this.typeNames == null) {
+                    return false;
+                } else {
+                    return this.typeNames.size() == otherA.getTypeNames().size();
+                }
+            } else {
+                return this.typeNames == null;
+            }
+
+        }
+
+    }
+
+    class OrgTypeName {
+        private String name;
+        private String type;
+
+        public OrgTypeName() {
+            this.name = null;
+            this.type = null;
+        }
+
+        /**
+         * @return the name
+         */
+        public String getName() {
+            return name;
+        }
+
+        /**
+         * @param name the name to set
+         */
+        public void setName(String name) {
+            this.name = name;
+        }
+
+        /**
+         * @return the type
+         */
+        public String getType() {
+            return type;
+        }
+
+        /**
+         * @param type the type to set
+         */
+        public void setType(String type) {
+            this.type = type;
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#equals(java.lang.Object)
+         */
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof OrgTypeName)) {
+                return false;
+            }
+            OrgTypeName otherOrgName = (OrgTypeName) obj;
+            return (type == null ? otherOrgName.getType() == null
+                        : type.equals(otherOrgName.getType()))
+                    && (name == null ? otherOrgName.getName() == null
+                        : name.equals(otherOrgName.getName()));
+        }
+
+    }
+
+    private class Address {
+
+        private String region;
+        private String postCode;
+        private String settlment;
+        private Country country;
+
+        public Address() {
+            this.region = null;
+            this.postCode = null;
+            this.settlment = null;
+            this.country = new Country();
+        }
+
+        /**
+         * @return the region
+         */
+        public String getRegion() {
+            return region;
+        }
+
+        /**
+         * @param region the region to set
+         */
+        public void setRegion(String region) {
+            this.region = region;
+        }
+
+        /**
+         * @return the postCode
+         */
+        public String getPostCode() {
+            return postCode;
+        }
+
+        /**
+         * @param postCode the postCode to set
+         */
+        public void setPostCode(String postCode) {
+            this.postCode = postCode;
+        }
+
+        /**
+         * @return the settlment
+         */
+        public String getSettlment() {
+            return settlment;
+        }
+
+        /**
+         * @param settlment the settlment to set
+         */
+        public void setSettlment(String settlment) {
+            this.settlment = settlment;
+        }
+
+        /**
+         * @return the country
+         */
+        public Country getCountry() {
+            return country;
+        }
+
+        /**
+         * @param country the country to set
+         */
+        public void setCountry(Country country) {
+            this.country = country;
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#equals(java.lang.Object)
+         */
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof Address)) {
+                return false;
+            }
+            Address otherA = (Address) obj;
+            return (settlment == null ? otherA.getSettlment() == null
+                        : settlment.equals(otherA.getSettlment()))
+                    && (country == null ? otherA.getCountry() == null
+                        : country.equals(otherA.getCountry()))
+                    && (postCode == null ? otherA.getPostCode() == null
+                        : postCode.equals(otherA.getPostCode()))
+                    && (region == null ? otherA.getRegion() == null
+                        : region.equals(otherA.getRegion()));
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#toString()
+         */
+        @Override
+        public String toString() {
+            StringBuilder builder = new StringBuilder();
+            builder.append(settlment);
+            builder.append(", ");
+            builder.append(region);
+            builder.append(" ");
+            builder.append(postCode);
+            builder.append(" ");
+            builder.append(country.getContent());
+            return builder.toString();
+        }
+    }
+
+    private class Country {
+        private String key;
+        private String content;
+
+        public Country() {
+            this.key = null;
+            this.content = null;
+        }
+
+        /**
+         * @return the key
+         */
+        public String getKey() {
+            return key;
+        }
+
+        /**
+         * @param key the key to set
+         */
+        public void setKey(String key) {
+            this.key = key;
+        }
+
+        /**
+         * @return the content
+         */
+        public String getContent() {
+            return content;
+        }
+
+        /**
+         * @param content the content to set
+         */
+        public void setContent(String content) {
+            this.content = content;
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see java.lang.Object#equals(java.lang.Object)
+         */
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof Country)) {
+                return false;
+            }
+            Country otherC = (Country) obj;
+            boolean keyMatches = (key == null)
+                    ? otherC.getKey() == null : key.equals(otherC.getKey());
+            boolean contentMatches = (content == null)
+                    ? otherC.getContent() == null : content.equals(otherC.getContent());
+            return keyMatches && contentMatches;
+        }
+    }
+
+    //returns first child with this name, null otherwise
+    private static Node getFirstChild(NodeList childNodes, String name) {
+        for (int i = 0; i < childNodes.getLength(); i++) {
+            Node n = childNodes.item(i);
+            if (n.getNodeName().equals(name)) {
+                return n;
+            }
+        }
+        return null;
+    }
+
+    private static String getFirstAttribute(Node node, String ns, String name) {
+        if (node.hasAttributes()) {
+            NamedNodeMap attrs = node.getAttributes();
+            for (int i = 0; i < attrs.getLength(); i++) {
+                Node attr = attrs.item(i);
+                if (attr.getLocalName().equals(name)) {
+                    return attr.getNodeValue();
+                }
+            }
+        }
+        return null;
+    }
+
+    private static List<Node> getChildNodes(NodeList childNodes, String localName) {
+        List<Node> ret = new ArrayList<>();
+        for (int i = 0; i < childNodes.getLength(); i++) {
+            Node child = childNodes.item(i);
+            if (child.getLocalName() != null && child.getLocalName().equals(localName)) {
+                ret.add(child);
+            }
+        }
+        return ret;
+    }
+
+}
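
For reference, the TEIDOMParser above is driven the same way the new TEITest below drives it: read the TEI XML produced by GROBID into a String and hand it to parse(). A minimal sketch (the resource path and class name here are illustrative, not part of this commit):

    import java.io.ByteArrayOutputStream;
    import java.io.InputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.tika.io.IOUtils;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.ParseContext;
    import org.apache.tika.parser.journal.TEIDOMParser;

    public class TEISketch {
        public static void main(String[] args) throws Exception {
            // Read a GROBID TEI result into a String.
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            try (InputStream is = TEISketch.class
                    .getResourceAsStream("/test-documents/testTEI.xml")) {
                IOUtils.copy(is, bos);
            }
            String teiXml = new String(bos.toByteArray(), StandardCharsets.UTF_8);

            // parse() builds the DOM, walks it with the helpers above,
            // and returns the extracted fields as Tika Metadata.
            Metadata metadata = new TEIDOMParser().parse(teiXml, new ParseContext());
            System.out.println(metadata.get("Title"));
            System.out.println(metadata.get("Authors"));
        }
    }
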
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
new file mode 100644
index 0000000..0e3d2d7
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.ner.nltk;
+
+import org.apache.tika.parser.ner.NERecogniser;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Collection;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Properties;
+import java.util.Iterator;
+import java.util.Locale;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+import org.apache.cxf.jaxrs.client.WebClient;
+
+/**
+ *  This class offers an implementation of {@link NERecogniser} based on
+ *  the ne_chunk() module of NLTK. This recogniser requires additional setup
+ *  because it makes HTTP requests to an endpoint server that runs NLTK.
+ *  See <a href="http://wiki.apache.org/tika/TikaAndNLTK">TikaAndNLTK</a> for details.
+ *
+ */
+public class NLTKNERecogniser implements NERecogniser {
+
+    private static final Logger LOG = LoggerFactory.getLogger(NLTKNERecogniser.class);
+    private static boolean available = false;
+    private static final String NLTK_REST_HOST = "http://localhost:8881";
+    private String restHostUrlStr;
+    /**
+     * Some common entity types identified by NLTK.
+     */
+    public static final Set<String> ENTITY_TYPES = new HashSet<String>(){{
+        add("NAMES");
+    }};
+
+
+    public NLTKNERecogniser(){
+        try {
+
+            String restHostUrlStr="";
+            try {
+                restHostUrlStr = readRestUrl();
+            } catch (IOException e) {
+                LOG.warn("Can't read rest url", e);
+            }
+
+            if (restHostUrlStr == null || restHostUrlStr.equals("")) {
+                this.restHostUrlStr = NLTK_REST_HOST;
+            } else {
+                this.restHostUrlStr = restHostUrlStr;
+            }
+
+            Response response = WebClient.create(this.restHostUrlStr)
+                    .accept(MediaType.TEXT_HTML).get();
+            int responseCode = response.getStatus();
+            if (responseCode == 200) {
+                available = true;
+            } else {
+                LOG.info("NLTK REST server is not running");
+            }
+
+        } catch (Exception e) {
+            LOG.warn(e.getMessage(), e);
+        }
+    }
+
+    private static String readRestUrl() throws IOException {
+        Properties nltkProperties = new Properties();
+        nltkProperties.load(NLTKNERecogniser.class
+                .getResourceAsStream("NLTKServer.properties"));
+
+        return nltkProperties.getProperty("nltk.server.url");
+    }
+
+    /**
+     * @return {@code true} if server endpoint is available.
+     * returns {@code false} if the server endpoint is not available for service.
+     */
+    public boolean isAvailable() {
+        return available;
+    }
+
+    /**
+     * Gets set of entity types recognised by this recogniser
+     * @return set of entity classes/types
+     */
+    public Set<String> getEntityTypes() {
+        return ENTITY_TYPES;
+    }
+
+    /**
+     * recognises names of entities in the text
+     * @param text text which possibly contains names
+     * @return map of entity type -&gt; set of names
+     */
+    public Map<String, Set<String>> recognise(String text) {
+        Map<String, Set<String>> entities = new HashMap<>();
+        try {
+            String url = restHostUrlStr + "/nltk";
+            Response response = WebClient.create(url).accept(MediaType.TEXT_HTML).post(text);
+            int responseCode = response.getStatus();
+            if (responseCode == 200) {
+                String result = response.readEntity(String.class);
+                JSONParser parser = new JSONParser();
+                JSONObject j = (JSONObject) parser.parse(result);
+                Iterator<?> keys = j.keySet().iterator();
+                while (keys.hasNext()) {
+                    String key = (String) keys.next();
+                    if (!key.equals("result")) {
+                        ENTITY_TYPES.add(key);
+                        entities.put(key.toUpperCase(Locale.ENGLISH),
+                                new HashSet<String>((Collection<String>) j.get(key)));
+                    }
+                }
+            }
+        }
+        catch (Exception e) {
+            LOG.debug(e.getMessage(), e);
+        }
+
+        return entities;
+    }
+
+
+}
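
Since NLTKNERecogniser only needs an NLTK REST endpoint, it can also be exercised directly, outside the NamedEntityParser chain. A minimal sketch, assuming the endpoint from NLTKServer.properties (default http://localhost:8881) is up; the class name and sample sentence are illustrative:

    import java.util.Map;
    import java.util.Set;

    import org.apache.tika.parser.ner.nltk.NLTKNERecogniser;

    public class NLTKSketch {
        public static void main(String[] args) {
            // The constructor probes the configured NLTK REST endpoint.
            NLTKNERecogniser recogniser = new NLTKNERecogniser();
            if (!recogniser.isAvailable()) {
                System.err.println("NLTK REST server is not reachable");
                return;
            }
            // recognise() POSTs the text to <server>/nltk and maps the JSON
            // response to entity type -> set of names.
            Map<String, Set<String>> entities =
                    recogniser.recognise("America is a big country.");
            for (Map.Entry<String, Set<String>> e : entities.entrySet()) {
                System.out.println(e.getKey() + " -> " + e.getValue());
            }
        }
    }
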
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser b/tika-parsers-advanced/tika-parser-nlp-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
new file mode 100644
index 0000000..cea7492
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
@@ -0,0 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.parser.geo.GeoParser
+org.apache.tika.parser.journal.JournalParser
+org.apache.tika.parser.sentiment.SentimentAnalysisParser
\ No newline at end of file
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/ner/grobid/GrobidServer.properties b/tika-parsers-advanced/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/ner/grobid/GrobidServer.properties
new file mode 100644
index 0000000..a7718ab
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/main/resources/org/apache/tika/parser/ner/grobid/GrobidServer.properties
@@ -0,0 +1,17 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+grobid.server.url=http://localhost:8080
+grobid.endpoint.text=/processQuantityText
\ No newline at end of file
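
These two keys are all the GROBID quantities client needs, and they are read like any other classpath properties file. A sketch of how such a file is typically loaded (this is not the GrobidRESTParser's actual code, just the usual Properties pattern; the class name is illustrative):

    import java.io.IOException;
    import java.io.InputStream;
    import java.util.Properties;

    public class GrobidConfigSketch {
        public static void main(String[] args) throws IOException {
            Properties props = new Properties();
            try (InputStream is = GrobidConfigSketch.class.getResourceAsStream(
                    "/org/apache/tika/parser/ner/grobid/GrobidServer.properties")) {
                props.load(is);
            }
            // Base URL of the GROBID service plus the quantity-text endpoint.
            String endpoint = props.getProperty("grobid.server.url")
                    + props.getProperty("grobid.endpoint.text");
            System.out.println("GROBID endpoint: " + endpoint);
        }
    }
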
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/journal/TEITest.java b/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/journal/TEITest.java
new file mode 100644
index 0000000..2b82af9
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/journal/TEITest.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.journal;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.junit.Test;
+
+public class TEITest extends TikaTest {
+
+
+    @Test
+    public void testBasic() throws Exception {
+        TEIDOMParser teiParser = new TEIDOMParser();
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        try (InputStream is = getResourceAsStream("/test-documents/testTEI.xml")) {
+            IOUtils.copy(is, bos);
+        }
+        String xml = new String(bos.toByteArray(), StandardCharsets.UTF_8);
+        Metadata metadata = teiParser.parse(xml, new ParseContext());
+        assertEquals("Montbonnot Saint-Martin, Montbonnot Saint-Martin, Montbonnot Saint-Martin, " +
+                "Montbonnot Saint-Martin, null 38330, 38330, 38330, 38330 " +
+                "France, France, France, France ", metadata.get("Address"));
+        String[] keywords = new String[]{
+                "F22 [Analysis of Algorithms and Problem Complexity]: Nonnumerical Algorithms and Problems\u2014Sequencing",
+                "and scheduling; D41 [Operating Systems]: Process management\u2014Scheduling, Concurrency",
+                "Keywords",
+                "Parallel Computing, Algorithms, Scheduling, Parallel Tasks,",
+                "Moldable Tasks, Bi-criteria"
+        };
+        assertArrayEquals(keywords, metadata.getValues("Keyword"));
+        assertEquals("Pierre-François  Dutot 1 Lionel  Eyraud 1 Grégory  Gr´ 1 Grégory  Mouní 1 Denis  Trystram 1 ",
+                metadata.get("Authors"));
+        assertEquals("Bi-criteria Algorithm for Scheduling Jobs on Cluster Platforms *",
+                metadata.get("Title"));
+        assertEquals("1 ID-IMAG ID-IMAG ID-IMAG ID-IMAG", metadata.get("Affiliation"));
+        assertEquals("[Affiliation {orgName=ID-IMAG ID-IMAG ID-IMAG ID-IMAG , " +
+                        "address=Montbonnot Saint-Martin, Montbonnot Saint-Martin, Montbonnot Saint-Martin, Montbonnot Saint-Martin, " +
+                        "null 38330, 38330, 38330, 38330 France, France, France, France}" +
+                        "[Affiliation {orgName=ID-IMAG ID-IMAG ID-IMAG ID-IMAG , " +
+                        "address=Montbonnot Saint-Martin, Montbonnot Saint-Martin, Montbonnot Saint-Martin, Montbonnot Saint-Martin, " +
+                        "null 38330, 38330, 38330, 38330 France, France, France, France}]",
+                metadata.get("FullAffiliations"));
+    }
+}
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java b/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java
new file mode 100644
index 0000000..6e17415
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.ner;
+
+import org.apache.tika.Tika;
+import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ner.opennlp.OpenNLPNERecogniser;
+import org.apache.tika.parser.ner.regex.RegexNERecogniser;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.HashSet;
+
+import static org.junit.Assume.assumeTrue;
+
+/**
+ * Test case for {@link NamedEntityParser}
+ */
+public class NamedEntityParserTest extends TikaTest {
+
+    public static final String CONFIG_FILE = "tika-config.xml";
+
+    @Test
+    public void testParse() throws Exception {
+
+        //test config is added to resources directory
+        TikaConfig config = new TikaConfig(getClass().getResourceAsStream(CONFIG_FILE));
+        Tika tika = new Tika(config);
+        String text = "I am student at University of Southern California (USC)," +
+                " located in Los Angeles . USC's football team is called by name Trojans." +
+                " Mr. John McKay was a head coach of the team from 1960 - 1975";
+        Metadata md = new Metadata();
+        tika.parse(new ByteArrayInputStream(text.getBytes(Charset.defaultCharset())), md);
+
+        HashSet<String> set = new HashSet<String>();
+        set.addAll(Arrays.asList(md.getValues("X-Parsed-By")));
+        assumeTrue(set.contains(NamedEntityParser.class.getName()));
+
+        set.clear();
+        set.addAll(Arrays.asList(md.getValues("NER_PERSON")));
+        assumeTrue(set.contains("John McKay"));
+
+        set.clear();
+        set.addAll(Arrays.asList(md.getValues("NER_LOCATION")));
+        assumeTrue(set.contains("Los Angeles"));
+
+        set.clear();
+        set.addAll(Arrays.asList(md.getValues("NER_ORGANIZATION")));
+        assumeTrue(set.contains("University of Southern California"));
+
+        set.clear();
+        set.addAll(Arrays.asList(md.getValues("NER_DATE")));
+        assumeTrue(set.contains("1960 - 1975"));
+
+    }
+
+    @Test
+    public void testNerChain() throws Exception {
+        String classNames = OpenNLPNERecogniser.class.getName()
+                + "," + RegexNERecogniser.class.getName();
+        System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, classNames);
+        TikaConfig config = new TikaConfig(getClass().getResourceAsStream(CONFIG_FILE));
+        Tika tika = new Tika(config);
+        String text = "University of Southern California (USC), is located in Los Angeles ." +
+                " Campus is busy from monday to saturday";
+        Metadata md = new Metadata();
+        tika.parse(new ByteArrayInputStream(text.getBytes(Charset.defaultCharset())), md);
+        HashSet<String> keys = new HashSet<>(Arrays.asList(md.names()));
+        assumeTrue(keys.contains("NER_WEEK_DAY"));
+        assumeTrue(keys.contains("NER_LOCATION"));
+
+    }
+}
\ No newline at end of file
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java b/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
new file mode 100644
index 0000000..4b0101e
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.ner.nltk;
+
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ner.NamedEntityParser;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertTrue;
+
+public class NLTKNERecogniserTest {
+    @Test
+    public void testGetEntityTypes() throws Exception {
+        String text = "America is a big country.";
+        System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, NLTKNERecogniser.class.getName());
+
+        Tika tika = new Tika(new TikaConfig(NamedEntityParser.class.getResourceAsStream("tika-config.xml")));
+        Metadata md = new Metadata();
+        tika.parse(new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)), md);
+
+        Set<String> names = new HashSet<>(Arrays.asList(md.getValues("NER_NAMES")));
+        if (names.size() != 0) {
+            assertTrue(names.contains("America"));
+            assertTrue(names.size() == 1);
+        }
+    }
+}
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh b/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh
new file mode 100755
index 0000000..c17899e
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "Getting OpenNLP NER models"
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-person.bin" -O ner-person.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-location.bin" -O ner-location.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-organization.bin" -O ner-organization.bin
+
+# Additional 4
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-date.bin" -O ner-date.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-money.bin" -O ner-money.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-time.bin" -O ner-time.bin
+wget "http://opennlp.sourceforge.net/models-1.5/en-ner-percentage.bin" -O ner-percentage.bin
\ No newline at end of file
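
The models fetched above are standard OpenNLP 1.5 name-finder models. Outside of Tika they can be loaded with OpenNLP's own API; a minimal sketch (not the OpenNLPNERecogniser's code, and the sentence is just sample input):

    import java.io.FileInputStream;
    import java.io.InputStream;

    import opennlp.tools.namefind.NameFinderME;
    import opennlp.tools.namefind.TokenNameFinderModel;
    import opennlp.tools.util.Span;

    public class OpenNLPModelSketch {
        public static void main(String[] args) throws Exception {
            // Load the person-name model downloaded by get-models.sh.
            try (InputStream modelIn = new FileInputStream("ner-person.bin")) {
                TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
                NameFinderME finder = new NameFinderME(model);

                // OpenNLP expects pre-tokenized input.
                String[] tokens = "Mr. John McKay coached the Trojans".split(" ");
                for (Span span : finder.find(tokens)) {
                    StringBuilder name = new StringBuilder();
                    for (int i = span.getStart(); i < span.getEnd(); i++) {
                        name.append(tokens[i]).append(' ');
                    }
                    System.out.println(span.getType() + ": " + name.toString().trim());
                }
            }
        }
    }
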
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/regex/tika-config.xml b/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/regex/tika-config.xml
new file mode 100644
index 0000000..267c399
--- /dev/null
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/regex/tika-config.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.ner.NamedEntityParser">
+            <mime>text/plain</mime>
+            <mime>text/html</mime>
+            <mime>application/xhtml+xml</mime>
+        </parser>
+    </parsers>
+
+</properties>
\ No newline at end of file
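
This config registers NamedEntityParser for plain text, HTML and XHTML; the regex tests pair it with RegexNERecogniser via the same system-property mechanism NamedEntityParserTest uses above. A minimal sketch of that pairing (the class name, resource path and sample sentence are illustrative):

    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.tika.Tika;
    import org.apache.tika.config.TikaConfig;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.ner.NamedEntityParser;
    import org.apache.tika.parser.ner.regex.RegexNERecogniser;

    public class RegexNerSketch {
        public static void main(String[] args) throws Exception {
            // Select the regex recogniser, then load the config above.
            System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL,
                    RegexNERecogniser.class.getName());
            TikaConfig config = new TikaConfig(RegexNerSketch.class.getResourceAsStream(
                    "/org/apache/tika/parser/ner/regex/tika-config.xml"));
            Tika tika = new Tika(config);

            Metadata md = new Metadata();
            tika.parse(new ByteArrayInputStream(
                    "Campus is busy from monday to saturday"
                            .getBytes(StandardCharsets.UTF_8)), md);
            for (String name : md.names()) {
                System.out.println(name + " = " + md.getValues(name).length + " value(s)");
            }
        }
    }
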