You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/09/04 21:42:14 UTC

[tika] branch main updated: TIKA-3179 -- rename tika-nlp to tika-age-recogniser and add explanation of parser module hierarchy to overview.html.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new e91be37  TIKA-3179 -- rename tika-nlp to tika-age-recogniser and add explanation of parser module hierarchy to overview.html.
e91be37 is described below

commit e91be37c1c7ce706af4c2dbe23eb24308f134547
Author: tallison <ta...@apache.org>
AuthorDate: Fri Sep 4 17:41:56 2020 -0400

    TIKA-3179 -- rename tika-nlp to tika-age-recogniser and add explanation of parser module hierarchy to overview.html.
---
 overview.html                                      | 36 ++++++++++++++++++++++
 tika-parsers-advanced/pom.xml                      |  2 +-
 .../{tika-nlp => tika-age-recogniser}/pom.xml      |  6 ++--
 .../tika/parser/recognition/AgeRecogniser.java     |  0
 .../parser/recognition/AgeRecogniserConfig.java    |  0
 .../tika/parser/recognition/AgeRecogniserTest.java |  0
 .../tika/parser/recognition/tika-config-age.xml    |  0
 .../tika/parser/ner/opennlp/ModelGetter.groovy     |  2 +-
 8 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/overview.html b/overview.html
new file mode 100644
index 0000000..0132ea2
--- /dev/null
+++ b/overview.html
@@ -0,0 +1,36 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+    <title>Apache Tika</title>
+</head>
+<body>
+
+<h1>Parsers</h1>
+As of Apache Tika 2.0, there are three parser packages.
+<ol>
+    <li>tika-parser-modules: basic parsers -- Java only, no native code, no parsers that require REST calls</li>
+    <li>tika-parsers-extended: extended parsers -- allowed: native code and parsers that require REST calls, heavy dependencies</li>
+    <li>tika-parsers-advanced: parsers/recognizers that rely on machine learning and/or natural language processing</li>
+</ol>
+<p>
+The <i>tika-parser-modules</i> modules are packaged as one in <i>tika-parsers</i>.  The tika-app and tika-server jars
+depend on <i>tika-parsers</i>.  If you need an extended parser or an advanced parser, you're responsible for adding
+those jars to your classpath.
+</p>
+</body>
+</html>
\ No newline at end of file
diff --git a/tika-parsers-advanced/pom.xml b/tika-parsers-advanced/pom.xml
index 637883c..d9320fc 100644
--- a/tika-parsers-advanced/pom.xml
+++ b/tika-parsers-advanced/pom.xml
@@ -36,7 +36,7 @@
         <module>tika-parser-nlp-module</module>
         <!-- needs to come after tika-parser-nlp-module because tika-parser-nlp-module
              downloads model files -->
-        <module>tika-nlp</module>
+        <module>tika-age-recogniser</module>
         <module>tika-parser-advancedmedia-module</module>
         <module>tika-dl</module>
     </modules>
diff --git a/tika-parsers-advanced/tika-nlp/pom.xml b/tika-parsers-advanced/tika-age-recogniser/pom.xml
similarity index 96%
rename from tika-parsers-advanced/tika-nlp/pom.xml
rename to tika-parsers-advanced/tika-age-recogniser/pom.xml
index 6f75b36..d8cc0cd 100644
--- a/tika-parsers-advanced/tika-nlp/pom.xml
+++ b/tika-parsers-advanced/tika-age-recogniser/pom.xml
@@ -25,7 +25,7 @@
     <version>2.0.0-SNAPSHOT</version>
   </parent>
 
-  <artifactId>tika-nlp</artifactId>
+  <artifactId>tika-age-recogniser</artifactId>
   <packaging>jar</packaging>
 
   <name>Apache Tika Natural Language Processing</name>
@@ -280,8 +280,8 @@
                             <copy failonerror="false" file="${basedir}/../tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/en-pos-maxent.bin" todir="${basedir}/model/opennlp/" />
                             <copy failonerror="false" file="${basedir}/../tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/en-sent.bin" todir="${basedir}/model/opennlp/" />
 			                <copy failonerror="false" file="${basedir}/../tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/en-token.bin" todir="${basedir}/model/opennlp/" />
-			                <copy failonerror="false" file="${basedir}/../tika-nlp/src/test/resources/org/apache/tika/parser/recognition/classify-bigram.bin" todir="${basedir}/model/org/apache/tika/parser/recognition/" />
-			                <copy failonerror="false" file="${basedir}/../tika-nlp/src/test/resources/org/apache/tika/parser/recognition/regression-global.bin" todir="${basedir}/model/org/apache/tika/parser/recognition/" />
+			                <copy failonerror="false" file="${basedir}/../tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/classify-bigram.bin" todir="${basedir}/model/org/apache/tika/parser/recognition/" />
+			                <copy failonerror="false" file="${basedir}/../tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/regression-global.bin" todir="${basedir}/model/org/apache/tika/parser/recognition/" />
                         </target>
                     </configuration>
                 </execution>
diff --git a/tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java b/tika-parsers-advanced/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java
similarity index 100%
rename from tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java
rename to tika-parsers-advanced/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java
diff --git a/tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java b/tika-parsers-advanced/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
similarity index 100%
rename from tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
rename to tika-parsers-advanced/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
diff --git a/tika-parsers-advanced/tika-nlp/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java b/tika-parsers-advanced/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
similarity index 100%
rename from tika-parsers-advanced/tika-nlp/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
rename to tika-parsers-advanced/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
diff --git a/tika-parsers-advanced/tika-nlp/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml b/tika-parsers-advanced/tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml
similarity index 100%
rename from tika-parsers-advanced/tika-nlp/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml
rename to tika-parsers-advanced/tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy b/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
index ff4d1a9..57e64e9 100644
--- a/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
@@ -105,7 +105,7 @@ def agePrefixPath = "src/test/resources/org/apache/tika/parser/recognition/"
 if (new File("tika-parsers").exists() && new File("tika-app").exists()  ) {
     // running from parent maven project, but resources should go to sub-module
     prefixPath = "tika-parsers-advanced/tika-parser-nlp-module/" + prefixPath
-    agePrefixPath = "tika-parsers-advanced/tika-nlp/" + agePrefixPath
+    agePrefixPath = "tika-parsers-advanced/tika-age-recogniser/" + agePrefixPath
 }
 
 def modelFiles = //filePath : url