You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/09/04 21:42:14 UTC
[tika] branch main updated: TIKA-3179 -- rename tika-nlp to
tika-age-recogniser and add explanation of parser module hierarchy to
overview.html.
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new e91be37 TIKA-3179 -- rename tika-nlp to tika-age-recogniser and add explanation of parser module hierarchy to overview.html.
e91be37 is described below
commit e91be37c1c7ce706af4c2dbe23eb24308f134547
Author: tallison <ta...@apache.org>
AuthorDate: Fri Sep 4 17:41:56 2020 -0400
TIKA-3179 -- rename tika-nlp to tika-age-recogniser and add explanation of parser module hierarchy to overview.html.
---
overview.html | 36 ++++++++++++++++++++++
tika-parsers-advanced/pom.xml | 2 +-
.../{tika-nlp => tika-age-recogniser}/pom.xml | 6 ++--
.../tika/parser/recognition/AgeRecogniser.java | 0
.../parser/recognition/AgeRecogniserConfig.java | 0
.../tika/parser/recognition/AgeRecogniserTest.java | 0
.../tika/parser/recognition/tika-config-age.xml | 0
.../tika/parser/ner/opennlp/ModelGetter.groovy | 2 +-
8 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/overview.html b/overview.html
new file mode 100644
index 0000000..0132ea2
--- /dev/null
+++ b/overview.html
@@ -0,0 +1,36 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+ <title>Apache Tika</title>
+</head>
+<body>
+
+<h1>Parsers</h1>
+As of Apache Tika 2.0, there are three parser packages.
+<ol>
+ <li>tika-parser-modules: basic parsers -- Java only, no native code, no parsers that require REST calls</li>
+ <li>tika-parsers-extended: extended parsers -- allowed: native code and parsers that require REST calls, heavy dependencies</li>
+ <li>tika-parsers-advanced: parsers/recognizers that rely on machine learning and/or natural language processing</li>
+</ol>
+<p>
+The <i>tika-parser-modules</i> modules are packaged as one in <i>tika-parsers</i>. The tika-app and tika-server jars
+depend on <i>tika-parsers</i>. If you need an extended-parser or an advanced parser, you're responsible for adding
+those jars to your classpath.
+</p>
+</body>
+</html>
\ No newline at end of file
diff --git a/tika-parsers-advanced/pom.xml b/tika-parsers-advanced/pom.xml
index 637883c..d9320fc 100644
--- a/tika-parsers-advanced/pom.xml
+++ b/tika-parsers-advanced/pom.xml
@@ -36,7 +36,7 @@
<module>tika-parser-nlp-module</module>
<!-- needs to come after tika-parser-nlp-module because tika-parser-nlp-module
downloads model files -->
- <module>tika-nlp</module>
+ <module>tika-age-recogniser</module>
<module>tika-parser-advancedmedia-module</module>
<module>tika-dl</module>
</modules>
diff --git a/tika-parsers-advanced/tika-nlp/pom.xml b/tika-parsers-advanced/tika-age-recogniser/pom.xml
similarity index 96%
rename from tika-parsers-advanced/tika-nlp/pom.xml
rename to tika-parsers-advanced/tika-age-recogniser/pom.xml
index 6f75b36..d8cc0cd 100644
--- a/tika-parsers-advanced/tika-nlp/pom.xml
+++ b/tika-parsers-advanced/tika-age-recogniser/pom.xml
@@ -25,7 +25,7 @@
<version>2.0.0-SNAPSHOT</version>
</parent>
- <artifactId>tika-nlp</artifactId>
+ <artifactId>tika-age-recogniser</artifactId>
<packaging>jar</packaging>
<name>Apache Tika Natural Language Processing</name>
@@ -280,8 +280,8 @@
<copy failonerror="false" file="${basedir}/../tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/en-pos-maxent.bin" todir="${basedir}/model/opennlp/" />
<copy failonerror="false" file="${basedir}/../tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/en-sent.bin" todir="${basedir}/model/opennlp/" />
<copy failonerror="false" file="${basedir}/../tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/en-token.bin" todir="${basedir}/model/opennlp/" />
- <copy failonerror="false" file="${basedir}/../tika-nlp/src/test/resources/org/apache/tika/parser/recognition/classify-bigram.bin" todir="${basedir}/model/org/apache/tika/parser/recognition/" />
- <copy failonerror="false" file="${basedir}/../tika-nlp/src/test/resources/org/apache/tika/parser/recognition/regression-global.bin" todir="${basedir}/model/org/apache/tika/parser/recognition/" />
+ <copy failonerror="false" file="${basedir}/../tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/classify-bigram.bin" todir="${basedir}/model/org/apache/tika/parser/recognition/" />
+ <copy failonerror="false" file="${basedir}/../tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/regression-global.bin" todir="${basedir}/model/org/apache/tika/parser/recognition/" />
</target>
</configuration>
</execution>
diff --git a/tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java b/tika-parsers-advanced/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java
similarity index 100%
rename from tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java
rename to tika-parsers-advanced/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniser.java
diff --git a/tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java b/tika-parsers-advanced/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
similarity index 100%
rename from tika-parsers-advanced/tika-nlp/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
rename to tika-parsers-advanced/tika-age-recogniser/src/main/java/org/apache/tika/parser/recognition/AgeRecogniserConfig.java
diff --git a/tika-parsers-advanced/tika-nlp/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java b/tika-parsers-advanced/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
similarity index 100%
rename from tika-parsers-advanced/tika-nlp/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
rename to tika-parsers-advanced/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
diff --git a/tika-parsers-advanced/tika-nlp/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml b/tika-parsers-advanced/tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml
similarity index 100%
rename from tika-parsers-advanced/tika-nlp/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml
rename to tika-parsers-advanced/tika-age-recogniser/src/test/resources/org/apache/tika/parser/recognition/tika-config-age.xml
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy b/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
index ff4d1a9..57e64e9 100644
--- a/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy
@@ -105,7 +105,7 @@ def agePrefixPath = "src/test/resources/org/apache/tika/parser/recognition/"
if (new File("tika-parsers").exists() && new File("tika-app").exists() ) {
// running from parent maven project, but resources should go to sub-module
prefixPath = "tika-parsers-advanced/tika-parser-nlp-module/" + prefixPath
- agePrefixPath = "tika-parsers-advanced/tika-nlp/" + agePrefixPath
+ agePrefixPath = "tika-parsers-advanced/tika-age-recogniser/" + agePrefixPath
}
def modelFiles = //filePath : url