You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/03/02 06:42:16 UTC
[07/20] tika git commit: nltk modification
nltk modification
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/1b14b39d
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/1b14b39d
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/1b14b39d
Branch: refs/heads/master
Commit: 1b14b39d3e1b453620b2f7b26a933103a78c958a
Parents: 14ca320
Author: manali <ma...@gmail.com>
Authored: Fri Feb 19 17:37:25 2016 -0800
Committer: manali <ma...@gmail.com>
Committed: Fri Feb 19 17:37:25 2016 -0800
----------------------------------------------------------------------
.../src/main/java/org/apache/tika/mime/MimeType.java | 1 +
.../resources/org/apache/tika/mime/tika-mimetypes.xml | 13 +++++++------
.../tika/parser/ner/nltk/NLTKNERecogniserTest.java | 2 +-
3 files changed, 9 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/1b14b39d/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
index b4d651e..fc520cf 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
@@ -270,6 +270,7 @@ public final class MimeType implements Comparable<MimeType>, Serializable {
}
}
+
void addMagic(Magic magic) {
if (magic == null) {
return;
http://git-wip-us.apache.org/repos/asf/tika/blob/1b14b39d/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 1d7b42b..52dd67b 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -38,6 +38,12 @@
-->
<mime-info>
+ <mime-type type="application/dicom">
+ <_comment>DICOM medical imaging data</_comment>
+ <magic priority="50">
+ <match value="DICM" type="string" offset="128"/>
+ </magic>
+ </mime-type>
<mime-type type="application/activemessage"/>
<mime-type type="application/andrew-inset">
<glob pattern="*.ez"/>
@@ -112,12 +118,7 @@
<mime-type type="application/dec-dx"/>
<mime-type type="application/dialog-info+xml"/>
- <mime-type type="application/dicom">
- <_comment>DICOM medical imaging data</_comment>
- <magic priority="50">
- <match value="DICM" type="string" offset="128"/>
- </magic>
- </mime-type>
+
<mime-type type="application/dita+xml">
<sub-class-of type="application/xml"/>
http://git-wip-us.apache.org/repos/asf/tika/blob/1b14b39d/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
index 563e836..2861051 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
@@ -25,6 +25,7 @@ import org.junit.Ignore;
import org.junit.Test;
import java.io.ByteArrayInputStream;
+import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
@@ -37,7 +38,6 @@ public class NLTKNERecogniserTest {
public void testGetEntityTypes() throws Exception {
String text = "America";
System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, NLTKNERecogniser.class.getName());
-
Tika tika = new Tika(new TikaConfig(NamedEntityParser.class.getResourceAsStream("tika-config.xml")));
Metadata md = new Metadata();
tika.parse(new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)), md);