You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2016/03/02 06:42:16 UTC

[07/20] tika git commit: nltk modification

nltk modification


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/1b14b39d
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/1b14b39d
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/1b14b39d

Branch: refs/heads/master
Commit: 1b14b39d3e1b453620b2f7b26a933103a78c958a
Parents: 14ca320
Author: manali <ma...@gmail.com>
Authored: Fri Feb 19 17:37:25 2016 -0800
Committer: manali <ma...@gmail.com>
Committed: Fri Feb 19 17:37:25 2016 -0800

----------------------------------------------------------------------
 .../src/main/java/org/apache/tika/mime/MimeType.java   |  1 +
 .../resources/org/apache/tika/mime/tika-mimetypes.xml  | 13 +++++++------
 .../tika/parser/ner/nltk/NLTKNERecogniserTest.java     |  2 +-
 3 files changed, 9 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/1b14b39d/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
index b4d651e..fc520cf 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
@@ -270,6 +270,7 @@ public final class MimeType implements Comparable<MimeType>, Serializable {
         }
     }
 
+
     void addMagic(Magic magic) {
         if (magic == null) {
             return;

http://git-wip-us.apache.org/repos/asf/tika/blob/1b14b39d/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 1d7b42b..52dd67b 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -38,6 +38,12 @@
 -->
 <mime-info>
 
+  <mime-type type="application/dicom">
+    <_comment>DICOM medical imaging data</_comment>
+    <magic priority="50">
+      <match value="DICM" type="string" offset="128"/>
+    </magic>
+  </mime-type>
   <mime-type type="application/activemessage"/>
   <mime-type type="application/andrew-inset">
     <glob pattern="*.ez"/>
@@ -112,12 +118,7 @@
   <mime-type type="application/dec-dx"/>
   <mime-type type="application/dialog-info+xml"/>
 
-  <mime-type type="application/dicom">
-    <_comment>DICOM medical imaging data</_comment>
-    <magic priority="50">
-      <match value="DICM" type="string" offset="128"/>
-    </magic>
-  </mime-type>
+
 
   <mime-type type="application/dita+xml">
     <sub-class-of type="application/xml"/>

http://git-wip-us.apache.org/repos/asf/tika/blob/1b14b39d/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
index 563e836..2861051 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
@@ -25,6 +25,7 @@ import org.junit.Ignore;
 import org.junit.Test;
 
 import java.io.ByteArrayInputStream;
+import java.io.File;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.HashSet;
@@ -37,7 +38,6 @@ public class NLTKNERecogniserTest {
     public void testGetEntityTypes() throws Exception {
         String text = "America";
         System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, NLTKNERecogniser.class.getName());
-
         Tika tika = new Tika(new TikaConfig(NamedEntityParser.class.getResourceAsStream("tika-config.xml")));
         Metadata md = new Metadata();
         tika.parse(new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)), md);