You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/03/28 16:46:29 UTC

[tika] 02/03: TIKA-2616 -- preserve message/news

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 1cd565c1296e815b2f8f052556f9437920181428
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Mar 28 11:46:31 2018 -0400

    TIKA-2616 -- preserve message/news
---
 .../src/main/resources/org/apache/tika/mime/tika-mimetypes.xml |  5 ++++-
 .../src/test/java/org/apache/tika/mime/TestMimeTypes.java      |  6 +++++-
 .../src/test/resources/test-documents/testMessageNews.txt      | 10 ++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 8e131f9..346eb73 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -5723,7 +5723,6 @@
       <match value="\nReturn-Path:" type="stringignorecase" offset="0:1000"/>
       <match value="\nX-Originating-IP:" type="stringignorecase" offset="0:1000"/>
       <match value="\nReceived:" type="stringignorecase" offset="0:1000"/>
-      <match value="\nMessage-ID:" type="stringignorecase" offset="0:1000"/>
       <match value="Date:" type="string" offset="0"/>
       <match value="User-Agent:" type="string" offset="0"/>
       <match value="MIME-Version:" type="stringignorecase" offset="0"/>
@@ -5746,6 +5745,10 @@
         <match value="\nMIME-Version:" type="stringignorecase" offset="0:8192"/>
       </match>
     </magic>
+    <magic priority="40">
+      <!-- lower priority than message/news -->
+      <match value="\nMessage-ID:" type="stringignorecase" offset="0:1000"/>
+    </magic>
     <glob pattern="*.eml"/>
     <glob pattern="*.mime"/>
     <glob pattern="*.mht"/>
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 0e43c25..e1f9b70 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -912,7 +912,11 @@ public class TestMimeTypes {
         assertTypeDetection("testEML_embedded_xhtml_and_img.eml", "message/rfc822");
 
     }
-    
+
+    @Test
+    public void testMessageNews() throws Exception {
+        assertTypeByData("message/news", "testMessageNews.txt");
+    }
     @Test
     public void testAxCrypt() throws Exception {
         // test-TXT.txt encrypted with a key of "tika"
diff --git a/tika-parsers/src/test/resources/test-documents/testMessageNews.txt b/tika-parsers/src/test/resources/test-documents/testMessageNews.txt
new file mode 100644
index 0000000..11e8eff
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testMessageNews.txt
@@ -0,0 +1,10 @@
+Path: moe.ksu.ksu.edu!zaphod.mps.ohio-state.edu!mips!mips!public!btr.btr.com!mcmelmon
+From: xyz@qrs
+Newsgroups: alt.startrek.creative
+Subject: something or other
+Message-ID: <62...@public.BTR.COM>
+Date: 15 Apr 92 16:46:49 GMT
+Distribution: na
+Lines: 475
+
+Scene IV
\ No newline at end of file

-- 
To stop receiving notification emails like this one, please contact
tallison@apache.org.