You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/03/28 16:46:29 UTC
[tika] 02/03: TIKA-2616 -- preserve message/news
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 1cd565c1296e815b2f8f052556f9437920181428
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Mar 28 11:46:31 2018 -0400
TIKA-2616 -- preserve message/news
---
.../src/main/resources/org/apache/tika/mime/tika-mimetypes.xml | 5 ++++-
.../src/test/java/org/apache/tika/mime/TestMimeTypes.java | 6 +++++-
.../src/test/resources/test-documents/testMessageNews.txt | 10 ++++++++++
3 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 8e131f9..346eb73 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -5723,7 +5723,6 @@
<match value="\nReturn-Path:" type="stringignorecase" offset="0:1000"/>
<match value="\nX-Originating-IP:" type="stringignorecase" offset="0:1000"/>
<match value="\nReceived:" type="stringignorecase" offset="0:1000"/>
- <match value="\nMessage-ID:" type="stringignorecase" offset="0:1000"/>
<match value="Date:" type="string" offset="0"/>
<match value="User-Agent:" type="string" offset="0"/>
<match value="MIME-Version:" type="stringignorecase" offset="0"/>
@@ -5746,6 +5745,10 @@
<match value="\nMIME-Version:" type="stringignorecase" offset="0:8192"/>
</match>
</magic>
+ <magic priority="40">
+ <!-- lower priority than message/news -->
+ <match value="\nMessage-ID:" type="stringignorecase" offset="0:1000"/>
+ </magic>
<glob pattern="*.eml"/>
<glob pattern="*.mime"/>
<glob pattern="*.mht"/>
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 0e43c25..e1f9b70 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -912,7 +912,11 @@ public class TestMimeTypes {
assertTypeDetection("testEML_embedded_xhtml_and_img.eml", "message/rfc822");
}
-
+
+ @Test
+ public void testMessageNews() throws Exception {
+ assertTypeByData("message/news", "testMessageNews.txt");
+ }
@Test
public void testAxCrypt() throws Exception {
// test-TXT.txt encrypted with a key of "tika"
diff --git a/tika-parsers/src/test/resources/test-documents/testMessageNews.txt b/tika-parsers/src/test/resources/test-documents/testMessageNews.txt
new file mode 100644
index 0000000..11e8eff
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testMessageNews.txt
@@ -0,0 +1,10 @@
+Path: moe.ksu.ksu.edu!zaphod.mps.ohio-state.edu!mips!mips!public!btr.btr.com!mcmelmon
+From: xyz@qrs
+Newsgroups: alt.startrek.creative
+Subject: something or other
+Message-ID: <62...@public.BTR.COM>
+Date: 15 Apr 92 16:46:49 GMT
+Distribution: na
+Lines: 475
+
+Scene IV
\ No newline at end of file
--
To stop receiving notification emails like this one, please contact
tallison@apache.org.