You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by lf...@apache.org on 2022/05/22 21:24:08 UTC

[tika] branch main updated: TIKA-3771: remove eml magic too common causing false positives

This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new ed1c86a52 TIKA-3771: remove eml magic too common causing false positives
ed1c86a52 is described below

commit ed1c86a52d8e07d0d57decfe82ed73a90fb57c8e
Author: Luis Nassif <lf...@gmail.com>
AuthorDate: Sun May 22 18:15:43 2022 -0300

    TIKA-3771: remove eml magic too common causing false positives
---
 .../resources/org/apache/tika/mime/tika-mimetypes.xml     |   1 -
 .../test/java/org/apache/tika/mime/MimeDetectionTest.java |   8 ++++++++
 .../resources/org/apache/tika/mime/test-pngNotEml.bin     | Bin 0 -> 938 bytes
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 43d7820d3..db7359608 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6457,7 +6457,6 @@
         <match value="\nUser-Agent:" type="string" offset="0:1024"/>
         <match value="\nX-Mailer:" type="string" offset="0:1024"/>
         <match value="\nX-Originating-IP:" type="stringignorecase" offset="0:1024"/>
-        <match value="\nX-" type="string" offset="0:1024"/>
         <match value="\nDKIM-" type="string" offset="0:1024"/>
         <match value="\nARC-" type="string" offset="0:1024"/>        
       </match>
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
index 2c1a71ebd..690da4f29 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
@@ -260,4 +260,12 @@ public class MimeDetectionTest {
         metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "testingTESTINGtesting");
         assertEquals(helloXType, MIME_TYPES.detect(new ByteArrayInputStream(helloWorld), metadata));
     }
+
+    /**
+     * Test for TIKA-3771.
+     */
+    @Test
+    public void testPNGWithSomeEmlHeaders() throws IOException {
+        testFile("image/png", "test-pngNotEml.bin");
+    }
 }
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-pngNotEml.bin b/tika-core/src/test/resources/org/apache/tika/mime/test-pngNotEml.bin
new file mode 100644
index 000000000..9fcd031ab
Binary files /dev/null and b/tika-core/src/test/resources/org/apache/tika/mime/test-pngNotEml.bin differ