You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by lf...@apache.org on 2022/05/22 21:24:08 UTC
[tika] branch main updated: TIKA-3771: remove eml magic too common causing false positives
This is an automated email from the ASF dual-hosted git repository.
lfcnassif pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new ed1c86a52 TIKA-3771: remove eml magic too common causing false positives
ed1c86a52 is described below
commit ed1c86a52d8e07d0d57decfe82ed73a90fb57c8e
Author: Luis Nassif <lf...@gmail.com>
AuthorDate: Sun May 22 18:15:43 2022 -0300
TIKA-3771: remove eml magic too common causing false positives
---
.../resources/org/apache/tika/mime/tika-mimetypes.xml | 1 -
.../test/java/org/apache/tika/mime/MimeDetectionTest.java | 8 ++++++++
.../resources/org/apache/tika/mime/test-pngNotEml.bin | Bin 0 -> 938 bytes
3 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 43d7820d3..db7359608 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6457,7 +6457,6 @@
<match value="\nUser-Agent:" type="string" offset="0:1024"/>
<match value="\nX-Mailer:" type="string" offset="0:1024"/>
<match value="\nX-Originating-IP:" type="stringignorecase" offset="0:1024"/>
- <match value="\nX-" type="string" offset="0:1024"/>
<match value="\nDKIM-" type="string" offset="0:1024"/>
<match value="\nARC-" type="string" offset="0:1024"/>
</match>
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
index 2c1a71ebd..690da4f29 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
@@ -260,4 +260,12 @@ public class MimeDetectionTest {
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "testingTESTINGtesting");
assertEquals(helloXType, MIME_TYPES.detect(new ByteArrayInputStream(helloWorld), metadata));
}
+
+ /**
+ * Test for TIKA-3771: test-pngNotEml.bin must be detected as image/png, not falsely as eml.
+ */
+ @Test
+ public void testPNGWithSomeEmlHeaders() throws IOException {
+ testFile("image/png", "test-pngNotEml.bin");
+ }
}
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-pngNotEml.bin b/tika-core/src/test/resources/org/apache/tika/mime/test-pngNotEml.bin
new file mode 100644
index 000000000..9fcd031ab
Binary files /dev/null and b/tika-core/src/test/resources/org/apache/tika/mime/test-pngNotEml.bin differ