You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/08/09 14:37:54 UTC

[tika] branch main updated: TIKA-3833 -- bump priority for bz2 slightly

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 67b2c368f TIKA-3833 -- bump priority for bz2 slightly
67b2c368f is described below

commit 67b2c368fe28cc2b2a4e13d5065db6877c26d1be
Author: tballison <ta...@apache.org>
AuthorDate: Tue Aug 9 10:37:43 2022 -0400

    TIKA-3833 -- bump priority for bz2 slightly
---
 .../main/resources/org/apache/tika/mime/tika-mimetypes.xml |   3 ++-
 .../src/test/resources/test-documents/test-bz2.txt.bz2     | Bin 0 -> 56 bytes
 .../src/test/java/org/apache/tika/mime/TestMimeTypes.java  |   3 +++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 9b24ae3f4..babd241db 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -3515,7 +3515,8 @@
   <mime-type type="application/x-bzip2">
     <sub-class-of type="application/x-bzip"/>
     <_comment>Bzip 2 UNIX Compressed File</_comment>
-    <magic priority="40">
+    <!-- priority is slightly higher than bzip's because this magic match is slightly longer -->
+    <magic priority="41">
       <match value="\x42\x5a\x68\x39\x31" type="string" offset="0"/>
     </magic>
     <glob pattern="*.bz2"/>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/test-bz2.txt.bz2 b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/test-bz2.txt.bz2
new file mode 100644
index 000000000..2df50c499
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/test-bz2.txt.bz2 differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index ea2ecbeff..06a986fda 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -384,6 +384,9 @@ public class TestMimeTypes {
         assertTypeByName("application/x-cpio", "test.cpio");
         assertTypeByName("application/vnd.ms-cab-compressed", "test.cab");
 
+        assertTypeByNameAndData("application/x-bzip2", "test-bz2.txt.bz2");
+        assertTypeByName("application/x-bzip2", "test-bz2.txt.bz2");
+        assertTypeByData("application/x-bzip2", "test-bz2.txt.bz2");
         // TODO Add an example .deb and .udeb, then check these
 
         // Check the mime magic patterns for them work too