You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/03/22 15:51:52 UTC

[tika] 01/01: TIKA-3991 -- add detection for Canon raw crw, cr2 and cr3

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3991
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 93e256523a7e50e0c9688d39dcdc1e0010c931c5
Author: tallison <ta...@apache.org>
AuthorDate: Wed Mar 22 11:51:32 2023 -0400

    TIKA-3991 -- add detection for Canon raw crw, cr2 and cr3
---
 CHANGES.txt                                        |  3 ++
 .../org/apache/tika/mime/tika-mimetypes.xml        | 36 +++++++++++++++++++++-
 .../java/org/apache/tika/mime/TestMimeTypes.java   |  5 +--
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 818e935c3..e4fe13726 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
 Release 2.7.1 - ???
 
+   * Fix 'image/x-raw-canon' to 'image/x-canon-crw' and add magic
+     detection for Canon raw file types: crw, cr2 and cr3 (TIKA-3991).
+
    * Add detection and a parser for ActiveMime files (TIKA-3987).
 
    * Users may now avoid the ZeroByteFileException via a
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index cb1b5d48c..e74de8a48 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6268,12 +6268,46 @@
     <glob pattern="*.raf"/>
   </mime-type>
 
-  <mime-type type="image/x-raw-canon">
+  <mime-type type="image/x-canon-crw">
     <_comment>Canon raw image</_comment>
+    <magic priority="50">
+      <match value="\x49\x49\x1a\x00\x00\x00HEAPCCDR" type="string" offset="0"/>
+    </magic>
     <glob pattern="*.crw"/>
+  </mime-type>
+
+  <mime-type type="image/x-canon-cr2">
+    <_comment>Canon raw image, version 2, TIFF-based</_comment>
+    <!-- basically a tiff header with 'CR' at offsets 8-9; major version is at offset 10, minor at 11.
+     priority must be higher than tiff -->
+    <magic priority="60">
+      <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian    -->
+      <match value="MM\x00\x2a" type="string" offset="0">
+        <match value="CR" type="string" offset="8"/>
+      </match>
+      <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
+      <match value="II\x2a\x00" type="string" offset="0">
+        <match value="CR" type="string" offset="8"/>
+      </match>
+      <!-- MM.+ = Big endian (M=Motorola) and 0x002b (BigTIFF) in big endian -->
+      <match value="MM\x00\x2b" type="string" offset="0">
+        <match value="CR" type="string" offset="8"/>
+      </match>
+    </magic>
+    <sub-class-of type="image/tiff" />
     <glob pattern="*.cr2"/>
   </mime-type>
 
+  <mime-type type="image/x-canon-cr3">
+    <_comment>Canon raw image, version 3, Quicktime-based</_comment>
+    <glob pattern="*.cr3"/>
+    <!-- needs to be higher than quicktime -->
+    <magic priority="60">
+      <match value="ftypcrx " type="string" offset="4"/>
+    </magic>
+    <sub-class-of type="video/quicktime" />
+  </mime-type>
+
   <mime-type type="image/x-raw-kodak">
     <_comment>Kodak raw image</_comment>
     <glob pattern="*.k25"/>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index d14c5eb9b..ea9d8d5f8 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -832,8 +832,9 @@ public class TestMimeTypes {
         assertTypeByName("image/x-raw-adobe", "x.DNG");
         assertTypeByName("image/x-raw-hasselblad", "x.3fr");
         assertTypeByName("image/x-raw-fuji", "x.raf");
-        assertTypeByName("image/x-raw-canon", "x.crw");
-        assertTypeByName("image/x-raw-canon", "x.cr2");
+        assertTypeByName("image/x-canon-crw", "x.crw");
+        assertTypeByName("image/x-canon-cr2", "x.cr2");
+        assertTypeByName("image/x-canon-cr3", "x.cr3");
         assertTypeByName("image/x-raw-kodak", "x.k25");
         assertTypeByName("image/x-raw-kodak", "x.kdc");
         assertTypeByName("image/x-raw-kodak", "x.dcs");