You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/03/31 17:38:40 UTC

[tika] branch main updated: TIKA-3991 -- Add detection for canon raw: crw, cr2 and cr3 (#1033)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 9c6b7f76d TIKA-3991 -- Add detection for canon raw: crw, cr2 and cr3 (#1033)
9c6b7f76d is described below

commit 9c6b7f76d83653628203d62c378b840cf5e0be90
Author: Tim Allison <ta...@apache.org>
AuthorDate: Fri Mar 31 13:38:32 2023 -0400

    TIKA-3991 -- Add detection for canon raw: crw, cr2 and cr3 (#1033)
    
    * TIKA-3991 -- add detection for canon raw crw, cr2 and cr3
---
 CHANGES.txt                                        |  2 ++
 .../org/apache/tika/mime/tika-mimetypes.xml        | 35 ++++++++++++++++++++++
 .../java/org/apache/tika/mime/TestMimeTypes.java   |  3 +-
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 818e935c3..eeab24304 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
 Release 2.7.1 - ???
 
+   * Add magic detection for canon raw file types: crw, cr2 and cr3 (TIKA-3991).
+
    * Add detection and a parser for ActiveMime files (TIKA-3987).
 
    * Users may now avoid the ZeroByteFileException via a
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 175a07a49..a877a2e11 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6275,11 +6275,46 @@
   </mime-type>
 
   <mime-type type="image/x-raw-canon">
+    <!-- should we move this to: image/x-canon-crw ? -->
     <_comment>Canon raw image</_comment>
+    <magic priority="50">
+      <match value="\x49\x49\x1a\x00\x00\x00HEAPCCDR" type="string" offset="0"/>
+    </magic>
     <glob pattern="*.crw"/>
+  </mime-type>
+
+  <mime-type type="image/x-canon-cr2">
+    <_comment>Canon raw image, version 2, TIFF-based</_comment>
+    <!-- basically a tiff header with 'CR' at offsets 8-9; major version is at offset 10, minor at offset 11.
+     priority must be higher than tiff -->
+    <magic priority="60">
+      <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian    -->
+      <match value="MM\x00\x2a" type="string" offset="0">
+        <match value="CR" type="string" offset="8"/>
+      </match>
+      <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
+      <match value="II\x2a\x00" type="string" offset="0">
+        <match value="CR" type="string" offset="8"/>
+      </match>
+      <!-- MM.+ = Big endian (M=Motorola) and 0x002b (BigTIFF) in big endian -->
+      <match value="MM\x00\x2b" type="string" offset="0">
+        <match value="CR" type="string" offset="8"/>
+      </match>
+    </magic>
+    <sub-class-of type="image/tiff" />
     <glob pattern="*.cr2"/>
   </mime-type>
 
+  <mime-type type="image/x-canon-cr3">
+    <_comment>Canon raw image, version 3, Quicktime-based</_comment>
+    <glob pattern="*.cr3"/>
+    <!-- needs to be higher than quicktime -->
+    <magic priority="60">
+      <match value="ftypcrx " type="string" offset="4"/>
+    </magic>
+    <sub-class-of type="video/quicktime" />
+  </mime-type>
+
   <mime-type type="image/x-raw-kodak">
     <_comment>Kodak raw image</_comment>
     <glob pattern="*.k25"/>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 0fa73894c..8a33c4fe6 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -833,7 +833,8 @@ public class TestMimeTypes {
         assertTypeByName("image/x-raw-hasselblad", "x.3fr");
         assertTypeByName("image/x-raw-fuji", "x.raf");
         assertTypeByName("image/x-raw-canon", "x.crw");
-        assertTypeByName("image/x-raw-canon", "x.cr2");
+        assertTypeByName("image/x-canon-cr2", "x.cr2");
+        assertTypeByName("image/x-canon-cr3", "x.cr3");
         assertTypeByName("image/x-raw-kodak", "x.k25");
         assertTypeByName("image/x-raw-kodak", "x.kdc");
         assertTypeByName("image/x-raw-kodak", "x.dcs");