You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/03/22 15:51:52 UTC
[tika] 01/01: TIKA-3991 -- add detection for Canon raw crw, cr2 and cr3
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-3991
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 93e256523a7e50e0c9688d39dcdc1e0010c931c5
Author: tallison <ta...@apache.org>
AuthorDate: Wed Mar 22 11:51:32 2023 -0400
TIKA-3991 -- add detection for Canon raw crw, cr2 and cr3
---
CHANGES.txt | 3 ++
.../org/apache/tika/mime/tika-mimetypes.xml | 36 +++++++++++++++++++++-
.../java/org/apache/tika/mime/TestMimeTypes.java | 5 +--
3 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 818e935c3..e4fe13726 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
Release 2.7.1 - ???
+ * Fix 'image/x-raw-canon' to 'image/x-canon-crw' and add
+ magic detection for Canon raw file types: crw, cr2 and cr3 (TIKA-3991).
+
* Add detection and a parser for ActiveMime files (TIKA-3987).
* Users may now avoid the ZeroByteFileException via a
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index cb1b5d48c..e74de8a48 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6268,12 +6268,46 @@
<glob pattern="*.raf"/>
</mime-type>
- <mime-type type="image/x-raw-canon">
+ <mime-type type="image/x-canon-crw">
<_comment>Canon raw image</_comment>
+ <magic priority="50">
+ <match value="\x49\x49\x1a\x00\x00\x00HEAPCCDR" type="string" offset="0"/>
+ </magic>
<glob pattern="*.crw"/>
+ </mime-type>
+
+ <mime-type type="image/x-canon-cr2">
+ <_comment>Canon raw image, version 2, TIFF-based</_comment>
+ <!-- basically a TIFF header with 'CR' at offset 8; the major version is at offset 10, the minor at 11.
+ priority must be higher than tiff -->
+ <magic priority="60">
+ <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian -->
+ <match value="MM\x00\x2a" type="string" offset="0">
+ <match value="CR" type="string" offset="8"/>
+ </match>
+ <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
+ <match value="II\x2a\x00" type="string" offset="0">
+ <match value="CR" type="string" offset="8"/>
+ </match>
+ <!-- MM.+ = Big endian (M=Motorola) and 0x002b (BigTIFF) in big endian -->
+ <match value="MM\x00\x2b" type="string" offset="0">
+ <match value="CR" type="string" offset="8"/>
+ </match>
+ </magic>
+ <sub-class-of type="image/tiff" />
<glob pattern="*.cr2"/>
</mime-type>
+ <mime-type type="image/x-canon-cr3">
+ <_comment>Canon raw image, version 3, Quicktime-based</_comment>
+ <glob pattern="*.cr3"/>
+ <!-- needs to be higher than quicktime -->
+ <magic priority="60">
+ <match value="ftypcrx " type="string" offset="4"/>
+ </magic>
+ <sub-class-of type="video/quicktime" />
+ </mime-type>
+
<mime-type type="image/x-raw-kodak">
<_comment>Kodak raw image</_comment>
<glob pattern="*.k25"/>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index d14c5eb9b..ea9d8d5f8 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -832,8 +832,9 @@ public class TestMimeTypes {
assertTypeByName("image/x-raw-adobe", "x.DNG");
assertTypeByName("image/x-raw-hasselblad", "x.3fr");
assertTypeByName("image/x-raw-fuji", "x.raf");
- assertTypeByName("image/x-raw-canon", "x.crw");
- assertTypeByName("image/x-raw-canon", "x.cr2");
+ assertTypeByName("image/x-canon-crw", "x.crw");
+ assertTypeByName("image/x-canon-cr2", "x.cr2");
+ assertTypeByName("image/x-canon-cr3", "x.cr3");
assertTypeByName("image/x-raw-kodak", "x.k25");
assertTypeByName("image/x-raw-kodak", "x.kdc");
assertTypeByName("image/x-raw-kodak", "x.dcs");