You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/03/31 17:38:40 UTC
[tika] branch main updated: TIKA-3991 -- Add detection for canon raw: crw, cr2 and cr3 (#1033)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 9c6b7f76d TIKA-3991 -- Add detection for canon raw: crw, cr2 and cr3 (#1033)
9c6b7f76d is described below
commit 9c6b7f76d83653628203d62c378b840cf5e0be90
Author: Tim Allison <ta...@apache.org>
AuthorDate: Fri Mar 31 13:38:32 2023 -0400
TIKA-3991 -- Add detection for canon raw: crw, cr2 and cr3 (#1033)
* TIKA-3991 -- add detection for canon raw crw, cr2 and cr3
---
CHANGES.txt | 2 ++
.../org/apache/tika/mime/tika-mimetypes.xml | 35 ++++++++++++++++++++++
.../java/org/apache/tika/mime/TestMimeTypes.java | 3 +-
3 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 818e935c3..eeab24304 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
Release 2.7.1 - ???
+ * Add magic detection for canon raw file types: crw, cr2 and cr3 (TIKA-3991).
+
* Add detection and a parser for ActiveMime files (TIKA-3987).
* Users may now avoid the ZeroByteFileException via a
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 175a07a49..a877a2e11 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6275,11 +6275,46 @@
</mime-type>
<mime-type type="image/x-raw-canon">
+ <!-- should we move this to: image/x-canon-crw ? -->
<_comment>Canon raw image</_comment>
+ <magic priority="50">
+ <match value="\x49\x49\x1a\x00\x00\x00HEAPCCDR" type="string" offset="0"/>
+ </magic>
<glob pattern="*.crw"/>
+ </mime-type>
+
+ <mime-type type="image/x-canon-cr2">
+ <_comment>Canon raw image, version 2, TIFF-based</_comment>
+ <!-- basically tiff header with 'CR' at offsets 8-9; major version is at offset 10, minor at 11.
+ priority must be higher than tiff -->
+ <magic priority="60">
+ <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian -->
+ <match value="MM\x00\x2a" type="string" offset="0">
+ <match value="CR" type="string" offset="8"/>
+ </match>
+ <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
+ <match value="II\x2a\x00" type="string" offset="0">
+ <match value="CR" type="string" offset="8"/>
+ </match>
+ <!-- MM.+ = Big endian (M=Motorola) and 0x002b in big endian -->
+ <match value="MM\x00\x2b" type="string" offset="0">
+ <match value="CR" type="string" offset="8"/>
+ </match>
+ </magic>
+ <sub-class-of type="image/tiff" />
<glob pattern="*.cr2"/>
</mime-type>
+ <mime-type type="image/x-canon-cr3">
+ <_comment>Canon raw image, version 3, Quicktime-based</_comment>
+ <glob pattern="*.cr3"/>
+ <!-- needs to be higher than quicktime -->
+ <magic priority="60">
+ <match value="ftypcrx " type="string" offset="4"/>
+ </magic>
+ <sub-class-of type="video/quicktime" />
+ </mime-type>
+
<mime-type type="image/x-raw-kodak">
<_comment>Kodak raw image</_comment>
<glob pattern="*.k25"/>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 0fa73894c..8a33c4fe6 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -833,7 +833,8 @@ public class TestMimeTypes {
assertTypeByName("image/x-raw-hasselblad", "x.3fr");
assertTypeByName("image/x-raw-fuji", "x.raf");
assertTypeByName("image/x-raw-canon", "x.crw");
- assertTypeByName("image/x-raw-canon", "x.cr2");
+ assertTypeByName("image/x-canon-cr2", "x.cr2");
+ assertTypeByName("image/x-canon-cr3", "x.cr3");
assertTypeByName("image/x-raw-kodak", "x.k25");
assertTypeByName("image/x-raw-kodak", "x.kdc");
assertTypeByName("image/x-raw-kodak", "x.dcs");