You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/11/18 15:21:27 UTC
[tika] 01/01: TIKA-3308 -- add detection for svg files that lack the xml header
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-3308
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 04341240651f12bd63585ef3b4dc78df83630c7c
Author: tallison <ta...@apache.org>
AuthorDate: Fri Nov 18 10:21:11 2022 -0500
TIKA-3308 -- add detection for svg files that lack the xml header
---
CHANGES.txt | 2 ++
.../src/main/resources/org/apache/tika/mime/tika-mimetypes.xml | 6 ++++++
.../src/test/resources/test-documents/testSVG_no_xml_header.svg | 4 ++++
.../src/test/java/org/apache/tika/mime/TestMimeTypes.java | 1 +
4 files changed, 13 insertions(+)
diff --git a/CHANGES.txt b/CHANGES.txt
index 54d14214e..85c7796a6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
Release 2.6.1 - ???
+ * Add SVG detection for svg files lacking the xml header (TIKA-3308).
+
* Add a JDBCPipesReporter (TIKA-3931).
* Add multivalued field strategy option in jdbc-emitter (TIKA-3930).
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 2baa84d0e..4d1347c93 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -5827,6 +5827,12 @@
<acronym>SVG</acronym>
<_comment>Scalable Vector Graphics</_comment>
<root-XML localName="svg" namespaceURI="http://www.w3.org/2000/svg"/>
+ <magic priority="50">
+ <!-- SVG without an XML declaration: starts with the svg root element and declares the SVG namespace within the first 256 bytes -->
+ <match value="&lt;svg" type="string" offset="0">
+ <match value="http://www.w3.org/2000/svg" type="string" offset="5:256"/>
+ </match>
+ </magic>
<glob pattern="*.svg"/>
<glob pattern="*.svgz"/>
</mime-type>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSVG_no_xml_header.svg b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSVG_no_xml_header.svg
new file mode 100644
index 000000000..0e53461be
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSVG_no_xml_header.svg
@@ -0,0 +1,4 @@
+<svg width="1cm" height="1cm" version="1.1" xmlns="http://www.w3.org/2000/svg">
+ <desc>Test SVG image</desc>
+ <rect x="0.1cm" y="0.1cm" width="0.8cm" height="0.8cm"/>
+</svg>
\ No newline at end of file
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 1f475f2bd..d14c5eb9b 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -607,6 +607,7 @@ public class TestMimeTypes {
assertTypeByData("image/svg+xml", "testSVG.svg");
assertTypeByName("image/svg+xml", "x.svg");
assertTypeByName("image/svg+xml", "x.SVG");
+ assertTypeByData("image/svg+xml", "testSVG_no_xml_header.svg");
// Should *.svgz be detected as svg or gzip?
assertType("application/gzip", "testSVG.svgz");