You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/11/18 16:48:27 UTC
[tika] branch main updated: TIKA-3308 -- add detection for svg files that lack the xml header (#808)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 0145868ab TIKA-3308 -- add detection for svg files that lack the xml header (#808)
0145868ab is described below
commit 0145868ab5c1f2718dc3267e50737d22effb3ce6
Author: Tim Allison <ta...@apache.org>
AuthorDate: Fri Nov 18 11:48:21 2022 -0500
TIKA-3308 -- add detection for svg files that lack the xml header (#808)
---
CHANGES.txt | 2 ++
.../src/main/resources/org/apache/tika/mime/tika-mimetypes.xml | 6 ++++++
.../src/test/resources/test-documents/testSVG_no_xml_header.svg | 4 ++++
.../src/test/java/org/apache/tika/mime/TestMimeTypes.java | 1 +
4 files changed, 13 insertions(+)
diff --git a/CHANGES.txt b/CHANGES.txt
index af854c029..5a355a3b8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
Release 2.6.1 - ???
+ * Add SVG detection for svg files lacking the xml header (TIKA-3308).
+
* Upgrade to Bouncy Castle 1.71 and jdk18on jars (TIKA-3933).
* Add a JDBCPipesReporter (TIKA-3931).
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 2baa84d0e..4d1347c93 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -5827,6 +5827,12 @@
<acronym>SVG</acronym>
<_comment>Scalable Vector Graphics</_comment>
<root-XML localName="svg" namespaceURI="http://www.w3.org/2000/svg"/>
+ <magic priority="50">
+ <!-- Matches SVG documents that begin directly with the svg root element, i.e. without an XML declaration -->
+ <match value="&lt;svg" type="string" offset="0">
+ <match value="http://www.w3.org/2000/svg" type="string" offset="5:256"/>
+ </match>
+ </magic>
<glob pattern="*.svg"/>
<glob pattern="*.svgz"/>
</mime-type>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSVG_no_xml_header.svg b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSVG_no_xml_header.svg
new file mode 100644
index 000000000..0e53461be
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSVG_no_xml_header.svg
@@ -0,0 +1,4 @@
+<svg width="1cm" height="1cm" version="1.1" xmlns="http://www.w3.org/2000/svg">
+ <desc>Test SVG image</desc>
+ <rect x="0.1cm" y="0.1cm" width="0.8cm" height="0.8cm"/>
+</svg>
\ No newline at end of file
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 1f475f2bd..d14c5eb9b 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -607,6 +607,7 @@ public class TestMimeTypes {
assertTypeByData("image/svg+xml", "testSVG.svg");
assertTypeByName("image/svg+xml", "x.svg");
assertTypeByName("image/svg+xml", "x.SVG");
+ assertTypeByData("image/svg+xml", "testSVG_no_xml_header.svg");
// Should *.svgz be svg or gzip
assertType("application/gzip", "testSVG.svgz");