You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/11/18 15:21:26 UTC

[tika] branch TIKA-3308 created (now 043412406)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-3308
in repository https://gitbox.apache.org/repos/asf/tika.git


      at 043412406 TIKA-3308 -- add detection for svg files that lack the xml header

This branch includes the following new commits:

     new 043412406 TIKA-3308 -- add detection for svg files that lack the xml header

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[tika] 01/01: TIKA-3308 -- add detection for svg files that lack the xml header

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3308
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 04341240651f12bd63585ef3b4dc78df83630c7c
Author: tallison <ta...@apache.org>
AuthorDate: Fri Nov 18 10:21:11 2022 -0500

    TIKA-3308 -- add detection for svg files that lack the xml header
---
 CHANGES.txt                                                         | 2 ++
 .../src/main/resources/org/apache/tika/mime/tika-mimetypes.xml      | 6 ++++++
 .../src/test/resources/test-documents/testSVG_no_xml_header.svg     | 4 ++++
 .../src/test/java/org/apache/tika/mime/TestMimeTypes.java           | 1 +
 4 files changed, 13 insertions(+)

diff --git a/CHANGES.txt b/CHANGES.txt
index 54d14214e..85c7796a6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
 Release 2.6.1 - ???
 
+   * Add SVG detection for svg files lacking the xml header (TIKA-3308).
+
    * Add a JDBCPipesReporter (TIKA-3931).
 
    * Add multivalued field strategy option in jdbc-emitter (TIKA-3930).
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 2baa84d0e..4d1347c93 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -5827,6 +5827,12 @@
     <acronym>SVG</acronym>
     <_comment>Scalable Vector Graphics</_comment>
     <root-XML localName="svg" namespaceURI="http://www.w3.org/2000/svg"/>
+    <magic priority="50">
+      <!-- SVG without an XML declaration: starts with the svg root element, namespace URI follows within the first 256 bytes -->
+      <match value="&lt;svg" type="string" offset="0">
+        <match value="http://www.w3.org/2000/svg" type="string" offset="5:256"/>
+      </match>
+    </magic>
     <glob pattern="*.svg"/>
     <glob pattern="*.svgz"/>
   </mime-type>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSVG_no_xml_header.svg b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSVG_no_xml_header.svg
new file mode 100644
index 000000000..0e53461be
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/testSVG_no_xml_header.svg
@@ -0,0 +1,4 @@
+<svg width="1cm" height="1cm" version="1.1" xmlns="http://www.w3.org/2000/svg">
+    <desc>Test SVG image</desc>
+    <rect x="0.1cm" y="0.1cm" width="0.8cm" height="0.8cm"/>
+</svg>
\ No newline at end of file
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 1f475f2bd..d14c5eb9b 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -607,6 +607,7 @@ public class TestMimeTypes {
         assertTypeByData("image/svg+xml", "testSVG.svg");
         assertTypeByName("image/svg+xml", "x.svg");
         assertTypeByName("image/svg+xml", "x.SVG");
+        assertTypeByData("image/svg+xml", "testSVG_no_xml_header.svg");
 
         // Should *.svgz be svg or gzip
         assertType("application/gzip", "testSVG.svgz");