You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/03/07 20:38:50 UTC

[tika] branch branch_1x updated (a9b4b36 -> 33f756f)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from a9b4b36  TIKA-2590 -- revert listenForAllRecords = false thanks to Grigoriy Alekseev
     new c566cc4  TIKA-2590 update Changes.txt
     new 33f756f  TIKA-2527 -- Various new mimes and typo fixes in tika-mimetypes.xml via Andreas Meier.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CHANGES.txt                                        |  6 ++
 .../org/apache/tika/mime/tika-mimetypes.xml        | 99 +++++++++++++++++++++-
 2 files changed, 101 insertions(+), 4 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
tallison@apache.org.

[tika] 02/02: TIKA-2527 -- Various new mimes and typo fixes in tika-mimetypes.xml via Andreas Meier.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 33f756fa4581ae3d1643ea7299121139a5c1bc6d
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Mar 7 15:37:47 2018 -0500

    TIKA-2527 -- Various new mimes and typo fixes in tika-mimetypes.xml via Andreas Meier.
---
 CHANGES.txt                                        |  3 +
 .../org/apache/tika/mime/tika-mimetypes.xml        | 99 +++++++++++++++++++++-
 2 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 5a26f32..077d894 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
 Release 1.18 - ???
 
+   * Various new mimes and typo fixes in tika-mimetypes.xml
+     via Andreas Meier (TIKA-2527).
+
    * Revert to listenForAllRecords=false in ExcelExtractor
      via Grigoriy Alekseev (TIKA-2590)
 
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 56dfd53..fbcb0c5 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -119,6 +119,27 @@
   <mime-type type="application/cnrp+xml"/>
   <mime-type type="application/commonground"/>
   <mime-type type="application/conference-info+xml"/>
+  
+  <mime-type type="application/coreldraw">
+     <alias type="application/x-coreldraw"/>
+     <alias type="application/x-cdr"/>
+     <alias type="application/cdr"/>
+     <alias type="image/x-cdr"/>
+     <alias type="image/cdr"/>
+     <_comment>CorelDraw</_comment>
+     <_comment>cdr: CorelDraw</_comment>
+     <_comment>des: CorelDraw X4 and newer</_comment>
+     <magic priority="60">
+        <match value="RIFF" type="string" offset="0">
+           <match value="CDR" type="string" offset="8" />
+           <match value="cdr" type="string" offset="8" />
+           <match value="DES" type="string" offset="8" />
+           <match value="des" type="string" offset="8" />
+        </match>
+     </magic>
+     <glob pattern="*.cdr"/>
+  </mime-type>
+  
   <mime-type type="application/cpl+xml"/>
   <mime-type type="application/csta+xml"/>
   <mime-type type="application/cstadata+xml"/>
@@ -348,7 +369,7 @@
     <alias type="application/mac-binhex"/>
     <alias type="application/binhex"/>
     <magic priority="50">
-      <match value="must\ be\ converted\ with\ BinHex" type="string" offset="11"/>
+      <match value="must be converted with BinHex" type="string" offset="11"/>
     </magic>
     <glob pattern="*.hqx"/>
   </mime-type>
@@ -840,6 +861,8 @@
   <mime-type type="application/smil+xml">
     <alias type="application/smil"/>
     <_comment>SMIL Multimedia</_comment>
+    <root-XML localName="smil"/>
+    <sub-class-of type="application/xml"/>
     <glob pattern="*.smi"/>
     <glob pattern="*.smil"/>
     <glob pattern="*.sml"/>
@@ -1391,10 +1414,22 @@
   <mime-type type="application/vnd.intu.qfx">
     <glob pattern="*.qfx"/>
   </mime-type>
+  <mime-type type="application/vnd.iptc.g2.catalogitem+xml"/>
   <mime-type type="application/vnd.iptc.g2.conceptitem+xml"/>
   <mime-type type="application/vnd.iptc.g2.knowledgeitem+xml"/>
   <mime-type type="application/vnd.iptc.g2.newsitem+xml"/>
+    
+  <mime-type type="application/vnd.iptc.g2.newsmessage+xml">
+    <root-XML localName="newsMessage"/>
+    <root-XML localName="newsMessage" namespaceURI="http://iptc.org/std/nar/2006-10-01/"/>
+    <sub-class-of type="application/xml"/>
+    <_comment>XML syntax for IPTC NewsMessages</_comment>
+    <glob pattern="*.nar"/>
+  </mime-type>
+  
   <mime-type type="application/vnd.iptc.g2.packageitem+xml"/>
+  <mime-type type="application/vnd.iptc.g2.planningitem+xml"/>
+
   <mime-type type="application/vnd.ipunplugged.rcprofile">
     <glob pattern="*.rcprofile"/>
   </mime-type>
@@ -2775,6 +2810,30 @@
   <mime-type type="application/wspolicy+xml">
     <glob pattern="*.wspolicy"/>
   </mime-type>
+  
+  <mime-type type="image/x-tga">
+     <alias type="image/x-targa"/>
+     <!-- trailer bytes: 54 52 55 45 56 49 53 49 4F 4E 2D 58 46 49 4C 45 2E 00
+          trailer as string: TRUEVISION-XFILE\\x2E\\x00
+          Some .tga files may conflict with application/x-123 detection,
+          so this mime-type must be declared before application/x-123 -->
+     <_comment>Targa image data</_comment>
+     <magic priority="90">
+        <match value="0x01010000" type="big32" offset="1" >
+           <match value=".*[\\x54\\x52\\x55\\x45\\x56\\x49\\x53\\x49\\x4F\\x4E\\x2D\\x58\\x46\\x49\\x4C\\x45\\x2E\\x00]" type="regex" offset="8" />
+        </match>
+        <match value="0x00020000" type="big32" offset="1" >
+           <match value=".*[\\x54\\x52\\x55\\x45\\x56\\x49\\x53\\x49\\x4F\\x4E\\x2D\\x58\\x46\\x49\\x4C\\x45\\x2E\\x00]" type="regex" offset="8" />
+        </match>
+        <match value="0x00030000" type="big32" offset="1" >
+           <match value=".*[\\x54\\x52\\x55\\x45\\x56\\x49\\x53\\x49\\x4F\\x4E\\x2D\\x58\\x46\\x49\\x4C\\x45\\x2E\\x00]" type="regex" offset="8" />
+        </match>
+     </magic>
+     <glob pattern="*.tga"/>
+     <glob pattern="*.icb"/>
+     <glob pattern="*.vda"/>
+     <!-- <glob pattern="*.vst"/> --> <!-- disabled: conflicts with application/vnd.visio -->
+  </mime-type>
 
   <mime-type type="application/x-123">
     <magic priority="50">
@@ -3599,6 +3658,29 @@
       <match value="-lz5-" type="string" offset="2"/>
     </magic>
   </mime-type>
+  
+  <mime-type type="application/x-lz4">
+     <_comment>First match LZ4 Frame</_comment>
+     <_comment>Second match Legacy Frame</_comment>
+     <magic priority="60">
+        <match value="0x184d2204" type="little32" offset="0" />
+        <match value="0x184c2102" type="little32" offset="0" />
+     </magic>
+     <glob pattern="*.lz4"/>
+  </mime-type>
+  
+  <mime-type type="application/x-lzip">
+    <_comment>Lzip (LZMA) compressed archive</_comment>
+    <magic priority="50">
+      <match value="\x4c\x5a\x49\x50" type="string" offset="0"/>
+    </magic>
+    <glob pattern="*.lz"/>
+  </mime-type>
+  
+  <mime-type type="application/x-lzma">
+    <_comment>LZMA compressed archive</_comment>
+    <glob pattern="*.lzma"/>
+  </mime-type>  
 
   <mime-type type="application/x-mobipocket-ebook">
     <acronym>MOBI</acronym>
@@ -4010,7 +4092,7 @@
     <acronym>ESRI Shapefiles</acronym>
     <_comment>ESRI Shapefiles</_comment>
     <magic priority="60">
-      <match value="0x0000270a" type="big32" offset="2" />
+      <match value="0x0000270a" type="big32" offset="0" />
     </magic>
     <glob pattern="*.shp"/>
   </mime-type>
@@ -4748,7 +4830,7 @@
     <glob pattern="*.aac"/>
   </mime-type>
 
-  <mime-type type="audio/x-adbcm">
+  <mime-type type="audio/x-adpcm">
     <magic priority="20">
       <match value=".snd" type="string" offset="0">
         <match value="23" type="big32" offset="12"/>
@@ -4774,6 +4856,15 @@
     <glob pattern="*.aiff"/>
     <glob pattern="*.aifc"/>
   </mime-type>
+  
+  <mime-type type="audio/x-caf">
+     <_comment>Core Audio Format</_comment>
+     <_comment>com.apple.coreaudio-format</_comment>
+     <magic priority="60">
+        <match value="caff" type="string" offset="0" />
+     </magic>
+     <glob pattern="*.caf"/>
+  </mime-type>
 
   <mime-type type="audio/x-dec-basic">
     <magic priority="20">
@@ -4789,7 +4880,7 @@
     </magic>
   </mime-type>
 
-  <mime-type type="audio/x-dec-adbcm">
+  <mime-type type="audio/x-dec-adpcm">
     <magic priority="20">
       <match value="0x0064732E" type="big32" offset="0">
         <match value="23" type="big32" offset="12"/>

-- 
To stop receiving notification emails like this one, please contact
tallison@apache.org.

[tika] 01/02: TIKA-2590 update Changes.txt

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit c566cc472a4c9daf1e99fb80de9df2390b342350
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Mar 7 15:09:03 2018 -0500

    TIKA-2590 update Changes.txt
---
 CHANGES.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGES.txt b/CHANGES.txt
index 9b05d80..5a26f32 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
 Release 1.18 - ???
 
+   * Revert to listenForAllRecords=false in ExcelExtractor
+     via Grigoriy Alekseev (TIKA-2590)
+
    * Add workaround to identify TIFFs that might confuse
      commons-compress's tar detection via Daniel Schmidt
      (TIKA-2591)

-- 
To stop receiving notification emails like this one, please contact
tallison@apache.org.