You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2013/02/13 15:47:20 UTC
svn commit: r1445632 - in /tika/trunk/tika-core/src:
main/resources/org/apache/tika/mime/tika-mimetypes.xml
test/java/org/apache/tika/mime/MimeTypesReaderTest.java
Author: nick
Date: Wed Feb 13 14:47:20 2013
New Revision: 1445632
URL: http://svn.apache.org/r1445632
Log:
Patch from Ryan McKinley from TIKA-1083 - Add Link and UTI information for a number of common mimetypes
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1445632&r1=1445631&r2=1445632&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Wed Feb 13 14:47:20 2013
@@ -171,6 +171,9 @@
<mime-type type="application/iges"/>
<mime-type type="application/illustrator">
+ <acronym>AI</acronym>
+ <_comment>Adobe Illustrator Artwork</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/Adobe_Illustrator_Artwork</tika:link>
<glob pattern="*.ai"/>]
<sub-class-of type="application/postscript"/>
</mime-type>
@@ -186,6 +189,9 @@
<mime-type type="application/isup"/>
<mime-type type="application/java-archive">
+ <_comment>Java Archive</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/.jar</tika:link>
+ <tika:uti>com.sun.java-archive</tika:uti>
<sub-class-of type="application/zip"/>
<glob pattern="*.jar"/>
</mime-type>
@@ -296,6 +302,8 @@
<!-- Use DefaultDetector / org.apache.tika.parser.microsoft.POIFSContainerDetector for more reliable detection of OLE2 documents -->
<alias type="application/vnd.ms-word"/>
<_comment>Microsoft Word Document</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/.doc</tika:link>
+ <tika:uti>com.microsoft.word.doc</tika:uti>
<magic priority="50">
<match value="Microsoft\ Word\ 6.0\ Document" type="string" offset="2080"/>
<match value="Documento\ Microsoft\ Word\ 6" type="string" offset="2080"/>
@@ -383,6 +391,9 @@
<alias type="application/x-pdf"/>
<acronym>PDF</acronym>
<_comment>Portable Document Format</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/PDF</tika:link>
+ <tika:link>http://www.adobe.com/devnet/pdf/pdf_reference_archive.html</tika:link>
+ <tika:uti>com.adobe.pdf</tika:uti>
<magic priority="50">
<match value="%PDF-" type="string" offset="0"/>
</magic>
@@ -3291,6 +3302,10 @@
<mime-type type="application/xhtml-voice+xml"/>
<mime-type type="application/xml">
+ <acronym>XML</acronym>
+ <_comment>Extensible Markup Language</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/Xml</tika:link>
+ <tika:uti>public.xml</tika:uti>
<alias type="text/xml"/>
<magic priority="50">
<match value="&lt;?xml" type="string" offset="0"/>
@@ -3346,6 +3361,8 @@
<mime-type type="application/zip">
<_comment>Compressed Archive File</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/ZIP_(file_format)</tika:link>
+ <tika:uti>com.pkware.zip-archive</tika:uti>
<alias type="application/x-zip-compressed"/>
<magic priority="40">
<match value="PK\003\004" type="string" offset="0"/>
@@ -3801,6 +3818,8 @@
<mime-type type="image/gif">
<acronym>GIF</acronym>
<_comment>Graphics Interchange Format</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/Gif</tika:link>
+ <tika:uti>com.compuserve.gif</tika:uti>
<magic priority="50">
<match value="GIF87a" type="string" offset="0"/>
<match value="GIF89a" type="string" offset="0"/>
@@ -3827,6 +3846,8 @@
<mime-type type="image/jpeg">
<acronym>JPEG</acronym>
<_comment>Joint Photographic Experts Group</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/Jpeg</tika:link>
+ <tika:uti>public.jpeg</tika:uti>
<magic priority="50">
<!-- FFD8 is the SOI (Start Of Image) marker. -->
<!-- It is followed by another marker that starts with FF. -->
@@ -3947,6 +3968,7 @@
<mime-type type="image/vnd.dwg">
<acronym>DWG</acronym>
<_comment>AutoCad Drawing</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/.dwg</tika:link>
<alias type="image/x-dwg"/>
<alias type="application/acad"/>
<alias type="application/x-acad"/>
@@ -3954,6 +3976,8 @@
<alias type="application/dwg"/>
<alias type="application/x-dwg"/>
<alias type="application/x-autocad"/>
+ <alias type="image/vnd.dwg"/>
+ <alias type="drawing/dwg"/>
<glob pattern="*.dwg"/>
<magic priority="50">
<match value="MC0.0" type="string" offset="0"/>
@@ -3970,6 +3994,9 @@
</mime-type>
<mime-type type="image/vnd.dxf">
+ <acronym>DXF</acronym>
+ <_comment>AutoCAD DXF</_comment>
+ <tika:link>http://en.wikipedia.org/wiki/AutoCAD_DXF</tika:link>
<glob pattern="*.dxf"/>
</mime-type>
<mime-type type="image/vnd.fastbidsheet">
@@ -4434,6 +4461,9 @@
<mime-type type="text/example"/>
<mime-type type="text/html">
+ <_comment>HyperText Markup Language</_comment>
+ <acronym>HTML</acronym>
+ <tika:uti>public.html</tika:uti>
<!-- TIKA-327: if you encounter tags in the HTML
with no declared namespace, it's not XHTML, it's just
bad HTML, unfortunately.
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java?rev=1445632&r1=1445631&r2=1445632&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java Wed Feb 13 14:47:20 2013
@@ -122,11 +122,17 @@ public class MimeTypesReaderTest extends
* @since TIKA-1012
*/
public void testReadExtendedMetadata() throws Exception {
- MimeType bmp = this.mimeTypes.forName("image/x-ms-bmp");
- assertEquals("BMP", bmp.getAcronym());
- assertEquals("com.microsoft.bmp", bmp.getUniformTypeIdentifier());
+ MimeType mime = this.mimeTypes.forName("image/x-ms-bmp");
+ assertEquals("BMP", mime.getAcronym());
+ assertEquals("com.microsoft.bmp", mime.getUniformTypeIdentifier());
assertEquals("http://en.wikipedia.org/wiki/BMP_file_format",
- bmp.getLinks().get(0).toString());
+ mime.getLinks().get(0).toString());
+
+ mime = this.mimeTypes.forName("application/xml");
+ assertEquals("XML", mime.getAcronym());
+ assertEquals("public.xml", mime.getUniformTypeIdentifier());
+ assertEquals("http://en.wikipedia.org/wiki/Xml",
+ mime.getLinks().get(0).toString());
}
/**