You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2009/12/01 20:01:58 UTC
svn commit: r885869 - in /lucene/nutch/trunk: CHANGES.txt
conf/tika-mimetypes.xml lib/tika-0.1-incubating.jar lib/tika-core-0.5.jar
src/java/org/apache/nutch/util/MimeUtil.java
Author: ab
Date: Tue Dec 1 19:01:58 2009
New Revision: 885869
URL: http://svn.apache.org/viewvc?rev=885869&view=rev
Log:
NUTCH-767 Update Tika to v0.5 for the MimeType detection.
Added:
lucene/nutch/trunk/lib/tika-core-0.5.jar (with props)
Removed:
lucene/nutch/trunk/lib/tika-0.1-incubating.jar
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/conf/tika-mimetypes.xml
lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=885869&r1=885868&r2=885869&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Dec 1 19:01:58 2009
@@ -2,6 +2,8 @@
Unreleased Changes
+* NUTCH-767 Update Tika to v0.5 for the MimeType detection (Julien Nioche via ab)
+
* NUTCH-769 Fetcher to skip queues for URLS getting repeated exceptions
(Julien Nioche via ab)
Modified: lucene/nutch/trunk/conf/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/tika-mimetypes.xml?rev=885869&r1=885868&r2=885869&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/tika-mimetypes.xml (original)
+++ lucene/nutch/trunk/conf/tika-mimetypes.xml Tue Dec 1 19:01:58 2009
@@ -1,367 +1,1542 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
- Description: This xml file defines the valid mime types used by Tika.
- The mime types within this file are based on the types in the mime-types.xml
- file available in Apache Nutch.
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!--
+ Description: This xml file defines the valid mime types used by Tika.
+ The mime types within this file are based on the types in the mime-types.xml
+ file available in Apache Nutch.
-->
-
<mime-info>
- <mime-type type="text/plain">
- <magic priority="50">
- <match value="This is TeX," type="string" offset="0" />
- <match value="This is METAFONT," type="string" offset="0" />
- </magic>
- <glob pattern="*.txt" />
- <glob pattern="*.asc" />
- </mime-type>
-
- <mime-type type="text/html">
- <magic priority="50">
- <match value="&lt;!DOCTYPE HTML" type="string"
- offset="0:64" />
- <match value="&lt;!doctype html" type="string"
- offset="0:64" />
- <match value="&lt;HEAD" type="string" offset="0:64" />
- <match value="&lt;head" type="string" offset="0:64" />
- <match value="&lt;TITLE" type="string" offset="0:64" />
- <match value="&lt;title" type="string" offset="0:64" />
- <match value="&lt;html" type="string" offset="0:64" />
- <match value="&lt;HTML" type="string" offset="0:64" />
- <match value="&lt;BODY" type="string" offset="0" />
- <match value="&lt;body" type="string" offset="0" />
- <match value="&lt;TITLE" type="string" offset="0" />
- <match value="&lt;title" type="string" offset="0" />
- <match value="&lt;!--" type="string" offset="0" />
- <match value="&lt;h1" type="string" offset="0" />
- <match value="&lt;H1" type="string" offset="0" />
- <match value="&lt;!doctype HTML" type="string" offset="0" />
- <match value="&lt;!DOCTYPE html" type="string" offset="0" />
- </magic>
- <glob pattern="*.html" />
- <glob pattern="*.htm" />
- </mime-type>
-
- <mime-type type="application/xhtml+xml">
- <glob pattern="*.xhtml" />
- <root-XML namespaceURI='http://www.w3.org/1999/xhtml'
- localName='html' />
- </mime-type>
-
- <mime-type type="application/vnd.ms-powerpoint">
- <glob pattern="*.ppz" />
- <glob pattern="*.ppt" />
- <glob pattern="*.pps" />
- <glob pattern="*.pot" />
- <magic priority="50">
- <match value="0xcfd0e011" type="little32" offset="0" />
- </magic>
- </mime-type>
-
- <mime-type type="application/vnd.ms-excel">
- <magic priority="50">
- <match value="Microsoft Excel 5.0 Worksheet" type="string"
- offset="2080" />
- </magic>
- <glob pattern="*.xls" />
- <glob pattern="*.xlc" />
- <glob pattern="*.xll" />
- <glob pattern="*.xlm" />
- <glob pattern="*.xlw" />
- <glob pattern="*.xla" />
- <glob pattern="*.xlt" />
- <glob pattern="*.xld" />
- <alias type="application/msexcel" />
- </mime-type>
-
- <mime-type type="application/vnd.oasis.opendocument.text">
- <glob pattern="*.odt" />
- </mime-type>
-
-
- <mime-type type="application/zip">
- <alias type="application/x-zip-compressed" />
- <magic priority="40">
- <match value="PK\003\004" type="string" offset="0" />
- </magic>
- <glob pattern="*.zip" />
- </mime-type>
-
- <mime-type type="application/vnd.oasis.opendocument.text">
- <glob pattern="*.oth" />
- </mime-type>
-
- <mime-type type="application/msword">
- <magic priority="50">
- <match value="\x31\xbe\x00\x00" type="string" offset="0" />
- <match value="PO^Q`" type="string" offset="0" />
- <match value="\376\067\0\043" type="string" offset="0" />
- <match value="\333\245-\0\0\0" type="string" offset="0" />
- <match value="Microsoft Word 6.0 Document" type="string"
- offset="2080" />
- <match value="Microsoft Word document data" type="string"
- offset="2112" />
- </magic>
- <glob pattern="*.doc" />
- <alias type="application/vnd.ms-word" />
- </mime-type>
-
- <mime-type type="application/octet-stream">
- <magic priority="50">
- <match value="\037\036" type="string" offset="0" />
- <match value="017437" type="host16" offset="0" />
- <match value="0x1fff" type="host16" offset="0" />
- <match value="\377\037" type="string" offset="0" />
- <match value="0145405" type="host16" offset="0" />
- </magic>
- <glob pattern="*.bin" />
- </mime-type>
-
- <mime-type type="application/pdf">
- <magic priority="50">
- <match value="%PDF-" type="string" offset="0" />
- </magic>
- <glob pattern="*.pdf" />
- <alias type="application/x-pdf" />
- </mime-type>
-
- <mime-type type="application/atom+xml">
- <root-XML localName="feed"
- namespaceURI="http://purl.org/atom/ns#" />
- </mime-type>
-
- <mime-type type="application/mac-binhex40">
- <glob pattern="*.hqx" />
- </mime-type>
-
- <mime-type type="application/mac-compactpro">
- <glob pattern="*.cpt" />
- </mime-type>
-
- <mime-type type="application/rtf">
- <glob pattern="*.rtf"/>
- <alias type="text/rtf" />
- </mime-type>
-
- <mime-type type="application/rss+xml">
- <alias type="text/rss" />
- <root-XML localName="rss" />
- <root-XML namespaceURI="http://purl.org/rss/1.0/" />
- <glob pattern="*.rss" />
- </mime-type>
-
- <!-- added in by mattmann -->
- <mime-type type="application/xml">
- <alias type="text/xml" />
- <glob pattern="*.xml" />
- </mime-type>
-
- <mime-type type="application/x-mif">
- <alias type="application/vnd.mif" />
- </mime-type>
-
- <mime-type type="application/vnd.wap.wbxml">
- <glob pattern="*.wbxml" />
- </mime-type>
-
- <mime-type type="application/vnd.wap.wmlc">
- <_comment>Compiled WML Document</_comment>
- <glob pattern="*.wmlc" />
- </mime-type>
-
- <mime-type type="application/vnd.wap.wmlscriptc">
- <_comment>Compiled WML Script</_comment>
- <glob pattern="*.wmlsc" />
- </mime-type>
-
- <mime-type type="text/vnd.wap.wmlscript">
- <_comment>WML Script</_comment>
- <glob pattern="*.wmls" />
- </mime-type>
-
- <mime-type type="application/x-bzip">
- <alias type="application/x-bzip2" />
- </mime-type>
-
- <mime-type type="application/x-bzip-compressed-tar">
- <glob pattern="*.tbz" />
- <glob pattern="*.tbz2" />
- </mime-type>
-
- <mime-type type="application/x-cdlink">
- <_comment>Virtual CD-ROM CD Image File</_comment>
- <glob pattern="*.vcd" />
- </mime-type>
-
- <mime-type type="application/x-director">
- <_comment>Shockwave Movie</_comment>
- <glob pattern="*.dcr" />
- <glob pattern="*.dir" />
- <glob pattern="*.dxr" />
- </mime-type>
-
- <mime-type type="application/x-futuresplash">
- <_comment>Macromedia FutureSplash File</_comment>
- <glob pattern="*.spl" />
- </mime-type>
-
- <mime-type type="application/x-java">
- <alias type="application/java" />
- </mime-type>
-
- <mime-type type="application/x-koan">
- <_comment>SSEYO Koan File</_comment>
- <glob pattern="*.skp" />
- <glob pattern="*.skd" />
- <glob pattern="*.skt" />
- <glob pattern="*.skm" />
- </mime-type>
-
- <mime-type type="application/x-latex">
- <_comment>LaTeX Source Document</_comment>
- <glob pattern="*.latex" />
- </mime-type>
-
- <!-- JC CHANGED
- <mime-type type="application/x-mif">
- <_comment>FrameMaker MIF document</_comment>
- <glob pattern="*.mif"/>
- </mime-type> -->
-
- <mime-type type="application/x-ms-dos-executable">
- <alias type="application/x-dosexec" />
- </mime-type>
-
- <mime-type type="application/ogg">
- <alias type="application/x-ogg" />
- </mime-type>
-
- <mime-type type="application/x-rar">
- <alias type="application/x-rar-compressed" />
- </mime-type>
-
- <mime-type type="application/x-shellscript">
- <alias type="application/x-sh" />
- </mime-type>
-
- <mime-type type="application/xhtml+xml">
- <glob pattern="*.xht" />
- </mime-type>
-
- <mime-type type="audio/midi">
- <glob pattern="*.kar" />
- </mime-type>
-
- <mime-type type="audio/x-pn-realaudio">
- <alias type="audio/x-realaudio" />
- </mime-type>
-
- <mime-type type="image/tiff">
- <magic priority="50">
- <match value="0x4d4d2a00" type="string" offset="0" />
- <match value="0x49492a00" type="string" offset="0" />
- </magic>
- </mime-type>
-
- <mime-type type="message/rfc822">
- <magic priority="50">
- <match type="string" value="Relay-Version:" offset="0" />
- <match type="string" value="#! rnews" offset="0" />
- <match type="string" value="N#! rnews" offset="0" />
- <match type="string" value="Forward to" offset="0" />
- <match type="string" value="Pipe to" offset="0" />
- <match type="string" value="Return-Path:" offset="0" />
- <match type="string" value="From:" offset="0" />
- <match type="string" value="Message-ID:" offset="0" />
- <match type="string" value="Date:" offset="0" />
- </magic>
- </mime-type>
-
- <mime-type type="application/x-javascript">
- <glob pattern="*.js" />
- </mime-type>
-
-
- <mime-type type="image/vnd.wap.wbmp">
- <_comment>Wireless Bitmap File Format</_comment>
- <glob pattern="*.wbmp" />
- </mime-type>
-
- <mime-type type="image/x-psd">
- <alias type="image/photoshop" />
- </mime-type>
-
- <mime-type type="image/x-xcf">
- <alias type="image/xcf" />
- <magic priority="50">
- <match type="string" value="gimp xcf " offset="0" />
- </magic>
- </mime-type>
-
- <mime-type type="application/x-shockwave-flash">
- <glob pattern="*.swf"/>
- <magic priority="50">
- <match type="string" value="FWS" offset="0"/>
- <match type="string" value="CWS" offset="0"/>
- </magic>
- </mime-type>
-
- <mime-type type="model/iges">
- <_comment>
- Initial Graphics Exchange Specification Format
- </_comment>
- <glob pattern="*.igs" />
- <glob pattern="*.iges" />
- </mime-type>
-
- <mime-type type="model/mesh">
- <glob pattern="*.msh" />
- <glob pattern="*.mesh" />
- <glob pattern="*.silo" />
- </mime-type>
-
- <mime-type type="model/vrml">
- <glob pattern="*.vrml" />
- </mime-type>
-
- <mime-type type="text/x-tcl">
- <alias type="application/x-tcl" />
- </mime-type>
-
- <mime-type type="text/x-tex">
- <alias type="application/x-tex" />
- </mime-type>
-
- <mime-type type="text/x-texinfo">
- <alias type="application/x-texinfo" />
- </mime-type>
-
- <mime-type type="text/x-troff-me">
- <alias type="application/x-troff-me" />
- </mime-type>
-
- <mime-type type="video/vnd.mpegurl">
- <glob pattern="*.mxu" />
- </mime-type>
-
- <mime-type type="x-conference/x-cooltalk">
- <_comment>Cooltalk Audio</_comment>
- <glob pattern="*.ice" />
- </mime-type>
+ <mime-type type="text/plain">
+ <magic priority="20">
+ <match value="This is TeX," type="string" offset="0" />
+ <match value="This is METAFONT," type="string" offset="0" />
+ <match value="#!/" type="string" offset="0" />
+ <match value="#!\ /" type="string" offset="0" />
+ <match value="#!\t/" type="string" offset="0" />
+ <!-- UTF-16BE BOM -->
+ <match value="0xfeff" type="string" offset="0"/>
+ <!-- UTF-16LE BOM -->
+ <match value="0xfffe" type="string" offset="0"/>
+ <!-- UTF-8 BOM -->
+ <match value="0xefbbbf" type="string" offset="0"/>
+ </magic>
+ <glob pattern="*.txt" />
+ <glob pattern="*.asc" />
+
+ <!-- TIKA-85: http://www.apache.org/dev/svn-eol-style.txt -->
+ <glob pattern="INSTALL" />
+ <glob pattern="KEYS" />
+ <glob pattern="Makefile" />
+ <glob pattern="README" />
+ <glob pattern="abs-linkmap" />
+ <glob pattern="abs-menulinks" />
+ <glob pattern="*.aart" />
+ <glob pattern="*.ac" />
+ <glob pattern="*.am" />
+ <glob pattern="*.bat" />
+ <glob pattern="*.c" />
+ <glob pattern="*.cat" />
+ <glob pattern="*.cgi" />
+ <glob pattern="*.classpath" />
+ <glob pattern="*.cmd" />
+ <glob pattern="*.conf" />
+ <glob pattern="*.config" />
+ <glob pattern="*.cpp" />
+ <glob pattern="*.css" />
+ <glob pattern="*.cwiki" />
+ <glob pattern="*.data" />
+ <glob pattern="*.dcl" />
+ <glob pattern="*.dtd" />
+ <glob pattern="*.egrm" />
+ <glob pattern="*.ent" />
+ <glob pattern="*.ft" />
+ <glob pattern="*.fn" />
+ <glob pattern="*.fv" />
+ <glob pattern="*.grm" />
+ <glob pattern="*.g" />
+ <glob pattern="*.h" />
+ <glob pattern=".htaccess" />
+ <glob pattern="*.ihtml" />
+ <glob pattern="*.in" />
+ <glob pattern="*.java" />
+ <glob pattern="*.jmx" />
+ <glob pattern="*.jsp" />
+ <glob pattern="*.js" />
+ <glob pattern="*.junit" />
+ <glob pattern="*.jx" />
+ <glob pattern="*.manifest" />
+ <glob pattern="*.m4" />
+ <glob pattern="*.mf" />
+ <glob pattern="*.MF" />
+ <glob pattern="*.meta" />
+ <glob pattern="*.n3" />
+ <glob pattern="*.pen" />
+ <glob pattern="*.pl" />
+ <glob pattern="*.pm" />
+ <glob pattern="*.pod" />
+ <glob pattern="*.pom" />
+ <glob pattern="*.project" />
+ <glob pattern="*.properties" />
+ <glob pattern="*.py" />
+ <glob pattern="*.rb" />
+ <glob pattern="*.rdf" />
+ <glob pattern="*.rnc" />
+ <glob pattern="*.rng" />
+ <glob pattern="*.rnx" />
+ <glob pattern="*.roles" />
+ <glob pattern="*.sh" />
+ <glob pattern="*.sql" />
+ <glob pattern="*.tld" />
+ <glob pattern="*.types" />
+ <glob pattern="*.vm" />
+ <glob pattern="*.vsl" />
+ <glob pattern="*.wsdd" />
+ <glob pattern="*.wsdl" />
+ <glob pattern="*.xargs" />
+ <glob pattern="*.xcat" />
+ <glob pattern="*.xconf" />
+ <glob pattern="*.xegrm" />
+ <glob pattern="*.xgrm" />
+ <glob pattern="*.xlex" />
+ <glob pattern="*.xlog" />
+ <glob pattern="*.xmap" />
+ <glob pattern="*.xroles" />
+ <glob pattern="*.xsamples" />
+ <glob pattern="*.xsp" />
+ <glob pattern="*.xul" />
+ <glob pattern="*.xweb" />
+ <glob pattern="*.xwelcome" />
+ </mime-type>
+
+ <mime-type type="text/html">
+ <magic priority="50">
+ <match value="&lt;!DOCTYPE HTML" type="string" offset="0:64" />
+ <match value="&lt;!doctype html" type="string" offset="0:64" />
+ <match value="&lt;HEAD" type="string" offset="0:64" />
+ <match value="&lt;head" type="string" offset="0:64" />
+ <match value="&lt;TITLE" type="string" offset="0:64" />
+ <match value="&lt;title" type="string" offset="0:64" />
+ <match value="&lt;html" type="string" offset="0:64" />
+ <match value="&lt;HTML" type="string" offset="0:64" />
+ <match value="&lt;BODY" type="string" offset="0" />
+ <match value="&lt;body" type="string" offset="0" />
+ <match value="&lt;TITLE" type="string" offset="0" />
+ <match value="&lt;title" type="string" offset="0" />
+ <match value="&lt;!--" type="string" offset="0" />
+ <match value="&lt;h1" type="string" offset="0" />
+ <match value="&lt;H1" type="string" offset="0" />
+ <match value="&lt;!doctype HTML" type="string" offset="0" />
+ <match value="&lt;!DOCTYPE html" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.html" />
+ <glob pattern="*.htm" />
+ </mime-type>
+
+ <mime-type type="application/xhtml+xml">
+ <sub-class-of type="application/xml" />
+ <glob pattern="*.xhtml" />
+ <glob pattern="*.xht" />
+ <root-XML namespaceURI="http://www.w3.org/1999/xhtml" localName="html" />
+ </mime-type>
+
+ <!-- ===================================================================== -->
+ <!-- Microsoft Office binary file formats -->
+ <!-- http://www.microsoft.com/interop/docs/OfficeBinaryFormats.mspx -->
+ <!-- ===================================================================== -->
+
+ <mime-type type="application/x-tika-msoffice">
+ <magic>
+ <match value="0xd0cf11e0a1b11ae1" type="string" offset="0:8" />
+ </magic>
+ </mime-type>
+
+ <!-- http://www.iana.org/assignments/media-types/application/vnd.visio -->
+ <mime-type type="application/vnd.visio">
+ <comment>Microsoft Visio Diagram</comment>
+ <glob pattern="*.vsd" />
+ <glob pattern="*.vst" />
+ <glob pattern="*.vsw" />
+ <glob pattern="*.vss" />
+ <sub-class-of type="application/x-tika-msoffice"/>
+ </mime-type>
+
+ <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-powerpoint -->
+ <mime-type type="application/vnd.ms-powerpoint">
+ <comment>Microsoft Powerpoint Presentation</comment>
+ <glob pattern="*.ppz" />
+ <glob pattern="*.ppt" />
+ <glob pattern="*.pps" />
+ <glob pattern="*.pot" />
+ <glob pattern="*.ppa" />
+ <alias type="application/mspowerpoint" />
+ <sub-class-of type="application/x-tika-msoffice"/>
+ </mime-type>
+
+ <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-excel -->
+ <mime-type type="application/vnd.ms-excel">
+ <comment>Microsoft Excel Spreadsheet</comment>
+ <magic priority="50">
+ <match value="Microsoft\ Excel\ 5.0\ Worksheet" type="string" offset="2080" />
+ <match value="Foglio\ di\ lavoro\ Microsoft\ Exce" type="string" offset="2080" />
+ <match value="Biff5" type="string" offset="2114" />
+ <match value="Biff5" type="string" offset="2121" />
+ <match value="\x09\x04\x06\x00\x00\x00\x10\x00" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.xls" />
+ <glob pattern="*.xlc" />
+ <glob pattern="*.xll" />
+ <glob pattern="*.xlm" />
+ <glob pattern="*.xlw" />
+ <glob pattern="*.xla" />
+ <glob pattern="*.xlt" />
+ <glob pattern="*.xld" />
+ <alias type="application/msexcel" />
+ <sub-class-of type="application/x-tika-msoffice"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12">
+ <comment>Microsoft Excel 2007 Binary Spreadsheet</comment>
+ <glob pattern="*.xlsb"/>
+ <sub-class-of type="application/vnd.ms-excel"/>
+ </mime-type>
+
+ <!-- http://www.iana.org/assignments/media-types/application/msword -->
+ <mime-type type="application/msword">
+ <comment>Microsoft Word Document</comment>
+ <magic priority="50">
+ <match value="Microsoft\ Word\ 6.0\ Document" type="string" offset="2080" />
+ <match value="Documento\ Microsoft\ Word\ 6" type="string" offset="2080" />
+ <match value="MSWordDoc" type="string" offset="2112" />
+ <match value="0x31be0000" type="big32" offset="0" />
+ <match value="PO^Q`" type="string" offset="0" />
+ <match value="\376\067\0\043" type="string" offset="0" />
+ <match value="\333\245-\0\0\0" type="string" offset="0" />
+ <match value="\354\245\301" type="string" offset="512" />
+ <match value="\320\317\021\340\241\261\032\341" type="string" offset="0" />
+ <match value="\224\246\056" type="string" offset="0" />
+ <match value="R\0o\0o\0t\0\ \0E\0n\0t\0r\0y" type="string" offset="512" />
+ </magic>
+ <glob pattern="*.doc" />
+ <glob pattern="*.dot" />
+ <alias type="application/vnd.ms-word" />
+ <sub-class-of type="application/x-tika-msoffice"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-outlook">
+ <comment>Microsoft Outlook Message</comment>
+ <glob pattern="*.msg" />
+ <sub-class-of type="application/x-tika-msoffice"/>
+ </mime-type>
+
+ <!-- ===================================================================== -->
+ <!-- Office Open XML file formats -->
+ <!-- http://www.ecma-international.org/publications/standards/Ecma-376.htm -->
+ <!-- ===================================================================== -->
+
+ <mime-type type="application/x-tika-ooxml">
+ <sub-class-of type="application/zip"/>
+ <magic priority="50">
+ <match value="PK\003\004" type="string" offset="0">
+ <match value="[Content_Types].xml" type="string" offset="30"/>
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.presentation">
+ <comment>Office Open XML Presentation</comment>
+ <glob pattern="*.pptx"/>
+ <glob pattern="*.sldx"/>
+ <glob pattern="*.thmx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12">
+ <comment>Office Open XML Presentation (macro-enabled)</comment>
+ <glob pattern="*.pptm"/>
+ <glob pattern="*.potm"/>
+ <glob pattern="*.sldm"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.template">
+ <comment>Office Open XML Presentation Template</comment>
+ <glob pattern="*.potx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slideshow">
+ <comment>Office Open XML Presentation Slideshow</comment>
+ <glob pattern="*.ppsx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12">
+ <comment>Office Open XML Presentation Slideshow (macro-enabled)</comment>
+ <glob pattern="*.ppsm"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12">
+ <comment>Office Open XML Presentation Add-in (macro-enabled)</comment>
+ <glob pattern="*.ppam"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">
+ <comment>Office Open XML Workbook</comment>
+ <glob pattern="*.xlsx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-excel.sheet.macroenabled.12">
+ <comment>Office Open XML Workbook (macro-enabled)</comment>
+ <glob pattern="*.xlsm"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.template">
+ <comment>Office Open XML Workbook Template</comment>
+ <glob pattern="*.xltx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-excel.template.macroenabled.12">
+ <comment>Office Open XML Workbook Template (macro-enabled)</comment>
+ <glob pattern="*.xltm"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-excel.addin.macroenabled.12">
+ <comment>Office Open XML Workbook Add-in (macro-enabled)</comment>
+ <glob pattern="*.xlam"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
+ <comment>Office Open XML Document</comment>
+ <glob pattern="*.docx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-word.document.macroenabled.12">
+ <comment>Office Open XML Document (macro-enabled)</comment>
+ <glob pattern="*.docm"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.template">
+ <comment>Office Open XML Document Template</comment>
+ <glob pattern="*.dotx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.ms-word.template.macroenabled.12">
+ <comment>Office Open XML Document Template (macro-enabled)</comment>
+ <glob pattern="*.dotm"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <!-- ===================================================================== -->
+ <!-- Open Document Format for Office Applications (OpenDocument) v1.0 -->
+ <!-- http://www.oasis-open.org/specs/index.php#opendocumentv1.0 -->
+ <!-- ===================================================================== -->
+
+ <mime-type type="application/vnd.oasis.opendocument.text">
+ <comment>OpenDocument v1.0: Text document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.text" />
+ <glob pattern="*.odt" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.text" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.text-template">
+ <comment>OpenDocument v1.0: Text document used as template</comment>
+ <alias type="application/x-vnd.oasis.opendocument.text-template" />
+ <glob pattern="*.ott" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.text-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.graphics">
+ <comment>OpenDocument v1.0: Graphics document (Drawing)</comment>
+ <alias type="application/x-vnd.oasis.opendocument.graphics" />
+ <glob pattern="*.odg" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.graphics" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.graphics-template">
+ <comment>OpenDocument v1.0: Graphics document used as template</comment>
+ <alias type="application/x-vnd.oasis.opendocument.graphics-template" />
+ <glob pattern="*.otg" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.graphics-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.presentation">
+ <comment>OpenDocument v1.0: Presentation document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.presentation" />
+ <glob pattern="*.odp" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.presentation" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.presentation-template">
+ <comment>OpenDocument v1.0: Presentation document used as template</comment>
+ <alias type="application/x-vnd.oasis.opendocument.presentation-template" />
+ <glob pattern="*.otp" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.presentation-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.spreadsheet">
+ <comment>OpenDocument v1.0: Spreadsheet document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.spreadsheet" />
+ <glob pattern="*.ods" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.spreadsheet" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.spreadsheet-template">
+ <comment>OpenDocument v1.0: Spreadsheet document used as template</comment>
+ <alias type="application/x-vnd.oasis.opendocument.spreadsheet-template" />
+ <glob pattern="*.ots" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.chart">
+ <comment>OpenDocument v1.0: Chart document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.chart" />
+ <glob pattern="*.odc" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.chart" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.chart-template">
+ <comment>OpenDocument v1.0: Chart document used as template</comment>
+ <alias type="application/x-vnd.oasis.opendocument.chart-template" />
+ <glob pattern="*.otc" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.chart-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.image">
+ <comment>OpenDocument v1.0: Image document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.image" />
+ <glob pattern="*.odi" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.image" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.image-template">
+ <comment>OpenDocument v1.0: Image document used as template</comment>
+ <alias type="application/x-vnd.oasis.opendocument.image-template" />
+ <glob pattern="*.oti" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.image-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.formula">
+ <comment>OpenDocument v1.0: Formula document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.formula" />
+ <glob pattern="*.odf" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.formula" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.formula-template">
+ <comment>OpenDocument v1.0: Formula document used as template</comment>
+ <alias type="application/x-vnd.oasis.opendocument.formula-template" />
+ <glob pattern="*.otf" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.formula-template" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.text-master">
+ <comment>OpenDocument v1.0: Global Text document</comment>
+ <alias type="application/x-vnd.oasis.opendocument.text-master" />
+ <glob pattern="*.odm" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.text-master" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.oasis.opendocument.text-web">
+ <comment>
+ OpenDocument v1.0: Text document used as template for HTML documents
+ </comment>
+ <alias type="application/x-vnd.oasis.opendocument.text-web" />
+ <glob pattern="*.oth" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.oasis.opendocument.text-web" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/vnd.sun.xml.writer">
+ <comment>
+ OpenOffice v1.0: Writer Document
+ </comment>
+ <alias type="application/x-vnd.sun.xml.writer" />
+ <glob pattern="*.sxw" />
+ <magic>
+ <match type="string" offset="0" value="PK">
+ <match type="string" offset="30"
+ value="mimetypeapplication/vnd.sun.xml.writer" />
+ </match>
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/zip">
+ <alias type="application/x-zip-compressed" />
+ <magic priority="40">
+ <match value="PK\003\004" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.zip" />
+ </mime-type>
+
+ <mime-type type="application/x-tar">
+ <magic priority="40">
+ <!-- POSIX tar archive -->
+ <match value="ustar\0" type="string" offset="257" />
+ <!-- GNU tar archive -->
+ <match value="ustar \0" type="string" offset="257" />
+ </magic>
+ <glob pattern="*.tar" />
+ </mime-type>
+
+ <mime-type type="application/x-gzip">
+ <magic priority="40">
+ <match value="\037\213" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.tgz" />
+ <glob pattern="*.gz" />
+ <glob pattern="*-gz" />
+ <glob pattern="*.svgz" />
+ <glob pattern="*.wmz" />
+ <glob pattern="*.emz" />
+ </mime-type>
+
+ <mime-type type="application/x-bzip">
+ <alias type="application/x-bzip2" />
+ <magic priority="40">
+ <match value="BZh" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.bz" />
+ <glob pattern="*.bz2" />
+ <glob pattern="*.tbz" />
+ <glob pattern="*.tbz2" />
+ </mime-type>
+
+ <mime-type type="application/x-tika-java-class">
+ <magic priority="40">
+ <match value="0xcafebabe" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.class" />
+ </mime-type>
+
+ <mime-type type="application/octet-stream">
+ <magic priority="50">
+ <match value="#\ This\ is\ a\ shell\ archive" type="string" offset="10" />
+ <match value="\037\036" type="string" offset="0" />
+ <match value="017437" type="host16" offset="0" />
+ <match value="0x1fff" type="host16" offset="0" />
+ <match value="\377\037" type="string" offset="0" />
+ <match value="0145405" type="host16" offset="0" />
+ </magic>
+ <glob pattern="*.bin" />
+ </mime-type>
+
+ <mime-type type="application/pdf">
+ <acronym>PDF</acronym>
+ <comment>Portable Document Format</comment>
+ <magic priority="50">
+ <match value="%PDF-" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.pdf" />
+ <alias type="application/x-pdf" />
+ </mime-type>
+
+ <mime-type type="application/x-shockwave-flash">
+ <acronym>Flash</acronym>
+ <comment>Adobe Flash</comment>
+ <magic priority="50">
+ <!-- F = Uncompressed -->
+ <match value="FWS" type="string" offset="0" />
+ <!-- C = Compressed -->
+ <match value="CWS" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.swf" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-wmf">
+ <acronym>WMF</acronym>
+ <comment>Windows Metafile</comment>
+ <glob pattern="*.wmf" />
+ <glob pattern="*.emf" />
+ </mime-type>
+
+ <mime-type type="application/atom+xml">
+ <root-XML localName="feed" namespaceURI="http://purl.org/atom/ns#" />
+ </mime-type>
+
+ <mime-type type="application/mac-binhex40">
+ <magic priority="50">
+ <match value="must\ be\ converted\ with\ BinHex" type="string" offset="11" />
+ </magic>
+ <glob pattern="*.hqx" />
+ </mime-type>
+
+ <mime-type type="application/mac-compactpro">
+ <glob pattern="*.cpt" />
+ </mime-type>
+
+ <mime-type type="application/rtf">
+ <sub-class-of type="text/plain" />
+ <magic priority="50">
+ <match value="{\\rtf" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.rtf" />
+ <alias type="text/rtf" />
+ </mime-type>
+
+ <mime-type type="application/rss+xml">
+ <alias type="text/rss" />
+ <root-XML localName="rss" />
+ <root-XML namespaceURI="http://purl.org/rss/1.0/" />
+ <glob pattern="*.rss" />
+ </mime-type>
+
+ <!-- added in by mattmann -->
+ <mime-type type="application/xml">
+ <sub-class-of type="text/plain" />
+ <magic priority="50">
+ <match value="&lt;?xml" type="string" offset="0" />
+ <match value="&lt;?XML" type="string" offset="0" />
+ <match value="0xFFFE3C003F0078006D006C00" type="string" offset="0" />
+ <match value="0xFEFF003C003F0078006D006C" type="string" offset="0" />
+ <!-- TODO: Add matches for the rest of the possible XML encoding schemes -->
+ </magic>
+ <alias type="text/xml" />
+ <glob pattern="*.xml" />
+ <glob pattern="*.xsd" />
+ </mime-type>
+
+ <mime-type type="image/svg+xml">
+ <sub-class-of type="application/xml" />
+ <acronym>SVG</acronym>
+ <comment>Scalable Vector Graphics</comment>
+ <root-XML localName="svg" namespaceURI="http://www.w3.org/2000/svg" />
+ <glob pattern="*.svg" />
+ </mime-type>
+
+ <mime-type type="application/xslt+xml">
+ <sub-class-of type="application/xml" />
+ <acronym>XSLT</acronym>
+ <comment>XSL Transformations</comment>
+ <root-XML localName="stylesheet" namespaceURI="http://www.w3.org/1999/XSL/Transform" />
+ <alias type="text/xsl" />
+ <glob pattern="*.xsl" />
+ <glob pattern="*.xslt" />
+ </mime-type>
+
+ <mime-type type="application/x-mif">
+ <magic priority="50">
+ <match value="\&lt;MakerFile" type="string" offset="0" />
+ <match value="\&lt;MIFFile" type="string" offset="0" />
+ <match value="\&lt;MakerDictionary" type="string" offset="0" />
+ <match value="\&lt;MakerScreenFont" type="string" offset="0" />
+ <match value="\&lt;MML" type="string" offset="0" />
+ <match value="\&lt;BookFile" type="string" offset="0" />
+ <match value="\&lt;Maker" type="string" offset="0" />
+ </magic>
+ <alias type="application/vnd.mif" />
+ </mime-type>
+
+ <mime-type type="application/vnd.wap.wbxml">
+ <glob pattern="*.wbxml" />
+ </mime-type>
+
+ <mime-type type="application/vnd.wap.wmlc">
+ <_comment>Compiled WML Document</_comment>
+ <glob pattern="*.wmlc" />
+ </mime-type>
+
+ <mime-type type="application/vnd.wap.wmlscriptc">
+ <_comment>Compiled WML Script</_comment>
+ <glob pattern="*.wmlsc" />
+ </mime-type>
+
+ <mime-type type="text/vnd.wap.wmlscript">
+ <_comment>WML Script</_comment>
+ <glob pattern="*.wmls" />
+ </mime-type>
+
+ <mime-type type="application/x-cdlink">
+ <_comment>Virtual CD-ROM CD Image File</_comment>
+ <glob pattern="*.vcd" />
+ </mime-type>
+
+ <mime-type type="application/x-director">
+ <_comment>Shockwave Movie</_comment>
+ <glob pattern="*.dcr" />
+ <glob pattern="*.dir" />
+ <glob pattern="*.dxr" />
+ </mime-type>
+
+ <mime-type type="application/x-futuresplash">
+ <_comment>Macromedia FutureSplash File</_comment>
+ <glob pattern="*.spl" />
+ </mime-type>
+
+ <mime-type type="application/x-koan">
+ <_comment>SSEYO Koan File</_comment>
+ <glob pattern="*.skp" />
+ <glob pattern="*.skd" />
+ <glob pattern="*.skt" />
+ <glob pattern="*.skm" />
+ </mime-type>
+
+ <mime-type type="application/x-latex">
+ <_comment>LaTeX Source Document</_comment>
+ <magic priority="50">
+ <match value="%\ -*-latex-*-" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.latex" />
+ </mime-type>
+
+ <!-- JC CHANGED
+ <mime-type type="application/x-mif">
+ <_comment>FrameMaker MIF document</_comment>
+ <glob pattern="*.mif"/>
+ </mime-type> -->
+
+ <mime-type type="application/x-ms-dos-executable">
+ <alias type="application/x-dosexec" />
+ </mime-type>
+
+ <mime-type type="application/ogg">
+ <magic priority="50">
+ <match value="OggS" type="string" offset="0" />
+ </magic>
+ <alias type="application/x-ogg" />
+ </mime-type>
+
+ <mime-type type="application/x-rar">
+ <alias type="application/x-rar-compressed" />
+ </mime-type>
+
+ <mime-type type="application/x-shellscript">
+ <alias type="application/x-sh" />
+ </mime-type>
+
+ <mime-type type="audio/midi">
+ <acronym>MIDI</acronym>
+ <comment>Musical Instrument Digital Interface</comment>
+ <magic priority ="20">
+ <match type="string" value="MThd" offset="0" />
+ </magic>
+ <glob pattern="*.mid" />
+ <glob pattern="*.midi" />
+ <glob pattern="*.kar" />
+ </mime-type>
+
+ <mime-type type="message/rfc822">
+ <magic priority="50">
+ <match type="string" value="Relay-Version:" offset="0" />
+ <match type="string" value="#! rnews" offset="0" />
+ <match type="string" value="N#! rnews" offset="0" />
+ <match type="string" value="Forward to" offset="0" />
+ <match type="string" value="Pipe to" offset="0" />
+ <match type="string" value="Return-Path:" offset="0" />
+ <match type="string" value="From:" offset="0" />
+ <match type="string" value="Message-ID:" offset="0" />
+ <match type="string" value="Date:" offset="0" />
+ </magic>
+ </mime-type>
+
+ <mime-type type="image/vnd.wap.wbmp">
+ <_comment>Wireless Bitmap File Format</_comment>
+ <glob pattern="*.wbmp" />
+ </mime-type>
+
+ <mime-type type="image/x-psd">
+ <alias type="image/photoshop" />
+ </mime-type>
+
+ <mime-type type="image/x-xcf">
+ <alias type="image/xcf" />
+ <magic priority="50">
+ <match type="string" value="gimp xcf " offset="0" />
+ </magic>
+ </mime-type>
+
+ <mime-type type="model/iges">
+ <_comment>Initial Graphics Exchange Specification Format</_comment>
+ <glob pattern="*.igs" />
+ <glob pattern="*.iges" />
+ </mime-type>
+
+ <mime-type type="model/mesh">
+ <glob pattern="*.msh" />
+ <glob pattern="*.mesh" />
+ <glob pattern="*.silo" />
+ </mime-type>
+
+ <mime-type type="model/vrml">
+ <glob pattern="*.vrml" />
+ </mime-type>
+
+ <mime-type type="text/x-tcl">
+ <alias type="application/x-tcl" />
+ </mime-type>
+
+ <mime-type type="text/x-tex">
+ <magic priority="50">
+ <match value="\\input" type="string" offset="0" />
+ <match value="\\section" type="string" offset="0" />
+ <match value="\\setlength" type="string" offset="0" />
+ <match value="\\documentstyle" type="string" offset="0" />
+ <match value="\\chapter" type="string" offset="0" />
+ <match value="\\documentclass" type="string" offset="0" />
+ <match value="\\relax" type="string" offset="0" />
+ <match value="\\contentsline" type="string" offset="0" />
+ </magic>
+ <alias type="application/x-tex" />
+ </mime-type>
+
+ <mime-type type="text/x-texinfo">
+ <magic priority="50">
+ <match value="\\input\ texinfo" type="string" offset="0" />
+ </magic>
+ <alias type="application/x-texinfo" />
+ </mime-type>
+
+ <mime-type type="text/x-troff-me">
+ <alias type="application/x-troff-me" />
+ </mime-type>
+
+ <mime-type type="video/vnd.mpegurl">
+ <glob pattern="*.mxu" />
+ </mime-type>
+
+ <mime-type type="x-conference/x-cooltalk">
+ <_comment>Cooltalk Audio</_comment>
+ <glob pattern="*.ice" />
+ </mime-type>
+
+ <mime-type type="audio/mpeg">
+ <acronym>MP3</acronym>
+ <comment>MPEG-1 Audio Layer 3</comment>
+ <magic priority="20">
+ <!-- http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html -->
+ <!-- Bit pattern for first two bytes: 11111111 111VVLLC -->
+ <!-- VV = MPEG Audio Version ID; 10 = V2, 11 = V1 -->
+ <!-- LL = Layer description; 01 = L3, 10 = L2, 11 = L1 -->
+ <!-- C = Protection bit; 0 = CRC, 1 = no CRC -->
+ <match value="0xfff2" type="string" offset="0" /> <!-- V2, L3, CRC -->
+ <match value="0xfff3" type="string" offset="0" /> <!-- V2, L3 -->
+ <match value="0xfff4" type="string" offset="0" /> <!-- V2, L2, CRC -->
+ <match value="0xfff5" type="string" offset="0" /> <!-- V2, L2 -->
+ <match value="0xfff6" type="string" offset="0" /> <!-- V2, L1, CRC -->
+ <match value="0xfff7" type="string" offset="0" /> <!-- V2, L1 -->
+ <match value="0xfffa" type="string" offset="0" /> <!-- V1, L3, CRC -->
+ <match value="0xfffb" type="string" offset="0" /> <!-- V1, L3 -->
+ <match value="0xfffc" type="string" offset="0" /> <!-- V1, L2, CRC -->
+ <match value="0xfffd" type="string" offset="0" /> <!-- V1, L2 -->
+ <match value="0xfffe" type="string" offset="0" /> <!-- V1, L1, CRC -->
+ <match value="0xffff" type="string" offset="0" /> <!-- V1, L1 -->
+ <match value="ID3" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.mp3" />
+ </mime-type>
+
+ <!-- ===================================================================== -->
+ <!-- TIKA-85: http://www.apache.org/dev/svn-eol-style.txt -->
+ <!-- ===================================================================== -->
+
+ <mime-type type="image/x-icon">
+ <magic priority="50">
+ <match value="\102\101\050\000\000\000\056\000\000\000\000\000\000\000" type="string" offset="0" />
+ <match value="\000\000\001\000" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.ico" />
+ </mime-type>
+
+ <mime-type type="image/jpeg">
+ <acronym>JPEG</acronym>
+ <comment>Joint Photographic Experts Group</comment>
+ <magic priority="50">
+ <!-- FFD8 is the SOI (Start Of Image) marker. -->
+ <!-- It is followed by another marker that starts with FF. -->
+ <match value="0xffd8ff" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.jpg" />
+ <glob pattern="*.jpeg" />
+ <glob pattern="*.jpe" />
+ <glob pattern="*.jif" />
+ <glob pattern="*.jfif" />
+ <glob pattern="*.jfi" />
+ </mime-type>
+
+ <mime-type type="image/png">
+ <acronym>PNG</acronym>
+ <comment>Portable Network Graphics</comment>
+ <magic priority="50">
+ <match value="\x89PNG\x0d\x0a\x1a\x0a" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.png" />
+ </mime-type>
+
+ <mime-type type="audio/basic">
+ <magic priority="20">
+ <match value=".snd" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.au" />
+ <glob pattern="*.snd" />
+ </mime-type>
+
+ <mime-type type="audio/x-aiff">
+ <alias type="audio/aiff"/>
+ <acronym>AIFF</acronym>
+ <comment>Audio Interchange File Format</comment>
+ <magic priority="20">
+ <match value="FORM....AIFF" type="string" offset="0"
+ mask="0xFFFFFFFF00000000FFFFFFFF" />
+ <match value="FORM....AIFC" type="string" offset="0"
+ mask="0xFFFFFFFF00000000FFFFFFFF" />
+ <!-- Amiga IFF sound sample, somewhat like the more modern AIFF -->
+ <match value="FORM....8SVX" type="string" offset="0"
+ mask="0xFFFFFFFF00000000FFFFFFFF" />
+ </magic>
+ <glob pattern="*.aif" />
+ <glob pattern="*.aiff" />
+ <glob pattern="*.aifc" />
+ </mime-type>
+
+ <mime-type type="audio/x-wav">
+ <acronym>WAV</acronym>
+ <magic priority="20">
+ <match value="RIFF....WAVE" type="string" offset="0"
+ mask="0xFFFFFFFF00000000FFFFFFFF" />
+ </magic>
+ <glob pattern="*.wav" />
+ </mime-type>
+
+ <mime-type type="application/postscript">
+ <comment>PostScript</comment>
+ <magic priority="50">
+ <match value="%!" type="string" offset="0" />
+ <match value="\004%!" type="string" offset="0" />
+ <!-- Windows format EPS -->
+ <match value="0xc5d0d3c6" type="string" offset="0"/>
+ </magic>
+ <glob pattern="*.ps" />
+ <glob pattern="*.eps" />
+ <glob pattern="*.epsf" />
+ <glob pattern="*.epsi" />
+ </mime-type>
+
+ <mime-type type="application/vnd.lotus-wordpro">
+ <magic priority="50">
+ <match value="WordPro\0" type="string" offset="0" />
+ <match value="WordPro\r\373" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/vnd.ms-tnef">
+ <magic priority="50">
+ <match value="0x223e9f78" type="little16" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/vnd.rn-realmedia">
+ <magic priority="50">
+ <match value=".RMF" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/vnd.symbian.install">
+ <magic priority="50">
+ <match value="0x10000419" type="little32" offset="8" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-123">
+ <magic priority="50">
+ <match value="0x00001a00" type="big32" offset="0" />
+ <match value="0x00000200" type="big32" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-archive">
+ <magic priority="50">
+ <match value="=&lt;ar&gt;" type="string" offset="0" />
+ <match value="=!&lt;arch&gt;" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.ar" />
+ </mime-type>
+ <mime-type type="application/x-bittorrent">
+ <magic priority="50">
+ <match value="d8:announce" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.torrent" />
+ </mime-type>
+ <mime-type type="application/x-compress">
+ <magic priority="50">
+ <match value="\037\235" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.z" />
+ </mime-type>
+ <mime-type type="application/x-cpio">
+ <magic priority="50">
+ <match value="070707" type="host16" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-debian-package">
+ <glob pattern="*.deb" />
+ </mime-type>
+ <mime-type type="application/x-dvi">
+ <magic priority="50">
+ <match value="\367\002" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.dvi" />
+ </mime-type>
+ <mime-type type="application/x-gnucash">
+ <glob pattern="*.gnucash" />
+ </mime-type>
+ <mime-type type="application/x-gnumeric">
+ <magic priority="50">
+ <match value="=&lt;gmr:Workbook" type="string" offset="39" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-hdf">
+ <magic priority="50">
+ <match value="0x0e031301" type="big32" offset="0" />
+ <match value="\211HDF\r\n\032" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-hwp">
+ <magic priority="50">
+ <match value="R\0o\0o\0t\0" type="string" offset="512" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-iso9660-image">
+ <magic priority="50">
+ <match value="CD001" type="string" offset="37633" />
+ </magic>
+ <glob pattern="*.iso" />
+ </mime-type>
+ <mime-type type="application/x-kdelnk">
+ <magic priority="50">
+ <match value="[KDE\ Desktop\ Entry]" type="string" offset="0" />
+ <match value="#\ KDE\ Config\ File" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-lha">
+ <magic priority="50">
+ <match value="-lzs-" type="string" offset="2" />
+ <match value="-lh\40-" type="string" offset="2" />
+ <match value="-lhd-" type="string" offset="2" />
+ <match value="-lh2-" type="string" offset="2" />
+ <match value="-lh3-" type="string" offset="2" />
+ <match value="-lh4-" type="string" offset="2" />
+ <match value="-lh5-" type="string" offset="2" />
+ <match value="-lh6-" type="string" offset="2" />
+ <match value="-lh7-" type="string" offset="2" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-lharc">
+ <magic priority="50">
+ <match value="-lh0-" type="string" offset="2" />
+ <match value="-lh1-" type="string" offset="2" />
+ <match value="-lz4-" type="string" offset="2" />
+ <match value="-lz5-" type="string" offset="2" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-rar">
+ <magic priority="50">
+ <match value="Rar!" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.rar" />
+ </mime-type>
+ <mime-type type="application/x-rpm">
+ <glob pattern="*.rpm" />
+ </mime-type>
+ <mime-type type="application/x-shockwave-flash">
+ <magic priority="50">
+ <match value="FWS" type="string" offset="0" />
+ <match value="CWS" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-stuffit">
+ <magic priority="50">
+ <match value="StuffIt" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-zoo">
+ <magic priority="50">
+ <match value="0xfdc4a7dc" type="little32" offset="20" />
+ </magic>
+ <glob pattern="*.zoo" />
+ </mime-type>
+
+ <mime-type type="audio/x-flac">
+ <acronym>FLAC</acronym>
+ <comment>Free Lossless Audio Codec</comment>
+ <magic priority="50">
+ <match value="fLaC" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.flac" />
+ </mime-type>
+
+ <mime-type type="audio/x-mod">
+ <acronym>MOD</acronym>
+ <magic priority="50">
+ <match value="Extended\ Module:" type="string" offset="0" />
+ <match value="BMOD2STM" type="string" offset="21" />
+ <match value="M.K." type="string" offset="1080" />
+ <match value="M!K!" type="string" offset="1080" />
+ <match value="FLT4" type="string" offset="1080" />
+ <match value="FLT8" type="string" offset="1080" />
+ <match value="4CHN" type="string" offset="1080" />
+ <match value="6CHN" type="string" offset="1080" />
+ <match value="8CHN" type="string" offset="1080" />
+ <match value="CD81" type="string" offset="1080" />
+ <match value="OKTA" type="string" offset="1080" />
+ <match value="16CN" type="string" offset="1080" />
+ <match value="32CN" type="string" offset="1080" />
+ <match value="IMPM" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.mod" />
+ </mime-type>
+
+ <mime-type type="audio/x-mp4a">
+ <glob pattern="*.mp4a" />
+ </mime-type>
+
+ <mime-type type="audio/x-pn-realaudio">
+ <comment>Real Audio</comment>
+ <alias type="audio/x-realaudio" />
+ <magic priority="50">
+ <match value="0x2e7261fd" type="big32" offset="0" />
+ </magic>
+ <glob pattern="*.ra" />
+ </mime-type>
+
+ <mime-type type="image/gif">
+ <acronym>GIF</acronym>
+ <comment>Graphics Interchange Format</comment>
+ <magic priority="50">
+ <match value="GIF87a" type="string" offset="0" />
+ <match value="GIF89a" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.gif" />
+ </mime-type>
+
+ <mime-type type="image/tiff">
+ <acronym>TIFF</acronym>
+ <comment>Tagged Image File Format</comment>
+ <magic priority="50">
+ <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian -->
+ <match value="MM\x00\x2a" type="string" offset="0" />
+ <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
+ <match value="II\x2a\x00" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.tiff" />
+ <glob pattern="*.tif" />
+ </mime-type>
+
+ <mime-type type="image/x-ms-bmp">
+ <acronym>BMP</acronym>
+ <comment>Windows bitmap</comment>
+ <magic priority="50">
+ <match value="BM" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.bmp" />
+ <glob pattern="*.dib" />
+ </mime-type>
+
+ <mime-type type="image/x-portable-anymap">
+ <acronym>PNM</acronym>
+ <comment>Portable Any Map</comment>
+ <glob pattern="*.pnm" />
+ </mime-type>
+
+ <mime-type type="image/x-portable-bitmap">
+ <sub-class-of type="image/x-portable-anymap" />
+ <acronym>PBM</acronym>
+ <comment>Portable Bit Map</comment>
+ <magic priority="50">
+ <match value="P1" type="string" offset="0" />
+ <match value="P4" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.pbm" />
+ </mime-type>
+
+ <mime-type type="image/x-portable-graymap">
+ <sub-class-of type="image/x-portable-anymap" />
+ <acronym>PGM</acronym>
+ <comment>Portable Gray Map</comment>
+ <magic priority="50">
+ <match value="P2" type="string" offset="0" />
+ <match value="P5" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.pgm" />
+ </mime-type>
+
+ <mime-type type="image/x-portable-pixmap">
+ <sub-class-of type="image/x-portable-anymap" />
+ <acronym>PXM</acronym>
+ <comment>Portable Pixel Map</comment>
+ <magic priority="50">
+ <match value="P3" type="string" offset="0" />
+ <match value="P6" type="string" offset="0" />
+ <match value="P7" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.ppm" />
+ </mime-type>
+
+ <mime-type type="image/cgm">
+ <acronym>CGM</acronym>
+ <comment>Computer Graphics Metafile</comment>
+ <magic priority="50">
+ <match value="BEGMF" type="string" offset="0" />
+ <match value="0x0020" mask="0xffe0" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.cgm" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-dng">
+ <acronym>DNG</acronym>
+ <comment>Adobe Digital Negative</comment>
+ <glob pattern="*.dng" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-hasselblad">
+ <comment>Hasselblad raw image</comment>
+ <glob pattern="*.3fr" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-fuji">
+ <comment>Fuji raw image</comment>
+ <glob pattern="*.raf" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-canon">
+ <comment>Canon raw image</comment>
+ <glob pattern="*.crw" />
+ <glob pattern="*.cr2" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-kodak">
+ <comment>Kodak raw image</comment>
+ <glob pattern="*.k25" />
+ <glob pattern="*.kdc" />
+ <glob pattern="*.dcs" />
+ <glob pattern="*.drf" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-minolta">
+ <comment>Minolta raw image</comment>
+ <glob pattern="*.mrw" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-nikon">
+ <comment>Nikon raw image</comment>
+ <glob pattern="*.nef" />
+ <glob pattern="*.nrw" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-olympus">
+ <comment>Olympus raw image</comment>
+ <glob pattern="*.orf" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-pentax">
+ <comment>Pentax raw image</comment>
+ <glob pattern="*.ptx" />
+ <glob pattern="*.pef" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-sony">
+ <comment>Sony raw image</comment>
+ <glob pattern="*.arw" />
+ <glob pattern="*.srf" />
+ <glob pattern="*.sr2" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-sigma">
+ <comment>Sigma raw image</comment>
+ <glob pattern="*.x3f" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-epson">
+ <comment>Epson raw image</comment>
+ <glob pattern="*.erf" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-mamiya">
+ <comment>Mamiya raw image</comment>
+ <glob pattern="*.mef" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-leaf">
+ <comment>Leaf raw image</comment>
+ <glob pattern="*.mos" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-panasonic">
+ <comment>Panasonic raw image</comment>
+ <glob pattern="*.raw" />
+ <glob pattern="*.rw2" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-phaseone">
+ <comment>Phase One raw image</comment>
+ <glob pattern="*.cap" />
+ <glob pattern="*.iiq" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-red">
+ <comment>Red raw image</comment>
+ <glob pattern="*.r3d" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-imacon">
+ <comment>Imacon raw image</comment>
+ <glob pattern="*.fff" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-logitech">
+ <comment>Logitech raw image</comment>
+ <glob pattern="*.pxn" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-casio">
+ <comment>Casio raw image</comment>
+ <glob pattern="*.bay" />
+ </mime-type>
+
+ <mime-type type="image/x-tika-rawzor">
+ <comment>Rawzor raw image</comment>
+ <glob pattern="*.rwz" />
+ </mime-type>
+
+ <mime-type type="message/news">
+ <magic priority="50">
+ <match value="Path:" type="string" offset="0" />
+ <match value="Xref:" type="string" offset="0" />
+ <match value="Article" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="message/rfc822">
+ <magic priority="50">
+ <match value="Relay-Version:" type="string" offset="0" />
+ <match value="#!\ rnews" type="string" offset="0" />
+ <match value="N#!\ rnews" type="string" offset="0" />
+ <match value="Forward\ to" type="string" offset="0" />
+ <match value="Pipe\ to" type="string" offset="0" />
+ <match value="Return-Path:" type="string" offset="0" />
+ <match value="From:" type="string" offset="0" />
+ <match value="Received:" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="model/vrml">
+ <glob pattern="*.vrml" />
+ </mime-type>
+ <mime-type type="text/troff">
+ <magic priority="50">
+ <match value=".\\&quot;" type="string" offset="0" />
+ <match value="'\\&quot;" type="string" offset="0" />
+ <match value="'.\\&quot;" type="string" offset="0" />
+ <match value="\\&quot;" type="string" offset="0" />
+ <match value="'''" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="text/x-diff">
+ <magic priority="50">
+ <match value="diff\ " type="string" offset="0" />
+ <match value="***\ " type="string" offset="0" />
+ <match value="Only\ in\ " type="string" offset="0" />
+ <match value="Common\ subdirectories:\ " type="string" offset="0" />
+ <match value="Index:" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="video/mpeg">
+ <glob pattern="*.mpg" />
+ <glob pattern="*.mpeg" />
+ </mime-type>
+ <mime-type type="video/quicktime">
+ <magic priority="50">
+ <match value="moov" type="string" offset="4" />
+ <match value="mdat" type="string" offset="4" />
+ <match value="ftyp" type="string" offset="4" />
+ </magic>
+ <glob pattern="*.mov" />
+ </mime-type>
+ <mime-type type="video/x-flc">
+ <glob pattern="*.flc" />
+ </mime-type>
+ <mime-type type="video/x-fli">
+ <glob pattern="*.fli" />
+ </mime-type>
+ <mime-type type="video/x-flv">
+ <magic priority="50">
+ <match value="FLV" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.flv" />
+ </mime-type>
+ <mime-type type="video/x-jng">
+ <magic priority="50">
+ <match value="\x8bJNG" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.jng" />
+ </mime-type>
+ <mime-type type="video/x-mng">
+ <magic priority="50">
+ <match value="\x8aMNG" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.mng" />
+ </mime-type>
+ <mime-type type="video/x-msvideo">
+ <magic priority="50">
+ <match value="RIFF....AVI " type="string" offset="0"
+ mask="0xFFFFFFFF00000000FFFFFFFF" />
+ </magic>
+ <glob pattern="*.avi" />
+ <alias type="video/avi" />
+ <alias type="video/msvideo" />
+ </mime-type>
+ <mime-type type="video/x-sgi-movie">
+ <magic priority="50">
+ <match value="MOVI" type="string" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-Berkeley-DB">
+ <magic priority="50">
+ <match value="0x00061561" type="big32" offset="0" />
+ <match value="0x00061561" type="host32" offset="12" />
+ <match value="0x00061561" type="big32" offset="12" />
+ <match value="0x00061561" type="little32" offset="12" />
+ <match value="0x00053162" type="host32" offset="12" />
+ <match value="0x00053162" type="big32" offset="12" />
+ <match value="0x00053162" type="little32" offset="12" />
+ <match value="0x00042253" type="host32" offset="12" />
+ <match value="0x00042253" type="big32" offset="12" />
+ <match value="0x00042253" type="little32" offset="12" />
+ <match value="0x00040988" type="host32" offset="12" />
+ <match value="0x00040988" type="little32" offset="12" />
+ <match value="0x00040988" type="big32" offset="12" />
+ <match value="0x00053162" type="host32" offset="0" />
+ <match value="0x00053162" type="big32" offset="0" />
+ <match value="0x00053162" type="little32" offset="0" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-BibTeX-text-file">
+ <magic priority="50">
+ <match value="%\ BibTeX\ `" type="string" offset="0" />
+ <match value="%%%\ \ " type="string" offset="73" />
+ <match value="%\ BibTeX\ standard\ bibliography\ " type="string" offset="0" />
+ <match value="%%%\ \ @BibTeX-style-file{" type="string" offset="73" />
+ <match value="@article{" type="string" offset="0" />
+ <match value="@book{" type="string" offset="0" />
+ <match value="@inbook{" type="string" offset="0" />
+ <match value="@incollection{" type="string" offset="0" />
+ <match value="@inproceedings{" type="string" offset="0" />
+ <match value="@manual{" type="string" offset="0" />
+ <match value="@misc{" type="string" offset="0" />
+ <match value="@preamble{" type="string" offset="0" />
+ <match value="@phdthesis{" type="string" offset="0" />
+ <match value="@techreport{" type="string" offset="0" />
+ <match value="@unpublished{" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.bib" />
+ <glob pattern="*.bibtex" />
+ </mime-type>
+ <mime-type type="application/x-BinHex-binary-text">
+ <magic priority="50">
+ <match value="must\ be\ converted\ with\ BinHex" type="string" offset="11" />
+ </magic>
+ </mime-type>
+ <mime-type type="application/x-Gnumeric-spreadsheet">
+ <magic priority="50">
+ <match value="=&lt;gmr:Workbook" type="string" offset="39" />
+ </magic>
+ <glob pattern="*.gnumeric" />
+ </mime-type>
</mime-info>
Added: lucene/nutch/trunk/lib/tika-core-0.5.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/tika-core-0.5.jar?rev=885869&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/tika-core-0.5.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java?rev=885869&r1=885868&r2=885869&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java Tue Dec 1 19:01:58 2009
@@ -19,6 +19,7 @@
// JDK imports
import java.io.File;
+import java.io.IOException;
import java.util.logging.Logger;
// Hadoop imports
@@ -59,8 +60,13 @@
MimeTypes mimeTypez = (MimeTypes) objectCache.getObject(MimeTypes.class
.getName());
if (mimeTypez == null) {
- mimeTypez = MimeTypesFactory.create(conf
- .getConfResourceAsInputStream(conf.get("mime.types.file")));
+ try {
+ mimeTypez = MimeTypesFactory.create(conf
+ .getConfResourceAsInputStream(conf.get("mime.types.file")));
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new RuntimeException(e);
+ }
objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
}
@@ -139,7 +145,7 @@
// if returned null, or if it's the default type then try url resolution
if (type == null
- || (type != null && type.getName().equals(MimeTypes.DEFAULT))) {
+ || (type != null && type.getName().equals(MimeTypes.OCTET_STREAM))) {
// If no mime-type header, or cannot find a corresponding registered
// mime-type, then guess a mime-type from the url pattern
type = this.mimeTypes.getMimeType(url) != null ? this.mimeTypes
@@ -152,7 +158,7 @@
// returned by the magic
if (this.mimeMagic) {
MimeType magicType = this.mimeTypes.getMimeType(data);
- if (magicType != null && !magicType.getName().equals(MimeTypes.DEFAULT)
+ if (magicType != null && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
&& type != null && !type.getName().equals(magicType.getName())) {
// If magic enabled and the current mime type differs from that of the
// one returned from the magic, take the magic mimeType
@@ -163,7 +169,7 @@
// default type
if (type == null) {
try {
- type = this.mimeTypes.forName(MimeTypes.DEFAULT);
+ type = this.mimeTypes.forName(MimeTypes.OCTET_STREAM);
} catch (Exception ignore) {
}
}