You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2009/12/01 20:01:58 UTC

svn commit: r885869 - in /lucene/nutch/trunk: CHANGES.txt conf/tika-mimetypes.xml lib/tika-0.1-incubating.jar lib/tika-core-0.5.jar src/java/org/apache/nutch/util/MimeUtil.java

Author: ab
Date: Tue Dec  1 19:01:58 2009
New Revision: 885869

URL: http://svn.apache.org/viewvc?rev=885869&view=rev
Log:
NUTCH-767 Update Tika to v0.5 for the MimeType detection.

Added:
    lucene/nutch/trunk/lib/tika-core-0.5.jar   (with props)
Removed:
    lucene/nutch/trunk/lib/tika-0.1-incubating.jar
Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/conf/tika-mimetypes.xml
    lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=885869&r1=885868&r2=885869&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Dec  1 19:01:58 2009
@@ -2,6 +2,8 @@
 
 Unreleased Changes
 
+* NUTCH-767 Update Tika to v0.5 for the MimeType detection (Julien Nioche via ab)
+
 * NUTCH-769 Fetcher to skip queues for URLS getting repeated exceptions
   (Julien Nioche via ab)
 

Modified: lucene/nutch/trunk/conf/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/tika-mimetypes.xml?rev=885869&r1=885868&r2=885869&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/tika-mimetypes.xml (original)
+++ lucene/nutch/trunk/conf/tika-mimetypes.xml Tue Dec  1 19:01:58 2009
@@ -1,367 +1,1542 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!--
-	Licensed to the Apache Software Foundation (ASF) under one or more
-	contributor license agreements.  See the NOTICE file distributed with
-	this work for additional information regarding copyright ownership.
-	The ASF licenses this file to You under the Apache License, Version 2.0
-	(the "License"); you may not use this file except in compliance with
-	the License.  You may obtain a copy of the License at
-	
-	http://www.apache.org/licenses/LICENSE-2.0
-	
-	Unless required by applicable law or agreed to in writing, software
-	distributed under the License is distributed on an "AS IS" BASIS,
-	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	See the License for the specific language governing permissions and
-	limitations under the License.
-	
-	Description: This xml file defines the valid mime types used by Tika.
-	The mime types within this file are based on the types in the mime-types.xml 
-	file available in Apache Nutch.
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!--
+  Description: This xml file defines the valid mime types used by Tika.
+  The mime types within this file are based on the types in the mime-types.xml
+  file available in Apache Nutch.
 -->
-
 <mime-info>
 
-	<mime-type type="text/plain">
-		<magic priority="50">
-			<match value="This is TeX," type="string" offset="0" />
-			<match value="This is METAFONT," type="string" offset="0" />
-		</magic>
-		<glob pattern="*.txt" />
-		<glob pattern="*.asc" />
-	</mime-type>
-
-	<mime-type type="text/html">
-		<magic priority="50">
-			<match value="&lt;!DOCTYPE HTML" type="string"
-				offset="0:64" />
-			<match value="&lt;!doctype html" type="string"
-				offset="0:64" />
-			<match value="&lt;HEAD" type="string" offset="0:64" />
-			<match value="&lt;head" type="string" offset="0:64" />
-			<match value="&lt;TITLE" type="string" offset="0:64" />
-			<match value="&lt;title" type="string" offset="0:64" />
-			<match value="&lt;html" type="string" offset="0:64" />
-			<match value="&lt;HTML" type="string" offset="0:64" />
-			<match value="&lt;BODY" type="string" offset="0" />
-			<match value="&lt;body" type="string" offset="0" />
-			<match value="&lt;TITLE" type="string" offset="0" />
-			<match value="&lt;title" type="string" offset="0" />
-			<match value="&lt;!--" type="string" offset="0" />
-			<match value="&lt;h1" type="string" offset="0" />
-			<match value="&lt;H1" type="string" offset="0" />
-			<match value="&lt;!doctype HTML" type="string" offset="0" />
-			<match value="&lt;!DOCTYPE html" type="string" offset="0" />
-		</magic>
-		<glob pattern="*.html" />
-		<glob pattern="*.htm" />
-	</mime-type>
-
-	<mime-type type="application/xhtml+xml">
-		<glob pattern="*.xhtml" />
-		<root-XML namespaceURI='http://www.w3.org/1999/xhtml'
-			localName='html' />
-	</mime-type>
-
-	<mime-type type="application/vnd.ms-powerpoint">
-		<glob pattern="*.ppz" />
-		<glob pattern="*.ppt" />
-		<glob pattern="*.pps" />
-		<glob pattern="*.pot" />
-		<magic priority="50">
-			<match value="0xcfd0e011" type="little32" offset="0" />
-		</magic>
-	</mime-type>
-
-	<mime-type type="application/vnd.ms-excel">
-		<magic priority="50">
-			<match value="Microsoft Excel 5.0 Worksheet" type="string"
-				offset="2080" />
-		</magic>
-		<glob pattern="*.xls" />
-		<glob pattern="*.xlc" />
-		<glob pattern="*.xll" />
-		<glob pattern="*.xlm" />
-		<glob pattern="*.xlw" />
-		<glob pattern="*.xla" />
-		<glob pattern="*.xlt" />
-		<glob pattern="*.xld" />
-		<alias type="application/msexcel" />
-	</mime-type>
-
-	<mime-type type="application/vnd.oasis.opendocument.text">
-		<glob pattern="*.odt" />
-	</mime-type>
-
-
-	<mime-type type="application/zip">
-		<alias type="application/x-zip-compressed" />
-		<magic priority="40">
-			<match value="PK\003\004" type="string" offset="0" />
-		</magic>
-		<glob pattern="*.zip" />
-	</mime-type>
-
-	<mime-type type="application/vnd.oasis.opendocument.text">
-		<glob pattern="*.oth" />
-	</mime-type>
-
-	<mime-type type="application/msword">
-		<magic priority="50">
-			<match value="\x31\xbe\x00\x00" type="string" offset="0" />
-			<match value="PO^Q`" type="string" offset="0" />
-			<match value="\376\067\0\043" type="string" offset="0" />
-			<match value="\333\245-\0\0\0" type="string" offset="0" />
-			<match value="Microsoft Word 6.0 Document" type="string"
-				offset="2080" />
-			<match value="Microsoft Word document data" type="string"
-				offset="2112" />
-		</magic>
-		<glob pattern="*.doc" />
-		<alias type="application/vnd.ms-word" />
-	</mime-type>
-
-	<mime-type type="application/octet-stream">
-		<magic priority="50">
-			<match value="\037\036" type="string" offset="0" />
-			<match value="017437" type="host16" offset="0" />
-			<match value="0x1fff" type="host16" offset="0" />
-			<match value="\377\037" type="string" offset="0" />
-			<match value="0145405" type="host16" offset="0" />
-		</magic>
-		<glob pattern="*.bin" />
-	</mime-type>
-
-	<mime-type type="application/pdf">
-		<magic priority="50">
-			<match value="%PDF-" type="string" offset="0" />
-		</magic>
-		<glob pattern="*.pdf" />
-		<alias type="application/x-pdf" />
-	</mime-type>
-
-	<mime-type type="application/atom+xml">
-		<root-XML localName="feed"
-			namespaceURI="http://purl.org/atom/ns#" />
-	</mime-type>
-
-	<mime-type type="application/mac-binhex40">
-		<glob pattern="*.hqx" />
-	</mime-type>
-
-	<mime-type type="application/mac-compactpro">
-		<glob pattern="*.cpt" />
-	</mime-type>
-
-	<mime-type type="application/rtf">
-	    <glob pattern="*.rtf"/>
-		<alias type="text/rtf" />
-	</mime-type>
-
-	<mime-type type="application/rss+xml">
-		<alias type="text/rss" />
-		<root-XML localName="rss" />
-		<root-XML namespaceURI="http://purl.org/rss/1.0/" />
-		<glob pattern="*.rss" />
-	</mime-type>
-
-	<!--  added in by mattmann -->
-	<mime-type type="application/xml">
-		<alias type="text/xml" />
-		<glob pattern="*.xml" />
-	</mime-type>
-
-	<mime-type type="application/x-mif">
-		<alias type="application/vnd.mif" />
-	</mime-type>
-
-	<mime-type type="application/vnd.wap.wbxml">
-		<glob pattern="*.wbxml" />
-	</mime-type>
-
-	<mime-type type="application/vnd.wap.wmlc">
-		<_comment>Compiled WML Document</_comment>
-		<glob pattern="*.wmlc" />
-	</mime-type>
-
-	<mime-type type="application/vnd.wap.wmlscriptc">
-		<_comment>Compiled WML Script</_comment>
-		<glob pattern="*.wmlsc" />
-	</mime-type>
-
-	<mime-type type="text/vnd.wap.wmlscript">
-		<_comment>WML Script</_comment>
-		<glob pattern="*.wmls" />
-	</mime-type>
-
-	<mime-type type="application/x-bzip">
-		<alias type="application/x-bzip2" />
-	</mime-type>
-
-	<mime-type type="application/x-bzip-compressed-tar">
-		<glob pattern="*.tbz" />
-		<glob pattern="*.tbz2" />
-	</mime-type>
-
-	<mime-type type="application/x-cdlink">
-		<_comment>Virtual CD-ROM CD Image File</_comment>
-		<glob pattern="*.vcd" />
-	</mime-type>
-
-	<mime-type type="application/x-director">
-		<_comment>Shockwave Movie</_comment>
-		<glob pattern="*.dcr" />
-		<glob pattern="*.dir" />
-		<glob pattern="*.dxr" />
-	</mime-type>
-
-	<mime-type type="application/x-futuresplash">
-		<_comment>Macromedia FutureSplash File</_comment>
-		<glob pattern="*.spl" />
-	</mime-type>
-
-	<mime-type type="application/x-java">
-		<alias type="application/java" />
-	</mime-type>
-
-	<mime-type type="application/x-koan">
-		<_comment>SSEYO Koan File</_comment>
-		<glob pattern="*.skp" />
-		<glob pattern="*.skd" />
-		<glob pattern="*.skt" />
-		<glob pattern="*.skm" />
-	</mime-type>
-
-	<mime-type type="application/x-latex">
-		<_comment>LaTeX Source Document</_comment>
-		<glob pattern="*.latex" />
-	</mime-type>
-
-	<!-- JC CHANGED
-		<mime-type type="application/x-mif">
-		<_comment>FrameMaker MIF document</_comment>
-		<glob pattern="*.mif"/>
-		</mime-type> -->
-
-	<mime-type type="application/x-ms-dos-executable">
-		<alias type="application/x-dosexec" />
-	</mime-type>
-
-	<mime-type type="application/ogg">
-		<alias type="application/x-ogg" />
-	</mime-type>
-
-	<mime-type type="application/x-rar">
-		<alias type="application/x-rar-compressed" />
-	</mime-type>
-
-	<mime-type type="application/x-shellscript">
-		<alias type="application/x-sh" />
-	</mime-type>
-
-	<mime-type type="application/xhtml+xml">
-		<glob pattern="*.xht" />
-	</mime-type>
-
-	<mime-type type="audio/midi">
-		<glob pattern="*.kar" />
-	</mime-type>
-
-	<mime-type type="audio/x-pn-realaudio">
-		<alias type="audio/x-realaudio" />
-	</mime-type>
-
-	<mime-type type="image/tiff">
-		<magic priority="50">
-			<match value="0x4d4d2a00" type="string" offset="0" />
-			<match value="0x49492a00" type="string" offset="0" />
-		</magic>
-	</mime-type>
-
-	<mime-type type="message/rfc822">
-		<magic priority="50">
-			<match type="string" value="Relay-Version:" offset="0" />
-			<match type="string" value="#! rnews" offset="0" />
-			<match type="string" value="N#! rnews" offset="0" />
-			<match type="string" value="Forward to" offset="0" />
-			<match type="string" value="Pipe to" offset="0" />
-			<match type="string" value="Return-Path:" offset="0" />
-			<match type="string" value="From:" offset="0" />
-			<match type="string" value="Message-ID:" offset="0" />
-			<match type="string" value="Date:" offset="0" />
-		</magic>
-	</mime-type>
-	
-	<mime-type type="application/x-javascript">
-        <glob pattern="*.js" />
-    </mime-type>
-    
-
-	<mime-type type="image/vnd.wap.wbmp">
-		<_comment>Wireless Bitmap File Format</_comment>
-		<glob pattern="*.wbmp" />
-	</mime-type>
-
-	<mime-type type="image/x-psd">
-		<alias type="image/photoshop" />
-	</mime-type>
-
-	<mime-type type="image/x-xcf">
-		<alias type="image/xcf" />
-		<magic priority="50">
-			<match type="string" value="gimp xcf " offset="0" />
-		</magic>
-	</mime-type>
-	
-	<mime-type type="application/x-shockwave-flash">
-      <glob pattern="*.swf"/>
-      <magic priority="50">
-        <match type="string" value="FWS" offset="0"/>
-        <match type="string" value="CWS" offset="0"/>
-      </magic>
-    </mime-type>
-
-	<mime-type type="model/iges">
-		<_comment>
-			Initial Graphics Exchange Specification Format
-		</_comment>
-		<glob pattern="*.igs" />
-		<glob pattern="*.iges" />
-	</mime-type>
-
-	<mime-type type="model/mesh">
-		<glob pattern="*.msh" />
-		<glob pattern="*.mesh" />
-		<glob pattern="*.silo" />
-	</mime-type>
-
-	<mime-type type="model/vrml">
-		<glob pattern="*.vrml" />
-	</mime-type>
-
-	<mime-type type="text/x-tcl">
-		<alias type="application/x-tcl" />
-	</mime-type>
-
-	<mime-type type="text/x-tex">
-		<alias type="application/x-tex" />
-	</mime-type>
-
-	<mime-type type="text/x-texinfo">
-		<alias type="application/x-texinfo" />
-	</mime-type>
-
-	<mime-type type="text/x-troff-me">
-		<alias type="application/x-troff-me" />
-	</mime-type>
-
-	<mime-type type="video/vnd.mpegurl">
-		<glob pattern="*.mxu" />
-	</mime-type>
-
-	<mime-type type="x-conference/x-cooltalk">
-		<_comment>Cooltalk Audio</_comment>
-		<glob pattern="*.ice" />
-	</mime-type>
+  <mime-type type="text/plain">
+    <magic priority="20">
+      <match value="This is TeX," type="string" offset="0" />
+      <match value="This is METAFONT," type="string" offset="0" />
+      <match value="#!/" type="string" offset="0" />
+      <match value="#!\ /" type="string" offset="0" />
+      <match value="#!\t/" type="string" offset="0" />
+      <!-- UTF-16BE BOM -->
+      <match value="0xfeff" type="string" offset="0"/>
+      <!-- UTF-16LE BOM -->
+      <match value="0xfffe" type="string" offset="0"/>
+      <!-- UTF-8 BOM -->
+      <match value="0xefbbbf" type="string" offset="0"/>
+    </magic>
+    <glob pattern="*.txt" />
+    <glob pattern="*.asc" />
+
+    <!-- TIKA-85: http://www.apache.org/dev/svn-eol-style.txt -->
+    <glob pattern="INSTALL" />
+    <glob pattern="KEYS" />
+    <glob pattern="Makefile" />
+    <glob pattern="README" />
+    <glob pattern="abs-linkmap" />
+    <glob pattern="abs-menulinks" />
+    <glob pattern="*.aart" />
+    <glob pattern="*.ac" />
+    <glob pattern="*.am" />
+    <glob pattern="*.bat" />
+    <glob pattern="*.c" />
+    <glob pattern="*.cat" />
+    <glob pattern="*.cgi" />
+    <glob pattern="*.classpath" />
+    <glob pattern="*.cmd" />
+    <glob pattern="*.conf" />
+    <glob pattern="*.config" />
+    <glob pattern="*.cpp" />
+    <glob pattern="*.css" />
+    <glob pattern="*.cwiki" />
+    <glob pattern="*.data" />
+    <glob pattern="*.dcl" />
+    <glob pattern="*.dtd" />
+    <glob pattern="*.egrm" />
+    <glob pattern="*.ent" />
+    <glob pattern="*.ft" />
+    <glob pattern="*.fn" />
+    <glob pattern="*.fv" />
+    <glob pattern="*.grm" />
+    <glob pattern="*.g" />
+    <glob pattern="*.h" />
+    <glob pattern=".htaccess" />
+    <glob pattern="*.ihtml" />
+    <glob pattern="*.in" />
+    <glob pattern="*.java" />
+    <glob pattern="*.jmx" />
+    <glob pattern="*.jsp" />
+    <glob pattern="*.js" />
+    <glob pattern="*.junit" />
+    <glob pattern="*.jx" />
+    <glob pattern="*.manifest" />
+    <glob pattern="*.m4" />
+    <glob pattern="*.mf" />
+    <glob pattern="*.MF" />
+    <glob pattern="*.meta" />
+    <glob pattern="*.n3" />
+    <glob pattern="*.pen" />
+    <glob pattern="*.pl" />
+    <glob pattern="*.pm" />
+    <glob pattern="*.pod" />
+    <glob pattern="*.pom" />
+    <glob pattern="*.project" />
+    <glob pattern="*.properties" />
+    <glob pattern="*.py" />
+    <glob pattern="*.rb" />
+    <glob pattern="*.rdf" />
+    <glob pattern="*.rnc" />
+    <glob pattern="*.rng" />
+    <glob pattern="*.rnx" />
+    <glob pattern="*.roles" />
+    <glob pattern="*.sh" />
+    <glob pattern="*.sql" />
+    <glob pattern="*.tld" />
+    <glob pattern="*.types" />
+    <glob pattern="*.vm" />
+    <glob pattern="*.vsl" />
+    <glob pattern="*.wsdd" />
+    <glob pattern="*.wsdl" />
+    <glob pattern="*.xargs" />
+    <glob pattern="*.xcat" />
+    <glob pattern="*.xconf" />
+    <glob pattern="*.xegrm" />
+    <glob pattern="*.xgrm" />
+    <glob pattern="*.xlex" />
+    <glob pattern="*.xlog" />
+    <glob pattern="*.xmap" />
+    <glob pattern="*.xroles" />
+    <glob pattern="*.xsamples" />
+    <glob pattern="*.xsp" />
+    <glob pattern="*.xul" />
+    <glob pattern="*.xweb" />
+    <glob pattern="*.xwelcome" />
+  </mime-type>
+
+  <mime-type type="text/html">
+    <magic priority="50">
+      <match value="&lt;!DOCTYPE HTML" type="string" offset="0:64" />
+      <match value="&lt;!doctype html" type="string" offset="0:64" />
+      <match value="&lt;HEAD" type="string" offset="0:64" />
+      <match value="&lt;head" type="string" offset="0:64" />
+      <match value="&lt;TITLE" type="string" offset="0:64" />
+      <match value="&lt;title" type="string" offset="0:64" />
+      <match value="&lt;html" type="string" offset="0:64" />
+      <match value="&lt;HTML" type="string" offset="0:64" />
+      <match value="&lt;BODY" type="string" offset="0" />
+      <match value="&lt;body" type="string" offset="0" />
+      <match value="&lt;TITLE" type="string" offset="0" />
+      <match value="&lt;title" type="string" offset="0" />
+      <match value="&lt;!--" type="string" offset="0" />
+      <match value="&lt;h1" type="string" offset="0" />
+      <match value="&lt;H1" type="string" offset="0" />
+      <match value="&lt;!doctype HTML" type="string" offset="0" />
+      <match value="&lt;!DOCTYPE html" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.html" />
+    <glob pattern="*.htm" />
+  </mime-type>
+
+  <mime-type type="application/xhtml+xml">
+    <sub-class-of type="application/xml" />
+    <glob pattern="*.xhtml" />
+    <glob pattern="*.xht" />
+    <root-XML namespaceURI="http://www.w3.org/1999/xhtml" localName="html" />
+  </mime-type>
+
+  <!-- ===================================================================== -->
+  <!-- Microsoft Office binary file formats                                  -->
+  <!-- http://www.microsoft.com/interop/docs/OfficeBinaryFormats.mspx        -->
+  <!-- ===================================================================== -->
+
+  <mime-type type="application/x-tika-msoffice">
+    <magic>
+      <match value="0xd0cf11e0a1b11ae1" type="string" offset="0:8" />
+    </magic>
+  </mime-type>
+
+  <!-- http://www.iana.org/assignments/media-types/application/vnd.visio -->
+  <mime-type type="application/vnd.visio">
+    <comment>Microsoft Visio Diagram</comment>
+    <glob pattern="*.vsd" />
+    <glob pattern="*.vst" />
+    <glob pattern="*.vsw" />
+    <glob pattern="*.vss" />
+    <sub-class-of type="application/x-tika-msoffice"/>
+  </mime-type>
+
+  <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-powerpoint -->
+  <mime-type type="application/vnd.ms-powerpoint">
+    <comment>Microsoft Powerpoint Presentation</comment>
+    <glob pattern="*.ppz" />
+    <glob pattern="*.ppt" />
+    <glob pattern="*.pps" />
+    <glob pattern="*.pot" />
+    <glob pattern="*.ppa" />
+    <alias type="application/mspowerpoint" />
+    <sub-class-of type="application/x-tika-msoffice"/>
+  </mime-type>
+
+  <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-excel -->
+  <mime-type type="application/vnd.ms-excel">
+    <comment>Microsoft Excel Spreadsheet</comment>
+    <magic priority="50">
+      <match value="Microsoft\ Excel\ 5.0\ Worksheet" type="string" offset="2080" />
+      <match value="Foglio\ di\ lavoro\ Microsoft\ Exce" type="string" offset="2080" />
+      <match value="Biff5" type="string" offset="2114" />
+      <match value="Biff5" type="string" offset="2121" />
+      <match value="\x09\x04\x06\x00\x00\x00\x10\x00" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.xls" />
+    <glob pattern="*.xlc" />
+    <glob pattern="*.xll" />
+    <glob pattern="*.xlm" />
+    <glob pattern="*.xlw" />
+    <glob pattern="*.xla" />
+    <glob pattern="*.xlt" />
+    <glob pattern="*.xld" />
+    <alias type="application/msexcel" />
+    <sub-class-of type="application/x-tika-msoffice"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12">
+    <comment>Microsoft Excel 2007 Binary Spreadsheet</comment>
+    <glob pattern="*.xlsb"/>
+    <sub-class-of type="application/vnd.ms-excel"/>
+  </mime-type>
+
+  <!-- http://www.iana.org/assignments/media-types/application/msword -->
+  <mime-type type="application/msword">
+    <comment>Microsoft Word Document</comment>
+    <magic priority="50">
+      <match value="Microsoft\ Word\ 6.0\ Document" type="string" offset="2080" />
+      <match value="Documento\ Microsoft\ Word\ 6" type="string" offset="2080" />
+      <match value="MSWordDoc" type="string" offset="2112" />
+      <match value="0x31be0000" type="big32" offset="0" />
+      <match value="PO^Q`" type="string" offset="0" />
+      <match value="\376\067\0\043" type="string" offset="0" />
+      <match value="\333\245-\0\0\0" type="string" offset="0" />
+      <match value="\354\245\301" type="string" offset="512" />
+      <match value="\320\317\021\340\241\261\032\341" type="string" offset="0" />
+      <match value="\224\246\056" type="string" offset="0" />
+      <match value="R\0o\0o\0t\0\ \0E\0n\0t\0r\0y" type="string" offset="512" />
+    </magic>
+    <glob pattern="*.doc" />
+    <glob pattern="*.dot" />
+    <alias type="application/vnd.ms-word" />
+    <sub-class-of type="application/x-tika-msoffice"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-outlook">
+    <comment>Microsoft Outlook Message</comment>
+    <glob pattern="*.msg" />
+    <sub-class-of type="application/x-tika-msoffice"/>
+  </mime-type>
+
+  <!-- ===================================================================== -->
+  <!-- Office Open XML file formats                                          -->
+  <!-- http://www.ecma-international.org/publications/standards/Ecma-376.htm -->
+  <!-- ===================================================================== -->
+
+  <mime-type type="application/x-tika-ooxml">
+    <sub-class-of type="application/zip"/>
+    <magic priority="50">
+      <match value="PK\003\004" type="string" offset="0">
+        <match value="[Content_Types].xml" type="string" offset="30"/>
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.presentation">
+    <comment>Office Open XML Presentation</comment>
+    <glob pattern="*.pptx"/>
+    <glob pattern="*.sldx"/>
+    <glob pattern="*.thmx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12">
+    <comment>Office Open XML Presentation (macro-enabled)</comment>
+    <glob pattern="*.pptm"/>
+    <glob pattern="*.potm"/>
+    <glob pattern="*.sldm"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.template">
+    <comment>Office Open XML Presentation Template</comment>
+    <glob pattern="*.potx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slideshow">
+    <comment>Office Open XML Presentation Slideshow</comment>
+    <glob pattern="*.ppsx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12">
+    <comment>Office Open XML Presentation Slideshow (macro-enabled)</comment>
+    <glob pattern="*.ppsm"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12">
+    <comment>Office Open XML Presentation Add-in (macro-enabled)</comment>
+    <glob pattern="*.ppam"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">
+    <comment>Office Open XML Workbook</comment>
+    <glob pattern="*.xlsx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-excel.sheet.macroenabled.12">
+    <comment>Office Open XML Workbook (macro-enabled)</comment>
+    <glob pattern="*.xlsm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.template">
+    <comment>Office Open XML Workbook Template</comment>
+    <glob pattern="*.xltx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-excel.template.macroenabled.12">
+    <comment>Office Open XML Workbook Template (macro-enabled)</comment>
+    <glob pattern="*.xltm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-excel.addin.macroenabled.12">
+    <comment>Office Open XML Workbook Add-in (macro-enabled)</comment>
+    <glob pattern="*.xlam"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
+    <comment>Office Open XML Document</comment>
+    <glob pattern="*.docx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-word.document.macroenabled.12">
+    <comment>Office Open XML Document (macro-enabled)</comment>
+    <glob pattern="*.docm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.template">
+    <comment>Office Open XML Document Template</comment>
+    <glob pattern="*.dotx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-word.template.macroenabled.12">
+    <comment>Office Open XML Document Template (macro-enabled)</comment>
+    <glob pattern="*.dotm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <!-- ===================================================================== -->
+  <!-- Open Document Format for Office Applications (OpenDocument) v1.0      -->
+  <!-- http://www.oasis-open.org/specs/index.php#opendocumentv1.0            -->
+  <!-- ===================================================================== -->
+
+  <mime-type type="application/vnd.oasis.opendocument.text">
+    <comment>OpenDocument v1.0: Text document</comment>
+    <alias type="application/x-vnd.oasis.opendocument.text" />
+    <glob pattern="*.odt" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.text" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.text-template">
+    <comment>OpenDocument v1.0: Text document used as template</comment>
+    <alias type="application/x-vnd.oasis.opendocument.text-template" />
+    <glob pattern="*.ott" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.text-template" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.graphics">
+    <comment>OpenDocument v1.0: Graphics document (Drawing)</comment>
+    <alias type="application/x-vnd.oasis.opendocument.graphics" />
+    <glob pattern="*.odg" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.graphics" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.graphics-template">
+    <comment>OpenDocument v1.0: Graphics document used as template</comment>
+    <alias type="application/x-vnd.oasis.opendocument.graphics-template" />
+    <glob pattern="*.otg" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.graphics-template" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.presentation">
+    <comment>OpenDocument v1.0: Presentation document</comment>
+    <alias type="application/x-vnd.oasis.opendocument.presentation" />
+    <glob pattern="*.odp" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.presentation" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.presentation-template">
+    <comment>OpenDocument v1.0: Presentation document used as template</comment>
+    <alias type="application/x-vnd.oasis.opendocument.presentation-template" />
+    <glob pattern="*.otp" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.presentation-template" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.spreadsheet">
+    <comment>OpenDocument v1.0: Spreadsheet document</comment>
+    <alias type="application/x-vnd.oasis.opendocument.spreadsheet" />
+    <glob pattern="*.ods" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.spreadsheet" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.spreadsheet-template">
+    <comment>OpenDocument v1.0: Spreadsheet document used as template</comment>
+    <alias type="application/x-vnd.oasis.opendocument.spreadsheet-template" />
+    <glob pattern="*.ots" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.spreadsheet-template" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.chart">
+    <comment>OpenDocument v1.0: Chart document</comment>
+    <alias type="application/x-vnd.oasis.opendocument.chart" />
+    <glob pattern="*.odc" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.chart" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.chart-template">
+    <comment>OpenDocument v1.0: Chart document used as template</comment>
+    <alias type="application/x-vnd.oasis.opendocument.chart-template" />
+    <glob pattern="*.otc" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.chart-template" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.image">
+    <comment>OpenDocument v1.0: Image document</comment>
+    <alias type="application/x-vnd.oasis.opendocument.image" />
+    <glob pattern="*.odi" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.image" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.image-template">
+    <comment>OpenDocument v1.0: Image document used as template</comment>
+    <alias type="application/x-vnd.oasis.opendocument.image-template" />
+    <glob pattern="*.oti" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.image-template" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.formula">
+    <comment>OpenDocument v1.0: Formula document</comment>
+    <alias type="application/x-vnd.oasis.opendocument.formula" />
+    <glob pattern="*.odf" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.formula" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.formula-template">
+    <comment>OpenDocument v1.0: Formula document used as template</comment>
+    <alias type="application/x-vnd.oasis.opendocument.formula-template" />
+    <glob pattern="*.otf" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.formula-template" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.text-master">
+    <comment>OpenDocument v1.0: Global Text document</comment>
+    <alias type="application/x-vnd.oasis.opendocument.text-master" />
+    <glob pattern="*.odm" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.text-master" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.oasis.opendocument.text-web">
+    <comment>
+      OpenDocument v1.0: Text document used as template for HTML documents
+    </comment>
+    <alias type="application/x-vnd.oasis.opendocument.text-web" />
+    <glob pattern="*.oth" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.oasis.opendocument.text-web" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/vnd.sun.xml.writer">
+    <comment>
+      OpenOffice v1.0: Writer Document
+    </comment>
+    <alias type="application/x-vnd.sun.xml.writer" />
+    <glob pattern="*.sxw" />
+    <magic>
+      <match type="string" offset="0" value="PK">
+        <match type="string" offset="30"
+          value="mimetypeapplication/vnd.sun.xml.writer" />
+      </match>
+    </magic>
+  </mime-type>
+
+  <mime-type type="application/zip">
+    <alias type="application/x-zip-compressed" />
+    <magic priority="40">
+      <match value="PK\003\004" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.zip" />
+  </mime-type>
+
+  <mime-type type="application/x-tar">
+    <magic priority="40">
+      <!-- POSIX tar archive -->
+      <match value="ustar\0" type="string" offset="257" />
+      <!-- GNU tar archive -->
+      <match value="ustar  \0" type="string" offset="257" />
+    </magic>
+    <glob pattern="*.tar" />
+  </mime-type>
+
+  <mime-type type="application/x-gzip">
+    <magic priority="40">
+      <match value="\037\213" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.tgz" />
+    <glob pattern="*.gz" />
+    <glob pattern="*-gz" />
+    <glob pattern="*.svgz" />
+    <glob pattern="*.wmz" />
+    <glob pattern="*.emz" />
+  </mime-type>
+
+  <mime-type type="application/x-bzip">
+    <alias type="application/x-bzip2" />
+    <magic priority="40">
+      <match value="BZh" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.bz" />
+    <glob pattern="*.bz2" />
+    <glob pattern="*.tbz" />
+    <glob pattern="*.tbz2" />
+  </mime-type>
+
+  <mime-type type="application/x-tika-java-class">
+    <magic priority="40">
+      <match value="0xcafebabe" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.class" />
+  </mime-type>
+
+  <mime-type type="application/octet-stream">
+    <magic priority="50">
+      <match value="#\ This\ is\ a\ shell\ archive" type="string" offset="10" />
+      <match value="\037\036" type="string" offset="0" />
+      <match value="017437" type="host16" offset="0" />
+      <match value="0x1fff" type="host16" offset="0" />
+      <match value="\377\037" type="string" offset="0" />
+      <match value="0145405" type="host16" offset="0" />
+    </magic>
+    <glob pattern="*.bin" />
+  </mime-type>
+
+  <mime-type type="application/pdf">
+    <acronym>PDF</acronym>
+    <comment>Portable Document Format</comment>
+    <magic priority="50">
+      <match value="%PDF-" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.pdf" />
+    <alias type="application/x-pdf" />
+  </mime-type>
+
+  <mime-type type="application/x-shockwave-flash">
+    <acronym>Flash</acronym>
+    <comment>Adobe Flash</comment>
+    <magic priority="50">
+      <!-- F = Uncompressed -->
+      <match value="FWS" type="string" offset="0" />
+      <!-- C = Compressed -->
+      <match value="CWS" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.swf" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-wmf">
+    <acronym>WMF</acronym>
+    <comment>Windows Metafile</comment>
+    <glob pattern="*.wmf" />
+    <glob pattern="*.emf" />
+  </mime-type>
+
+  <mime-type type="application/atom+xml">
+    <root-XML localName="feed" namespaceURI="http://purl.org/atom/ns#" />
+  </mime-type>
+
+  <mime-type type="application/mac-binhex40">
+    <magic priority="50">
+      <match value="must\ be\ converted\ with\ BinHex" type="string" offset="11" />
+    </magic>
+    <glob pattern="*.hqx" />
+  </mime-type>
+
+  <mime-type type="application/mac-compactpro">
+    <glob pattern="*.cpt" />
+  </mime-type>
+
+  <mime-type type="application/rtf">
+    <sub-class-of type="text/plain" />
+    <magic priority="50">
+      <match value="{\\rtf" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.rtf" />
+    <alias type="text/rtf" />
+  </mime-type>
+
+  <mime-type type="application/rss+xml">
+    <alias type="text/rss" />
+    <root-XML localName="rss" />
+    <root-XML namespaceURI="http://purl.org/rss/1.0/" />
+    <glob pattern="*.rss" />
+  </mime-type>
+
+  <!--  added in by mattmann -->
+  <mime-type type="application/xml">
+    <sub-class-of type="text/plain" />
+    <magic priority="50">
+      <match value="&lt;?xml" type="string" offset="0" />
+      <match value="&lt;?XML" type="string" offset="0" />
+      <match value="0xFFFE3C003F0078006D006C00" type="string" offset="0" />
+      <match value="0xFEFF003C003F0078006D006C" type="string" offset="0" />
+      <!-- TODO: Add matches for the rest of the possible XML encoding schemes -->
+    </magic>
+    <alias type="text/xml" />
+    <glob pattern="*.xml" />
+    <glob pattern="*.xsd" />
+  </mime-type>
+
+  <mime-type type="image/svg+xml">
+    <sub-class-of type="application/xml" />
+    <acronym>SVG</acronym>
+    <comment>Scalable Vector Graphics</comment>
+    <root-XML localName="svg" namespaceURI="http://www.w3.org/2000/svg" />
+    <glob pattern="*.svg" />
+  </mime-type>
+
+  <mime-type type="application/xslt+xml">
+    <sub-class-of type="application/xml" />
+    <acronym>XSLT</acronym>
+    <comment>XSL Transformations</comment>
+    <root-XML localName="stylesheet" namespaceURI="http://www.w3.org/1999/XSL/Transform" />
+    <alias type="text/xsl" />
+    <glob pattern="*.xsl" />
+    <glob pattern="*.xslt" />
+  </mime-type>
+  
+  <mime-type type="application/x-mif">
+    <magic priority="50">
+      <match value="\&lt;MakerFile" type="string" offset="0" />
+      <match value="\&lt;MIFFile" type="string" offset="0" />
+      <match value="\&lt;MakerDictionary" type="string" offset="0" />
+      <match value="\&lt;MakerScreenFont" type="string" offset="0" />
+      <match value="\&lt;MML" type="string" offset="0" />
+      <match value="\&lt;BookFile" type="string" offset="0" />
+      <match value="\&lt;Maker" type="string" offset="0" />
+    </magic>
+    <alias type="application/vnd.mif" />
+  </mime-type>
+
+  <mime-type type="application/vnd.wap.wbxml">
+    <glob pattern="*.wbxml" />
+  </mime-type>
+
+  <mime-type type="application/vnd.wap.wmlc">
+    <_comment>Compiled WML Document</_comment>
+    <glob pattern="*.wmlc" />
+  </mime-type>
+
+  <mime-type type="application/vnd.wap.wmlscriptc">
+    <_comment>Compiled WML Script</_comment>
+    <glob pattern="*.wmlsc" />
+  </mime-type>
+
+  <mime-type type="text/vnd.wap.wmlscript">
+    <_comment>WML Script</_comment>
+    <glob pattern="*.wmls" />
+  </mime-type>
+
+  <mime-type type="application/x-cdlink">
+    <_comment>Virtual CD-ROM CD Image File</_comment>
+    <glob pattern="*.vcd" />
+  </mime-type>
+
+  <mime-type type="application/x-director">
+    <_comment>Shockwave Movie</_comment>
+    <glob pattern="*.dcr" />
+    <glob pattern="*.dir" />
+    <glob pattern="*.dxr" />
+  </mime-type>
+
+  <mime-type type="application/x-futuresplash">
+    <_comment>Macromedia FutureSplash File</_comment>
+    <glob pattern="*.spl" />
+  </mime-type>
+
+  <mime-type type="application/x-koan">
+    <_comment>SSEYO Koan File</_comment>
+    <glob pattern="*.skp" />
+    <glob pattern="*.skd" />
+    <glob pattern="*.skt" />
+    <glob pattern="*.skm" />
+  </mime-type>
+
+  <mime-type type="application/x-latex">
+    <_comment>LaTeX Source Document</_comment>
+    <magic priority="50">
+      <match value="%\ -*-latex-*-" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.latex" />
+  </mime-type>
+
+  <!-- JC CHANGED
+    <mime-type type="application/x-mif">
+    <_comment>FrameMaker MIF document</_comment>
+    <glob pattern="*.mif"/>
+    </mime-type> -->
+
+  <mime-type type="application/x-ms-dos-executable">
+    <alias type="application/x-dosexec" />
+  </mime-type>
+
+  <mime-type type="application/ogg">
+    <magic priority="50">
+      <match value="OggS" type="string" offset="0" />
+    </magic>
+    <alias type="application/x-ogg" />
+  </mime-type>
+
+  <mime-type type="application/x-rar">
+    <alias type="application/x-rar-compressed" />
+  </mime-type>
+
+  <mime-type type="application/x-shellscript">
+    <alias type="application/x-sh" />
+  </mime-type>
+
+  <mime-type type="audio/midi">
+    <acronym>MIDI</acronym>
+    <comment>Musical Instrument Digital Interface</comment>
+    <magic priority="20">
+      <match type="string" value="MThd" offset="0" />
+    </magic>
+    <glob pattern="*.mid" />
+    <glob pattern="*.midi" />
+    <glob pattern="*.kar" />
+  </mime-type>
+
+  <mime-type type="message/rfc822">
+    <magic priority="50">
+      <match type="string" value="Relay-Version:" offset="0" />
+      <match type="string" value="#! rnews" offset="0" />
+      <match type="string" value="N#! rnews" offset="0" />
+      <match type="string" value="Forward to" offset="0" />
+      <match type="string" value="Pipe to" offset="0" />
+      <match type="string" value="Return-Path:" offset="0" />
+      <match type="string" value="From:" offset="0" />
+      <match type="string" value="Message-ID:" offset="0" />
+      <match type="string" value="Date:" offset="0" />
+    </magic>
+  </mime-type>
+
+  <mime-type type="image/vnd.wap.wbmp">
+    <_comment>Wireless Bitmap File Format</_comment>
+    <glob pattern="*.wbmp" />
+  </mime-type>
+
+  <mime-type type="image/x-psd">
+    <alias type="image/photoshop" />
+  </mime-type>
+
+  <mime-type type="image/x-xcf">
+    <alias type="image/xcf" />
+    <magic priority="50">
+      <match type="string" value="gimp xcf " offset="0" />
+    </magic>
+  </mime-type>
+
+  <mime-type type="model/iges">
+    <_comment>Initial Graphics Exchange Specification Format</_comment>
+    <glob pattern="*.igs" />
+    <glob pattern="*.iges" />
+  </mime-type>
+
+  <mime-type type="model/mesh">
+    <glob pattern="*.msh" />
+    <glob pattern="*.mesh" />
+    <glob pattern="*.silo" />
+  </mime-type>
+
+  <mime-type type="model/vrml">
+    <glob pattern="*.vrml" />
+  </mime-type>
+
+  <mime-type type="text/x-tcl">
+    <alias type="application/x-tcl" />
+  </mime-type>
+
+  <mime-type type="text/x-tex">
+    <magic priority="50">
+      <match value="\\input" type="string" offset="0" />
+      <match value="\\section" type="string" offset="0" />
+      <match value="\\setlength" type="string" offset="0" />
+      <match value="\\documentstyle" type="string" offset="0" />
+      <match value="\\chapter" type="string" offset="0" />
+      <match value="\\documentclass" type="string" offset="0" />
+      <match value="\\relax" type="string" offset="0" />
+      <match value="\\contentsline" type="string" offset="0" />
+    </magic>
+    <alias type="application/x-tex" />
+  </mime-type>
+
+  <mime-type type="text/x-texinfo">
+    <magic priority="50">
+      <match value="\\input\ texinfo" type="string" offset="0" />
+    </magic>
+    <alias type="application/x-texinfo" />
+  </mime-type>
+
+  <mime-type type="text/x-troff-me">
+    <alias type="application/x-troff-me" />
+  </mime-type>
+
+  <mime-type type="video/vnd.mpegurl">
+    <glob pattern="*.mxu" />
+  </mime-type>
+
+  <mime-type type="x-conference/x-cooltalk">
+    <_comment>Cooltalk Audio</_comment>
+    <glob pattern="*.ice" />
+  </mime-type>
+
+  <mime-type type="audio/mpeg">
+    <acronym>MP3</acronym>
+    <comment>MPEG-1 Audio Layer 3</comment>
+    <magic priority="20">
+      <!-- http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html -->
+      <!-- Bit pattern for first two bytes: 11111111 111VVLLC    -->
+      <!-- VV = MPEG Audio Version ID; 10 = V2, 11 = V1          -->
+      <!-- LL = Layer description; 01 = L3, 10 = L2, 11 = L1     -->
+      <!-- C = Protection bit; 0 = CRC, 1 = no CRC               -->
+      <match value="0xfff2" type="string" offset="0" /> <!-- V2, L3, CRC -->
+      <match value="0xfff3" type="string" offset="0" /> <!-- V2, L3      -->
+      <match value="0xfff4" type="string" offset="0" /> <!-- V2, L2, CRC -->
+      <match value="0xfff5" type="string" offset="0" /> <!-- V2, L2      -->
+      <match value="0xfff6" type="string" offset="0" /> <!-- V2, L1, CRC -->
+      <match value="0xfff7" type="string" offset="0" /> <!-- V2, L1      -->
+      <match value="0xfffa" type="string" offset="0" /> <!-- V1, L3, CRC -->
+      <match value="0xfffb" type="string" offset="0" /> <!-- V1, L3      -->
+      <match value="0xfffc" type="string" offset="0" /> <!-- V1, L2, CRC -->
+      <match value="0xfffd" type="string" offset="0" /> <!-- V1, L2      -->
+      <match value="0xfffe" type="string" offset="0" /> <!-- V1, L1, CRC -->
+      <match value="0xffff" type="string" offset="0" /> <!-- V1, L1      -->
+      <match value="ID3" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.mp3" />
+  </mime-type>
+
+  <!-- ===================================================================== -->
+  <!-- TIKA-85: http://www.apache.org/dev/svn-eol-style.txt                  -->
+  <!-- ===================================================================== -->
+
+  <mime-type type="image/x-icon">
+    <magic priority="50">
+      <match value="\102\101\050\000\000\000\056\000\000\000\000\000\000\000" type="string" offset="0" />
+      <match value="\000\000\001\000" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.ico" />
+  </mime-type>
+
+  <mime-type type="image/jpeg">
+    <acronym>JPEG</acronym>
+    <comment>Joint Photographic Experts Group</comment>
+    <magic priority="50">
+      <!-- FFD8 is the SOI (Start Of Image) marker.              -->
+      <!-- It is followed by another marker that starts with FF. -->
+      <match value="0xffd8ff" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.jpg" />
+    <glob pattern="*.jpeg" />
+    <glob pattern="*.jpe" />
+    <glob pattern="*.jif" />
+    <glob pattern="*.jfif" />
+    <glob pattern="*.jfi" />
+  </mime-type>
+
+  <mime-type type="image/png">
+    <acronym>PNG</acronym>
+    <comment>Portable Network Graphics</comment>
+    <magic priority="50">
+      <match value="\x89PNG\x0d\x0a\x1a\x0a" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.png" />
+  </mime-type>
+
+  <mime-type type="audio/basic">
+    <magic priority="20">
+      <match value=".snd" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.au" />
+    <glob pattern="*.snd" />
+  </mime-type>
+
+  <mime-type type="audio/x-aiff">
+    <alias type="audio/aiff"/>
+    <acronym>AIFF</acronym>
+    <comment>Audio Interchange File Format</comment>
+    <magic priority="20">
+      <match value="FORM....AIFF" type="string" offset="0"
+             mask="0xFFFFFFFF00000000FFFFFFFF" />
+      <match value="FORM....AIFC" type="string" offset="0"
+             mask="0xFFFFFFFF00000000FFFFFFFF" />
+      <!-- Amiga IFF sound sample, somewhat like the more modern AIFF -->
+      <match value="FORM....8SVX" type="string" offset="0"
+             mask="0xFFFFFFFF00000000FFFFFFFF" />
+    </magic>
+    <glob pattern="*.aif" />
+    <glob pattern="*.aiff" />
+    <glob pattern="*.aifc" />
+  </mime-type>
+
+  <mime-type type="audio/x-wav">
+    <acronym>WAV</acronym>
+    <magic priority="20">
+      <match value="RIFF....WAVE" type="string" offset="0"
+             mask="0xFFFFFFFF00000000FFFFFFFF" />
+    </magic>
+    <glob pattern="*.wav" />
+  </mime-type>
+
+  <mime-type type="application/postscript">
+    <comment>PostScript</comment>
+    <magic priority="50">
+      <match value="%!" type="string" offset="0" />
+      <match value="\004%!" type="string" offset="0" />
+      <!-- Windows format EPS -->
+      <match value="0xc5d0d3c6" type="string" offset="0"/>
+    </magic>
+    <glob pattern="*.ps" />
+    <glob pattern="*.eps" />
+    <glob pattern="*.epsf" />
+    <glob pattern="*.epsi" />
+  </mime-type>
+
+  <mime-type type="application/vnd.lotus-wordpro">
+    <magic priority="50">
+      <match value="WordPro\0" type="string" offset="0" />
+      <match value="WordPro\r\373" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/vnd.ms-tnef">
+    <magic priority="50">
+      <match value="0x223e9f78" type="little32" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/vnd.rn-realmedia">
+    <magic priority="50">
+      <match value=".RMF" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/vnd.symbian.install">
+    <magic priority="50">
+      <match value="0x10000419" type="little32" offset="8" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-123">
+    <magic priority="50">
+      <match value="0x00001a00" type="big32" offset="0" />
+      <match value="0x00000200" type="big32" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-archive">
+    <magic priority="50">
+      <match value="=&lt;ar&gt;" type="string" offset="0" />
+      <match value="=!&lt;arch&gt;" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.ar" />
+  </mime-type>
+  <mime-type type="application/x-bittorrent">
+    <magic priority="50">
+      <match value="d8:announce" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.torrent" />
+  </mime-type>
+  <mime-type type="application/x-compress">
+    <magic priority="50">
+      <match value="\037\235" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.z" />
+  </mime-type>
+  <mime-type type="application/x-cpio">
+    <magic priority="50">
+      <match value="070707" type="host16" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-debian-package">
+    <glob pattern="*.deb" />
+  </mime-type>
+  <mime-type type="application/x-dvi">
+    <magic priority="50">
+      <match value="\367\002" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.dvi" />
+  </mime-type>
+  <mime-type type="application/x-gnucash">
+    <glob pattern="*.gnucash" />
+  </mime-type>
+  <mime-type type="application/x-gnumeric">
+    <magic priority="50">
+      <match value="=&lt;gmr:Workbook" type="string" offset="39" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-hdf">
+    <magic priority="50">
+      <match value="0x0e031301" type="big32" offset="0" />
+      <match value="\211HDF\r\n\032" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-hwp">
+    <magic priority="50">
+      <match value="R\0o\0o\0t\0" type="string" offset="512" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-iso9660-image">
+    <magic priority="50">
+      <match value="CD001" type="string" offset="37633" />
+    </magic>
+    <glob pattern="*.iso" />
+  </mime-type>
+  <mime-type type="application/x-kdelnk">
+    <magic priority="50">
+      <match value="[KDE\ Desktop\ Entry]" type="string" offset="0" />
+      <match value="#\ KDE\ Config\ File" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-lha">
+    <magic priority="50">
+      <match value="-lzs-" type="string" offset="2" />
+      <match value="-lh\40-" type="string" offset="2" />
+      <match value="-lhd-" type="string" offset="2" />
+      <match value="-lh2-" type="string" offset="2" />
+      <match value="-lh3-" type="string" offset="2" />
+      <match value="-lh4-" type="string" offset="2" />
+      <match value="-lh5-" type="string" offset="2" />
+      <match value="-lh6-" type="string" offset="2" />
+      <match value="-lh7-" type="string" offset="2" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-lharc">
+    <magic priority="50">
+      <match value="-lh0-" type="string" offset="2" />
+      <match value="-lh1-" type="string" offset="2" />
+      <match value="-lz4-" type="string" offset="2" />
+      <match value="-lz5-" type="string" offset="2" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-rar">
+    <magic priority="50">
+      <match value="Rar!" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.rar" />
+  </mime-type>
+  <mime-type type="application/x-rpm">
+    <glob pattern="*.rpm" />
+  </mime-type>
+  <mime-type type="application/x-shockwave-flash">
+    <magic priority="50">
+      <match value="FWS" type="string" offset="0" />
+      <match value="CWS" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-stuffit">
+    <magic priority="50">
+      <match value="StuffIt" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-zoo">
+    <magic priority="50">
+      <match value="0xfdc4a7dc" type="little32" offset="20" />
+    </magic>
+    <glob pattern="*.zoo" />
+  </mime-type>
+
+  <mime-type type="audio/x-flac">
+    <acronym>FLAC</acronym>
+    <comment>Free Lossless Audio Codec</comment>
+    <magic priority="50">
+      <match value="fLaC" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.flac" />
+  </mime-type>
+
+  <mime-type type="audio/x-mod">
+    <acronym>MOD</acronym>
+    <magic priority="50">
+      <match value="Extended\ Module:" type="string" offset="0" />
+      <match value="BMOD2STM" type="string" offset="21" />
+      <match value="M.K." type="string" offset="1080" />
+      <match value="M!K!" type="string" offset="1080" />
+      <match value="FLT4" type="string" offset="1080" />
+      <match value="FLT8" type="string" offset="1080" />
+      <match value="4CHN" type="string" offset="1080" />
+      <match value="6CHN" type="string" offset="1080" />
+      <match value="8CHN" type="string" offset="1080" />
+      <match value="CD81" type="string" offset="1080" />
+      <match value="OKTA" type="string" offset="1080" />
+      <match value="16CN" type="string" offset="1080" />
+      <match value="32CN" type="string" offset="1080" />
+      <match value="IMPM" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.mod" />
+  </mime-type>
+
+  <mime-type type="audio/x-mp4a">
+    <glob pattern="*.mp4a" />
+  </mime-type>
+
+  <mime-type type="audio/x-pn-realaudio">
+    <comment>Real Audio</comment>
+    <alias type="audio/x-realaudio" />
+    <magic priority="50">
+      <match value="0x2e7261fd" type="big32" offset="0" />
+    </magic>
+    <glob pattern="*.ra" />
+  </mime-type>
+
+  <mime-type type="image/gif">
+    <acronym>GIF</acronym>
+    <comment>Graphics Interchange Format</comment>
+    <magic priority="50">
+      <match value="GIF87a" type="string" offset="0" />
+      <match value="GIF89a" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.gif" />
+  </mime-type>
+
+  <mime-type type="image/tiff">
+    <acronym>TIFF</acronym>
+    <comment>Tagged Image File Format</comment>
+    <magic priority="50">
+      <!-- MM.* = Big endian (M=Motorola) and 0x002a in big endian    -->
+      <match value="MM\x00\x2a" type="string" offset="0" />
+      <!-- II*. = Little endian (I=Intel) and 0x002a in little endian -->
+      <match value="II\x2a\x00" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.tiff" />
+    <glob pattern="*.tif" />
+  </mime-type>
+
+  <mime-type type="image/x-ms-bmp">
+    <acronym>BMP</acronym>
+    <comment>Windows bitmap</comment>
+    <magic priority="50">
+      <match value="BM" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.bmp" />
+    <glob pattern="*.dib" />
+  </mime-type>
+
+  <mime-type type="image/x-portable-anymap">
+    <acronym>PNM</acronym>
+    <comment>Portable Any Map</comment>
+    <glob pattern="*.pnm" />
+  </mime-type>
+
+  <mime-type type="image/x-portable-bitmap">
+    <sub-class-of type="image/x-portable-anymap" />
+    <acronym>PBM</acronym>
+    <comment>Portable Bit Map</comment>
+    <magic priority="50">
+      <match value="P1" type="string" offset="0" />
+      <match value="P4" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.pbm" />
+  </mime-type>
+
+  <mime-type type="image/x-portable-graymap">
+    <sub-class-of type="image/x-portable-anymap" />
+    <acronym>PGM</acronym>
+    <comment>Portable Gray Map</comment>
+    <magic priority="50">
+      <match value="P2" type="string" offset="0" />
+      <match value="P5" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.pgm" />
+  </mime-type>
+
+  <mime-type type="image/x-portable-pixmap">
+    <sub-class-of type="image/x-portable-anymap" />
+    <acronym>PPM</acronym>
+    <comment>Portable Pixel Map</comment>
+    <magic priority="50">
+      <match value="P3" type="string" offset="0" />
+      <match value="P6" type="string" offset="0" />
+      <match value="P7" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.ppm" />
+  </mime-type>
+
+  <mime-type type="image/cgm">
+    <acronym>CGM</acronym>
+    <comment>Computer Graphics Metafile</comment>
+    <magic priority="50">
+      <match value="BEGMF" type="string" offset="0" />
+      <match value="0x0020" mask="0xffe0" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.cgm" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-dng">
+    <acronym>DNG</acronym>
+    <comment>Adobe Digital Negative</comment>
+    <glob pattern="*.dng" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-hasselblad">
+    <comment>Hasselblad raw image</comment>
+    <glob pattern="*.3fr" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-fuji">
+    <comment>Fuji raw image</comment>
+    <glob pattern="*.raf" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-canon">
+    <comment>Canon raw image</comment>
+    <glob pattern="*.crw" />
+    <glob pattern="*.cr2" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-kodak">
+    <comment>Kodak raw image</comment>
+    <glob pattern="*.k25" />
+    <glob pattern="*.kdc" />
+    <glob pattern="*.dcs" />
+    <glob pattern="*.drf" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-minolta">
+    <comment>Minolta raw image</comment>
+    <glob pattern="*.mrw" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-nikon">
+    <comment>Nikon raw image</comment>
+    <glob pattern="*.nef" />
+    <glob pattern="*.nrw" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-olympus">
+    <comment>Olympus raw image</comment>
+    <glob pattern="*.orf" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-pentax">
+    <comment>Pentax raw image</comment>
+    <glob pattern="*.ptx" />
+    <glob pattern="*.pef" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-sony">
+    <comment>Sony raw image</comment>
+    <glob pattern="*.arw" />
+    <glob pattern="*.srf" />
+    <glob pattern="*.sr2" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-sigma">
+    <comment>Sigma raw image</comment>
+    <glob pattern="*.x3f" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-epson">
+    <comment>Epson raw image</comment>
+    <glob pattern="*.erf" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-mamiya">
+    <comment>Mamiya raw image</comment>
+    <glob pattern="*.mef" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-leaf">
+    <comment>Leaf raw image</comment>
+    <glob pattern="*.mos" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-panasonic">
+    <comment>Panasonic raw image</comment>
+    <glob pattern="*.raw" />
+    <glob pattern="*.rw2" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-phaseone">
+    <comment>Phase One raw image</comment>
+    <glob pattern="*.cap" />
+    <glob pattern="*.iiq" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-red">
+    <comment>Red raw image</comment>
+    <glob pattern="*.r3d" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-imacon">
+    <comment>Imacon raw image</comment>
+    <glob pattern="*.fff" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-logitech">
+    <comment>Logitech raw image</comment>
+    <glob pattern="*.pxn" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-casio">
+    <comment>Casio raw image</comment>
+    <glob pattern="*.bay" />
+  </mime-type>
+
+  <mime-type type="image/x-tika-rawzor">
+    <comment>Rawzor raw image</comment>
+    <glob pattern="*.rwz" />
+  </mime-type>
+
+  <mime-type type="message/news">
+    <magic priority="50">
+      <match value="Path:" type="string" offset="0" />
+      <match value="Xref:" type="string" offset="0" />
+      <match value="Article" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="message/rfc822">
+    <magic priority="50">
+      <match value="Relay-Version:" type="string" offset="0" />
+      <match value="#!\ rnews" type="string" offset="0" />
+      <match value="N#!\ rnews" type="string" offset="0" />
+      <match value="Forward\ to" type="string" offset="0" />
+      <match value="Pipe\ to" type="string" offset="0" />
+      <match value="Return-Path:" type="string" offset="0" />
+      <match value="From:" type="string" offset="0" />
+      <match value="Received:" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="model/vrml">
+    <glob pattern="*.vrml" />
+  </mime-type>
+  <mime-type type="text/troff">
+    <magic priority="50">
+      <match value=".\\&quot;" type="string" offset="0" />
+      <match value="'\\&quot;" type="string" offset="0" />
+      <match value="'.\\&quot;" type="string" offset="0" />
+      <match value="\\&quot;" type="string" offset="0" />
+      <match value="'''" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="text/x-diff">
+    <magic priority="50">
+      <match value="diff\ " type="string" offset="0" />
+      <match value="***\ " type="string" offset="0" />
+      <match value="Only\ in\ " type="string" offset="0" />
+      <match value="Common\ subdirectories:\ " type="string" offset="0" />
+      <match value="Index:" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="video/mpeg">
+    <glob pattern="*.mpg" />
+    <glob pattern="*.mpeg" />
+  </mime-type>
+  <mime-type type="video/quicktime">
+    <magic priority="50">
+      <match value="moov" type="string" offset="4" />
+      <match value="mdat" type="string" offset="4" />
+      <match value="ftyp" type="string" offset="4" />
+    </magic>
+    <glob pattern="*.mov" />
+  </mime-type>
+  <mime-type type="video/x-flc">
+    <glob pattern="*.flc" />
+  </mime-type>
+  <mime-type type="video/x-fli">
+    <glob pattern="*.fli" />
+  </mime-type>
+  <mime-type type="video/x-flv">
+    <magic priority="50">
+      <match value="FLV" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.flv" />
+  </mime-type>
+  <mime-type type="video/x-jng">
+    <magic priority="50">
+      <match value="\x8bJNG" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.jng" />
+  </mime-type>
+  <mime-type type="video/x-mng">
+    <magic priority="50">
+      <match value="\x8aMNG" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.mng" />
+  </mime-type>
+  <mime-type type="video/x-msvideo">
+    <magic priority="50">
+      <match value="RIFF....AVI " type="string" offset="0"
+             mask="0xFFFFFFFF00000000FFFFFFFF" />
+    </magic>
+    <glob pattern="*.avi" />
+    <alias type="video/avi" />
+    <alias type="video/msvideo" />
+  </mime-type>
+  <mime-type type="video/x-sgi-movie">
+    <magic priority="50">
+      <match value="MOVI" type="string" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-Berkeley-DB">
+    <magic priority="50">
+      <match value="0x00061561" type="big32" offset="0" />
+      <match value="0x00061561" type="host32" offset="12" />
+      <match value="0x00061561" type="big32" offset="12" />
+      <match value="0x00061561" type="little32" offset="12" />
+      <match value="0x00053162" type="host32" offset="12" />
+      <match value="0x00053162" type="big32" offset="12" />
+      <match value="0x00053162" type="little32" offset="12" />
+      <match value="0x00042253" type="host32" offset="12" />
+      <match value="0x00042253" type="big32" offset="12" />
+      <match value="0x00042253" type="little32" offset="12" />
+      <match value="0x00040988" type="host32" offset="12" />
+      <match value="0x00040988" type="little32" offset="12" />
+      <match value="0x00040988" type="big32" offset="12" />
+      <match value="0x00053162" type="host32" offset="0" />
+      <match value="0x00053162" type="big32" offset="0" />
+      <match value="0x00053162" type="little32" offset="0" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-BibTeX-text-file">
+    <magic priority="50">
+      <match value="%\ BibTeX\ `" type="string" offset="0" />
+      <match value="%%%\ \ " type="string" offset="73" />
+      <match value="%\ BibTeX\ standard\ bibliography\ " type="string" offset="0" />
+      <match value="%%%\ \ @BibTeX-style-file{" type="string" offset="73" />
+      <match value="@article{" type="string" offset="0" />
+      <match value="@book{" type="string" offset="0" />
+      <match value="@inbook{" type="string" offset="0" />
+      <match value="@incollection{" type="string" offset="0" />
+      <match value="@inproceedings{" type="string" offset="0" />
+      <match value="@manual{" type="string" offset="0" />
+      <match value="@misc{" type="string" offset="0" />
+      <match value="@preamble{" type="string" offset="0" />
+      <match value="@phdthesis{" type="string" offset="0" />
+      <match value="@techreport{" type="string" offset="0" />
+      <match value="@unpublished{" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.bib" />
+    <glob pattern="*.bibtex" />
+  </mime-type>
+  <mime-type type="application/x-BinHex-binary-text">
+    <magic priority="50">
+      <match value="must\ be\ converted\ with\ BinHex" type="string" offset="11" />
+    </magic>
+  </mime-type>
+  <mime-type type="application/x-Gnumeric-spreadsheet">
+    <magic priority="50">
+      <match value="=&lt;gmr:Workbook" type="string" offset="39" />
+    </magic>
+    <glob pattern="*.gnumeric" />
+  </mime-type>
 
 </mime-info>

Added: lucene/nutch/trunk/lib/tika-core-0.5.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/lib/tika-core-0.5.jar?rev=885869&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/tika-core-0.5.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java?rev=885869&r1=885868&r2=885869&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java Tue Dec  1 19:01:58 2009
@@ -19,6 +19,7 @@
 
 // JDK imports
 import java.io.File;
+import java.io.IOException;
 import java.util.logging.Logger;
 
 // Hadoop imports
@@ -59,8 +60,13 @@
     MimeTypes mimeTypez = (MimeTypes) objectCache.getObject(MimeTypes.class
         .getName());
     if (mimeTypez == null) {
-      mimeTypez = MimeTypesFactory.create(conf
-          .getConfResourceAsInputStream(conf.get("mime.types.file")));
+      try {
+        mimeTypez = MimeTypesFactory.create(conf
+            .getConfResourceAsInputStream(conf.get("mime.types.file")));
+      } catch (Exception e) {
+        e.printStackTrace();
+        throw new RuntimeException(e);
+      }
       objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
 
     }
@@ -139,7 +145,7 @@
 
     // if returned null, or if it's the default type then try url resolution
     if (type == null
-        || (type != null && type.getName().equals(MimeTypes.DEFAULT))) {
+        || (type != null && type.getName().equals(MimeTypes.OCTET_STREAM))) {
       // If no mime-type header, or cannot find a corresponding registered
       // mime-type, then guess a mime-type from the url pattern
       type = this.mimeTypes.getMimeType(url) != null ? this.mimeTypes
@@ -152,7 +158,7 @@
     // returned by the magic
     if (this.mimeMagic) {
       MimeType magicType = this.mimeTypes.getMimeType(data);
-      if (magicType != null && !magicType.getName().equals(MimeTypes.DEFAULT)
+      if (magicType != null && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
           && type != null && !type.getName().equals(magicType.getName())) {
         // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType
@@ -163,7 +169,7 @@
       // default type
       if (type == null) {
         try {
-          type = this.mimeTypes.forName(MimeTypes.DEFAULT);
+          type = this.mimeTypes.forName(MimeTypes.OCTET_STREAM);
         } catch (Exception ignore) {
         }
       }