You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/02/10 17:06:38 UTC

svn commit: r908560 - /lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml

Author: jukka
Date: Wed Feb 10 16:06:38 2010
New Revision: 908560

URL: http://svn.apache.org/viewvc?rev=908560&view=rev
Log:
TIKA-377: Error parsing HTML partial with AutoDetect parser

Recognize both upper- and lower-case root element names (html, link, body, p) when detecting HTML tag soup.

Modified:
    lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml

Modified: lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=908560&r1=908559&r2=908560&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Wed Feb 10 16:06:38 2010
@@ -3551,9 +3551,13 @@
           bad HTML, unfortunately.
      -->
     <root-XML localName="html"/>
+    <root-XML localName="HTML"/>
     <root-XML localName="link"/>
+    <root-XML localName="LINK"/>
     <root-XML localName="body"/>
+    <root-XML localName="BODY"/>
     <root-XML localName="p"/>
+    <root-XML localName="P"/>
     <magic priority="50">
       <match value="&lt;!DOCTYPE HTML" type="string" offset="0:64"/>
       <match value="&lt;!doctype html" type="string" offset="0:64"/>