You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2010/01/20 06:42:40 UTC

svn commit: r901066 - in /lucene/tika/trunk: CHANGES.txt tika-core/src/main/java/org/apache/tika/mime/MimeType.java tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml

Author: mattmann
Date: Wed Jan 20 05:42:30 2010
New Revision: 901066

URL: http://svn.apache.org/viewvc?rev=901066&view=rev
Log:
- fix for TIKA-367 Mime type rootXML equality improvement

Modified:
    lucene/tika/trunk/CHANGES.txt
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
    lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml

Modified: lucene/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=901066&r1=901065&r2=901066&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Wed Jan 20 05:42:30 2010
@@ -6,10 +6,10 @@
 
 The most notable changes in Tika 0.6 over the previous release are:
 
- * Mime-type detection for HTML has been improved, allowing malformed
+ * Mime-type detection for HTML (and all types) has been improved: malformed
    HTML files and those HTML files that require a bit more observed content
    before the type is properly detected, are now correctly identified by 
-   the AutoDetectParser. (TIKA-327, TIKA-366)
+   the AutoDetectParser. (TIKA-327, TIKA-366, TIKA-367)
 
  * Tika now has an additional OSGi bundle packaging that includes all the
    required parser libraries. This bundle package makes it easy to use all

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java?rev=901066&r1=901065&r2=901066&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeType.java Wed Jan 20 05:42:30 2010
@@ -297,6 +297,13 @@
                     return false;
                 }
             }
+            else{
+                // This object's namespace URI is empty, so the two only match when the
+                // provided namespaceURI is empty as well; if it is not, return false
+                if(!isEmpty(namespaceURI)){
+                    return false;
+                }
+            }
 
             //Compare root element's local name
             if (!isEmpty(this.localName)) {
@@ -304,6 +311,13 @@
                     return false;
                 }
             }
+            else{
+                // This object's local name is empty, so the two only match when the
+                // provided localName is empty as well; if it is not, return false
+                if(!isEmpty(localName)){
+                    return false;
+                }
+            }
             return true;
         }
 

Modified: lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=901066&r1=901065&r2=901066&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Wed Jan 20 05:42:30 2010
@@ -367,6 +367,7 @@
 
   <mime-type type="application/rdf+xml">
     <root-XML localName="RDF"/>
+    <root-XML localName="RDF" namespaceURI="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>
     <sub-class-of type="application/xml"/>
     <acronym>RDF/XML</acronym>
     <comment>XML syntax for RDF graphs</comment>
@@ -2637,6 +2638,9 @@
   </mime-type>
 
   <mime-type type="application/xhtml+xml">
+    <magic priority="50">
+      <match value="&lt;html xmlns=" type="string" offset="0:8192"/>
+    </magic>    
     <root-XML namespaceURI="http://www.w3.org/1999/xhtml" localName="html"/>
     <glob pattern="*.xhtml"/>
     <glob pattern="*.xht"/>