You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2010/01/11 11:13:21 UTC

svn commit: r897825 - in /lucene/nutch/trunk/src: java/org/apache/nutch/util/MimeUtil.java test/org/apache/nutch/protocol/TestContent.java

Author: jnioche
Date: Mon Jan 11 10:13:21 2010
New Revision: 897825

URL: http://svn.apache.org/viewvc?rev=897825&view=rev
Log:
Fix for NUTCH-767: restored the original expected values in the test, and treat text/plain as a default mime-type when it is returned by Tika

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
    lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java?rev=897825&r1=897824&r2=897825&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java Mon Jan 11 10:13:21 2010
@@ -159,6 +159,7 @@
     if (this.mimeMagic) {
       MimeType magicType = this.mimeTypes.getMimeType(data);
       if (magicType != null && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
+          && !magicType.getName().equals(MimeTypes.PLAIN_TEXT)
           && type != null && !type.getName().equals(magicType.getName())) {
         // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java?rev=897825&r1=897824&r2=897825&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java Mon Jan 11 10:13:21 2010
@@ -63,28 +63,19 @@
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     "text/html; charset=UTF-8", p, conf);
-    // TODO check potential Tika issue and 
-    // revert the expected value to text/html
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals("text/html", c.getContentType());
 
     c = new Content("http://www.foo.com/foo.html",
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     "", p, conf);
-    // TODO check potential Tika issue and 
-    // revert the expected value to text/html
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals("text/html", c.getContentType());
 
     c = new Content("http://www.foo.com/foo.html",
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     null, p, conf);
-    // TODO check potential Tika issue and 
-    // revert the expected value to text/html
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals("text/html", c.getContentType());
 
     c = new Content("http://www.foo.com/",
                     "http://www.foo.com/",
@@ -108,10 +99,7 @@
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     "", p, conf);
-    // TODO check that Tika returns the right value and
-    // revert to the default type
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals(MimeTypes.OCTET_STREAM, c.getContentType());
 
     c = new Content("http://www.foo.com/",
                     "http://www.foo.com/",