You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2012/02/13 12:49:40 UTC

svn commit: r1243482 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/fetcher/Fetcher.java

Author: jnioche
Date: Mon Feb 13 11:49:40 2012
New Revision: 1243482

URL: http://svn.apache.org/viewvc?rev=1243482&view=rev
Log:
NUTCH-1259 Store detected content type in crawldatum metadata

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1243482&r1=1243481&r2=1243482&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Feb 13 11:49:40 2012
@@ -1,5 +1,7 @@
 Nutch Change Log
 
+* NUTCH-1259 Store detected content-type in crawldatum metadata (jnioche, markus)
+
 * NUTCH-1266 Subcollection to optionally write to configured fields (markus)
 
 * NUTCH-1005 Parse headings plugin (markus)

Modified: nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=1243482&r1=1243481&r2=1243482&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Mon Feb 13 11:49:40 2012
@@ -924,7 +924,9 @@ public class Fetcher extends Configured 
       datum.setStatus(status);
       datum.setFetchTime(System.currentTimeMillis());
       if (pstatus != null) datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
-
+      // store the guessed content type in the crawldatum
+      if (content.getContentType() != null) datum.getMetaData().put(new Text(Metadata.CONTENT_TYPE), new Text(content.getContentType()));
+      
       ParseResult parseResult = null;
       if (content != null) {
         Metadata metadata = content.getMetadata();