You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2013/06/18 01:31:02 UTC

svn commit: r1493973 - in /nutch/branches/2.x: CHANGES.txt src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java

Author: lewismc
Date: Mon Jun 17 23:31:02 2013
New Revision: 1493973

URL: http://svn.apache.org/r1493973
Log:
NUTCH-1475 Index-More Plugin -- A better fall back value for date field

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1493973&r1=1493972&r2=1493973&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Jun 17 23:31:02 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1475 Index-More Plugin -- A better fall back value for date field (James Sullivan, snagel via lewismc)
+
 * NUTCH-1420 Get rid of the dreaded � (markus + lewismc)
 
 * NUTCH-1578 Upgrade to Hadoop 1.2.0 (markus)

Modified: nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=1493973&r1=1493972&r2=1493973&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Mon Jun 17 23:31:02 2013
@@ -67,6 +67,7 @@ public class MoreIndexingFilter implemen
   static {
     FIELDS.add(WebPage.Field.HEADERS);
     FIELDS.add(WebPage.Field.CONTENT_TYPE);
+    FIELDS.add(WebPage.Field.MODIFIED_TIME);
   }
 
   @Override
@@ -94,8 +95,7 @@ public class MoreIndexingFilter implemen
     }
 
     if (time == -1) { // if no last-modified
-      // time = datum.getFetchTime(); // use fetch time
-      time = page.getFetchTime(); // use fetch time
+      time = page.getModifiedTime(); // use Modified time
     }
 
     String dateString = DateUtil.getThreadLocalDateFormat().format(new Date(time));
@@ -145,7 +145,7 @@ public class MoreIndexingFilter implemen
       // NUTCH-1010 ContentLength not trimmed
       String trimmed = contentLength.toString().trim();
       if (!trimmed.isEmpty())
-          doc.add("contentLength", trimmed);
+        doc.add("contentLength", trimmed);
     }
 
     return doc;