You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain text copy.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/07/07 14:38:24 UTC

svn commit: r1608431 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Author: jnioche
Date: Mon Jul  7 12:38:23 2014
New Revision: 1608431

URL: http://svn.apache.org/r1608431
Log:
NUTCH-578 URL fetched with 403 is generated over and over again

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1608431&r1=1608430&r2=1608431&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Jul  7 12:38:23 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-578 URL fetched with 403 is generated over and over again (snagel)
+
 * NUTCH-1776 Log incorrect plugin.folder file path (Diaa via snagel)
 
 * NUTCH-1566 bin/nutch to allow whitespace in paths (tejasp, snagel)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1608431&r1=1608430&r2=1608431&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Mon Jul  7 12:38:23 2014
@@ -274,6 +274,8 @@ public class CrawlDbReducer implements R
         result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
       } else {
         result.setStatus(CrawlDatum.STATUS_DB_GONE);
+        result = schedule.setPageGoneSchedule(key, result, prevFetchTime,
+          prevModifiedTime, fetch.getFetchTime());
       }
       break;