You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/07/07 14:38:24 UTC
svn commit: r1608431 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/CrawlDbReducer.java
Author: jnioche
Date: Mon Jul 7 12:38:23 2014
New Revision: 1608431
URL: http://svn.apache.org/r1608431
Log:
NUTCH-578 URL fetched with 403 is generated over and over again
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1608431&r1=1608430&r2=1608431&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Jul 7 12:38:23 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-578 URL fetched with 403 is generated over and over again (snagel)
+
* NUTCH-1776 Log incorrect plugin.folder file path (Diaa via snagel)
* NUTCH-1566 bin/nutch to allow whitespace in paths (tejasp, snagel)
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1608431&r1=1608430&r2=1608431&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Mon Jul 7 12:38:23 2014
@@ -274,6 +274,8 @@ public class CrawlDbReducer implements R
result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
} else {
result.setStatus(CrawlDatum.STATUS_DB_GONE);
+ result = schedule.setPageGoneSchedule(key, result, prevFetchTime,
+ prevModifiedTime, fetch.getFetchTime());
}
break;