You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by te...@apache.org on 2013/01/28 08:59:41 UTC
svn commit: r1439291 - in /nutch/branches/2.x: CHANGES.txt
src/java/org/apache/nutch/fetcher/FetcherReducer.java
Author: tejasp
Date: Mon Jan 28 07:59:41 2013
New Revision: 1439291
URL: http://svn.apache.org/viewvc?rev=1439291&view=rev
Log:
NUTCH-1284 Add site fetcher.max.crawl.delay as log output by default
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/fetcher/FetcherReducer.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1439291&r1=1439290&r2=1439291&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Jan 28 07:59:41 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.2 - Current Development
+* NUTCH-1284 Add site fetcher.max.crawl.delay as log output by default (Tejas Patil)
+
* NUTCH-1453 Substantiate tests for IndexingFilters (lufeng via lewismc)
* NUTCH-1274 Fix [cast] javac warnings (Tejas Patil via lewismc)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/fetcher/FetcherReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/fetcher/FetcherReducer.java?rev=1439291&r1=1439290&r2=1439291&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/fetcher/FetcherReducer.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/fetcher/FetcherReducer.java Mon Jan 28 07:59:41 2013
@@ -484,7 +484,8 @@ extends GoraReducer<IntWritable, FetchEn
reprUrl = TableUtil.toString(fit.page.getReprUrl());
}
try {
- LOG.info("fetching " + fit.url);
+ LOG.info("fetching " + fit.url + " (queue crawl delay=" +
+ fetchQueues.getFetchItemQueue(fit.queueID).crawlDelay + "ms)");
// fetch the page
final Protocol protocol = this.protocolFactory.getProtocol(fit.url);
@@ -500,7 +501,7 @@ extends GoraReducer<IntWritable, FetchEn
continue;
}
if (rules.getCrawlDelay() > 0) {
- if (rules.getCrawlDelay() > maxCrawlDelay) {
+ if (rules.getCrawlDelay() > maxCrawlDelay && maxCrawlDelay >= 0) {
// unblock
fetchQueues.finishFetchItem(fit, true);
LOG.debug("Crawl-Delay for " + fit.url + " too long (" + rules.getCrawlDelay() + "), skipping");
@@ -509,6 +510,9 @@ extends GoraReducer<IntWritable, FetchEn
} else {
final FetchItemQueue fiq = fetchQueues.getFetchItemQueue(fit.queueID);
fiq.crawlDelay = rules.getCrawlDelay();
+ if (LOG.isDebugEnabled()) {
+ LOG.info("Crawl delay for queue: " + fit.queueID + " is set to " + fiq.crawlDelay + " as per robots.txt. url: " + fit.url);
+ }
}
}
final ProtocolOutput output = protocol.getProtocolOutput(fit.url, fit.page);
@@ -875,3 +879,4 @@ extends GoraReducer<IntWritable, FetchEn
LOG.info("-activeThreads=" + activeThreads);
}
}
+