You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2012/05/07 11:48:57 UTC
svn commit: r1334945 - in /nutch/branches/nutchgora: CHANGES.txt
conf/nutch-default.xml src/java/org/apache/nutch/fetcher/FetcherReducer.java
Author: ferdy
Date: Mon May 7 09:48:57 2012
New Revision: 1334945
URL: http://svn.apache.org/viewvc?rev=1334945&view=rev
Log:
NUTCH-1354 nutchgora support fetcher.queue.depth.multiplier property
Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/conf/nutch-default.xml
nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
Modified: nutch/branches/nutchgora/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1334945&r1=1334944&r2=1334945&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon May 7 09:48:57 2012
@@ -1,6 +1,8 @@
Nutch Change Log
Release nutchgora - Current Development
+* NUTCH-1354 nutchgora support fetcher.queue.depth.multiplier property (ferdy)
+
* NUTCH-1353 nutchgora DomainStatistics support crawlId, counter bug and reformatting (ferdy)
* NUTCH-1350 remove unused dependency because of access restriction (ferdy)
Modified: nutch/branches/nutchgora/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/nutch-default.xml?rev=1334945&r1=1334944&r2=1334945&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/nutch-default.xml (original)
+++ nutch/branches/nutchgora/conf/nutch-default.xml Mon May 7 09:48:57 2012
@@ -686,6 +686,16 @@
</description>
</property>
+<property>
+ <name>fetcher.queue.depth.multiplier</name>
+ <value>50</value>
+ <description>(EXPERT) The fetcher buffers the incoming URLs into queues based on the [host|domain|IP]
+ (see param fetcher.queue.mode). The depth of the queue is the number of threads times the value of this parameter.
+ A large value requires more memory but can improve the performance of the fetch when the order of the URLs in the fetch list
+ is not optimal.
+ </description>
+</property>
+
<!-- indexingfilter plugin properties -->
<property>
Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java?rev=1334945&r1=1334944&r2=1334945&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java Mon May 7 09:48:57 2012
@@ -775,7 +775,8 @@ extends GoraReducer<IntWritable, FetchEn
}
LOG.info("Fetcher: threads: " + threadCount);
- feeder = new QueueFeeder(context, fetchQueues, threadCount * 50);
+ int maxFeedPerThread = conf.getInt("fetcher.queue.depth.multiplier", 50);
+ feeder = new QueueFeeder(context, fetchQueues, threadCount * maxFeedPerThread);
feeder.start();
for (int i = 0; i < threadCount; i++) { // spawn threads