You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by fe...@apache.org on 2012/05/07 11:48:57 UTC

svn commit: r1334945 - in /nutch/branches/nutchgora: CHANGES.txt conf/nutch-default.xml src/java/org/apache/nutch/fetcher/FetcherReducer.java

Author: ferdy
Date: Mon May  7 09:48:57 2012
New Revision: 1334945

URL: http://svn.apache.org/viewvc?rev=1334945&view=rev
Log:
NUTCH-1354 nutchgora support fetcher.queue.depth.multiplier property

Modified:
    nutch/branches/nutchgora/CHANGES.txt
    nutch/branches/nutchgora/conf/nutch-default.xml
    nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1334945&r1=1334944&r2=1334945&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon May  7 09:48:57 2012
@@ -1,6 +1,8 @@
 Nutch Change Log
 
 Release nutchgora - Current Development
+* NUTCH-1354 nutchgora support fetcher.queue.depth.multiplier property (ferdy)
+
 * NUTCH-1353 nutchgora DomainStatistics support crawlId, counter bug and reformatting (ferdy)
 
 * NUTCH-1350 remove unused dependancy because of access restriction (ferdy)

Modified: nutch/branches/nutchgora/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/nutch-default.xml?rev=1334945&r1=1334944&r2=1334945&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/nutch-default.xml (original)
+++ nutch/branches/nutchgora/conf/nutch-default.xml Mon May  7 09:48:57 2012
@@ -686,6 +686,16 @@
   </description>
 </property>
 
+<property>
+  <name>fetcher.queue.depth.multiplier</name>
+  <value>50</value>
+  <description>(EXPERT) The fetcher buffers the incoming URLs into queues based on the [host|domain|IP]
+  (see param fetcher.queue.mode). The depth of the queue is the number of threads times the value of this parameter.
+  A large value requires more memory but can improve the performance of the fetch when the order of the URLs in the fetch list
+  is not optimal.
+  </description>
+</property>
+
 <!-- indexingfilter plugin properties -->
 
 <property>

Modified: nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java?rev=1334945&r1=1334944&r2=1334945&view=diff
==============================================================================
--- nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java (original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/fetcher/FetcherReducer.java Mon May  7 09:48:57 2012
@@ -775,7 +775,8 @@ extends GoraReducer<IntWritable, FetchEn
     }
     LOG.info("Fetcher: threads: " + threadCount);
 
-    feeder = new QueueFeeder(context, fetchQueues, threadCount * 50);
+    int maxFeedPerThread = conf.getInt("fetcher.queue.depth.multiplier", 50);
+    feeder = new QueueFeeder(context, fetchQueues, threadCount * maxFeedPerThread);
     feeder.start();
 
     for (int i = 0; i < threadCount; i++) {       // spawn threads