You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2011/10/01 10:24:33 UTC

svn commit: r1177944 - in /nutch/trunk: CHANGES.txt conf/nutch-default.xml src/java/org/apache/nutch/fetcher/Fetcher.java

Author: jnioche
Date: Sat Oct  1 08:24:32 2011
New Revision: 1177944

URL: http://svn.apache.org/viewvc?rev=1177944&view=rev
Log:
NUTCH-1141 Configurable Fetcher queue depth

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/conf/nutch-default.xml
    nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1177944&r1=1177943&r2=1177944&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Oct  1 08:24:32 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1141 Configurable Fetcher queue depth (jnioche)
+
 * NUTCH-1091 Remove commons logging dependency from Nutch branch and trunk (lewismc)
 
 * NUTCH-672 allow unit tests to be run from bin/nutch (Todd Lipton via lewismc)

Modified: nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1177944&r1=1177943&r2=1177944&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Sat Oct  1 08:24:32 2011
@@ -722,6 +722,16 @@
   </description>
 </property>
 
+<property>
+  <name>fetcher.queue.depth.multiplier</name>
+  <value>50</value>
+  <description>(EXPERT) The fetcher buffers the incoming URLs into queues based on the [host|domain|IP]
+  (see param fetcher.queue.mode). The depth of the queue is the number of threads times the value of this parameter.
+  A large value requires more memory but can improve the performance of the fetch when the order of the URLs in the fetch list
+  is not optimal.
+  </description>
+</property>	
+
 <!-- moreindexingfilter plugin properties -->
 
 <property>

Modified: nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=1177944&r1=1177943&r2=1177944&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Sat Oct  1 08:24:32 2011
@@ -1057,7 +1057,9 @@ public class Fetcher extends Configured 
     int timeoutDivisor = getConf().getInt("fetcher.threads.timeout.divisor", 2);
     if (LOG.isInfoEnabled()) { LOG.info("Fetcher: time-out divisor: " + timeoutDivisor); }
 
-    feeder = new QueueFeeder(input, fetchQueues, threadCount * 50);
+    int queueDepthMuliplier =  getConf().getInt("fetcher.queue.depth.multiplier", 50);
+    
+    feeder = new QueueFeeder(input, fetchQueues, threadCount * queueDepthMuliplier);
     //feeder.setPriority((Thread.MAX_PRIORITY + Thread.NORM_PRIORITY) / 2);
     
     // the value of the time limit is either -1 or the time where it should finish