You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/05/12 14:54:32 UTC

svn commit: r1593951 - in /nutch/trunk: CHANGES.txt src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java

Author: jnioche
Date: Mon May 12 12:54:32 2014
New Revision: 1593951

URL: http://svn.apache.org/r1593951
Log:
NUTCH-1613 Timeouts in protocol-httpclient when crawling same host with >2 threads

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1593951&r1=1593950&r2=1593951&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon May 12 12:54:32 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1613 Timeouts in protocol-httpclient when crawling same host with >2 threads (brian44 via jnioche)
+
 * NUTCH-1766 Generator to unlock crawldb and remove tempdir if generate job fails (Diaa via jnioche)
 
 * NUTCH-207 Bandwidth target for fetcher rather than a thread count (jnioche)

Modified: nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1593951&r1=1593950&r2=1593951&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Mon May 12 12:54:32 2014
@@ -80,7 +80,7 @@ public class Http extends HttpBase {
   private static boolean authRulesRead = false;
   private static Configuration conf;
 
-  int maxThreadsTotal = 10;
+  private int maxThreadsTotal = 10;
 
   private String proxyUsername;
   private String proxyPassword;
@@ -170,6 +170,10 @@ public class Http extends HttpBase {
     params.setSendBufferSize(BUFFER_SIZE);
     params.setReceiveBufferSize(BUFFER_SIZE);
     params.setMaxTotalConnections(maxThreadsTotal);
+    
+    // Also set the max connections per host to maxThreadsTotal, since all threads
+    // might be used to fetch from the same host; otherwise timeout errors can occur.
+    params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
 
     // executeMethod(HttpMethod) seems to ignore the connection timeout on the connection manager.
     // set it explicitly on the HttpClient.