You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/05/12 14:58:42 UTC

svn commit: r1593954 - in /nutch/branches/2.x: CHANGES.txt src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java

Author: jnioche
Date: Mon May 12 12:58:41 2014
New Revision: 1593954

URL: http://svn.apache.org/r1593954
Log:
NUTCH-1613 Timeouts in protocol-httpclient when crawling same host with >2 threads

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1593954&r1=1593953&r2=1593954&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon May 12 12:58:41 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1613 Timeouts in protocol-httpclient when crawling same host with >2 threads (brian44 via jnioche)
+
 * NUTCH-1182 fetcher to log hung threads (snagel)
 
 * NUTCH-1618 Turn speculative execution off for Fetching (talat)

Modified: nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1593954&r1=1593953&r2=1593954&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Mon May 12 12:58:41 2014
@@ -190,6 +190,10 @@ public class Http extends HttpBase {
 		params.setSendBufferSize(BUFFER_SIZE);
 		params.setReceiveBufferSize(BUFFER_SIZE);
 		params.setMaxTotalConnections(maxThreadsTotal);
+		
+		// Also raise the per-host connection limit to maxThreadsTotal: all threads may
+		// fetch from the same host, and the limit defaults to a much lower value, causing timeouts.
+		params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
 
 		// executeMethod(HttpMethod) seems to ignore the connection timeout on
 		// the connection manager.