You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/05/12 14:54:32 UTC
svn commit: r1593951 - in /nutch/trunk: CHANGES.txt
src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
Author: jnioche
Date: Mon May 12 12:54:32 2014
New Revision: 1593951
URL: http://svn.apache.org/r1593951
Log:
NUTCH-1613 Timeouts in protocol-httpclient when crawling same host with >2 threads
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1593951&r1=1593950&r2=1593951&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon May 12 12:54:32 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-1613 Timeouts in protocol-httpclient when crawling same host with >2 threads (brian44 via jnioche)
+
* NUTCH-1766 Generator to unlock crawldb and remove tempdir if generate job fails (Diaa via jnioche)
* NUTCH-207 Bandwidth target for fetcher rather than a thread count (jnioche)
Modified: nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1593951&r1=1593950&r2=1593951&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Mon May 12 12:54:32 2014
@@ -80,7 +80,7 @@ public class Http extends HttpBase {
private static boolean authRulesRead = false;
private static Configuration conf;
- int maxThreadsTotal = 10;
+ private int maxThreadsTotal = 10;
private String proxyUsername;
private String proxyPassword;
@@ -170,6 +170,10 @@ public class Http extends HttpBase {
params.setSendBufferSize(BUFFER_SIZE);
params.setReceiveBufferSize(BUFFER_SIZE);
params.setMaxTotalConnections(maxThreadsTotal);
+
+ // Also set max connections per host to maxThreadsTotal, since all threads
+ // might be used to fetch from the same host; otherwise timeout errors can occur.
+ params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
// executeMethod(HttpMethod) seems to ignore the connection timeout on the connection manager,
// so set it explicitly on the HttpClient.