You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/04/02 16:25:10 UTC
svn commit: r1584050 - in /manifoldcf/branches/CONNECTORS-911-2: ./
connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Author: kwright
Date: Wed Apr 2 14:25:10 2014
New Revision: 1584050
URL: http://svn.apache.org/r1584050
Log:
Merge in contents of CONNECTORS-911 branch
Modified:
manifoldcf/branches/CONNECTORS-911-2/ (props changed)
manifoldcf/branches/CONNECTORS-911-2/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Propchange: manifoldcf/branches/CONNECTORS-911-2/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/CONNECTORS-911:r1580649-1584049
Modified: manifoldcf/branches/CONNECTORS-911-2/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-911-2/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1584050&r1=1584049&r2=1584050&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-911-2/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/branches/CONNECTORS-911-2/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Wed Apr 2 14:25:10 2014
@@ -33,15 +33,20 @@ import java.util.zip.GZIPInputStream;
import java.util.concurrent.TimeUnit;
import java.nio.charset.Charset;
-import org.apache.http.conn.ClientConnectionManager;
+import org.apache.http.conn.HttpClientConnectionManager;
import org.apache.http.client.HttpClient;
+import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.protocol.HttpRequestExecutor;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.CredentialsProvider;
+import org.apache.http.impl.client.BasicCredentialsProvider;
+import org.apache.http.config.SocketConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.NameValuePair;
-import org.apache.http.impl.conn.PoolingClientConnectionManager;
-import org.apache.http.conn.scheme.Scheme;
-import org.apache.http.conn.ssl.SSLSocketFactory;
-import org.apache.http.conn.ssl.AllowAllHostnameVerifier;
+import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.BrowserCompatHostnameVerifier;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
@@ -49,21 +54,13 @@ import org.apache.http.auth.AuthScope;
import org.apache.http.auth.NTCredentials;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpRequestBase;
-import org.apache.http.impl.client.DefaultHttpClient;
-import org.apache.http.impl.client.AbstractHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.util.EntityUtils;
-import org.apache.http.params.BasicHttpParams;
-import org.apache.http.params.HttpParams;
-import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.HttpStatus;
import org.apache.http.HttpHost;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
-import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.message.BasicHeader;
-import org.apache.http.client.params.ClientPNames;
-import org.apache.http.client.params.HttpClientParams;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.cookie.params.CookieSpecPNames;
import org.apache.http.impl.cookie.BasicClientCookie;
@@ -257,9 +254,9 @@ public class ThrottledFetcher
protected long expireTime = -1L;
/** The http connection manager. The pool is of size 1. */
- protected PoolingClientConnectionManager connManager = null;
+ protected HttpClientConnectionManager connManager = null;
/** The http client object. */
- protected AbstractHttpClient httpClient = null;
+ protected HttpClient httpClient = null;
/** The method object */
protected HttpRequestBase fetchMethod = null;
/** The error trace, if any */
@@ -395,9 +392,8 @@ public class ThrottledFetcher
throws ManifoldCFException, ServiceInterruption
{
// Set up scheme
- SSLSocketFactory myFactory = new SSLSocketFactory(new InterruptibleSocketFactory(httpsSocketFactory,connectionTimeoutMilliseconds),
- new AllowAllHostnameVerifier());
- Scheme myHttpsProtocol = new Scheme("https", 443, myFactory);
+ SSLConnectionSocketFactory myFactory = new SSLConnectionSocketFactory(new InterruptibleSocketFactory(httpsSocketFactory,connectionTimeoutMilliseconds),
+ SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
int hostPort;
String displayedPort;
@@ -443,15 +439,9 @@ public class ThrottledFetcher
if (connManager == null)
{
- PoolingClientConnectionManager localConnManager = new PoolingClientConnectionManager();
- localConnManager.setMaxTotal(1);
- localConnManager.setDefaultMaxPerRoute(1);
- connManager = localConnManager;
+ connManager = new PoolingHttpClientConnectionManager();
}
- // Set up protocol registry
- connManager.getSchemeRegistry().register(myHttpsProtocol);
-
long startTime = 0L;
if (Logging.connectors.isDebugEnabled())
{
@@ -459,10 +449,59 @@ public class ThrottledFetcher
Logging.connectors.debug("WEB: Waiting for an HttpClient object");
}
- // If we already have an httpclient object, great. Otherwise we have to get one, and initialize it with
- // those parameters that aren't expected to change.
- if (httpClient == null)
+ CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
+
+ // Set up authentication to use
+ if (authentication != null)
{
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("WEB: For "+myUrl+", discovered matching authentication credentials");
+ credentialsProvider.setCredentials(AuthScope.ANY,
+ authentication.makeCredentialsObject(host));
+ }
+
+ RequestConfig.Builder requestBuilder = RequestConfig.custom()
+ .setCircularRedirectsAllowed(true)
+ .setSocketTimeout(socketTimeoutMilliseconds)
+ .setStaleConnectionCheckEnabled(true)
+ .setExpectContinueEnabled(true)
+ .setConnectTimeout(connectionTimeoutMilliseconds)
+ .setConnectionRequestTimeout(socketTimeoutMilliseconds)
+ .setRedirectsEnabled(redirectOK);
+
+ // If there's a proxy, set that too.
+ if (proxyHost != null && proxyHost.length() > 0)
+ {
+ // Configure proxy authentication
+ if (proxyAuthUsername != null && proxyAuthUsername.length() > 0)
+ {
+ credentialsProvider.setCredentials(
+ new AuthScope(proxyHost, proxyPort),
+ new NTCredentials(proxyAuthUsername, (proxyAuthPassword==null)?"":proxyAuthPassword, currentHost, (proxyAuthDomain==null)?"":proxyAuthDomain));
+ }
+
+ HttpHost proxy = new HttpHost(proxyHost, proxyPort);
+
+ requestBuilder.setProxy(proxy);
+ }
+
+ httpClient = HttpClients.custom()
+ .setConnectionManager(connectionManager)
+ .setMaxConnTotal(1)
+ .setMaxConnPerRoute(1)
+ .disableAutomaticRetries()
+ .setDefaultRequestConfig(requestBuilder.build())
+ .setDefaultSocketConfig(SocketConfig.custom()
+ .setTcpNoDelay(true)
+ .setSoTimeout(socketTimeoutMilliseconds)
+ .build())
+ .setDefaultCredentialsProvider(credentialsProvider)
+ .setSSLSocketFactory(myFactory)
+ .setRequestExecutor(new HttpRequestExecutor(socketTimeoutMilliseconds))
+ .setRedirectStrategy(new DefaultRedirectStrategy())
+ .build();
+
+ /*
BasicHttpParams params = new BasicHttpParams();
params.setParameter(ClientPNames.DEFAULT_HOST,fetchHost);
params.setBooleanParameter(CoreConnectionPNames.TCP_NODELAY,true);
@@ -497,39 +536,13 @@ public class ThrottledFetcher
}
);
- // If there's a proxy, set that too.
- if (proxyHost != null && proxyHost.length() > 0)
- {
- // Configure proxy authentication
- if (proxyAuthUsername != null && proxyAuthUsername.length() > 0)
- {
- localHttpClient.getCredentialsProvider().setCredentials(
- new AuthScope(proxyHost, proxyPort),
- new NTCredentials(proxyAuthUsername, (proxyAuthPassword==null)?"":proxyAuthPassword, currentHost, (proxyAuthDomain==null)?"":proxyAuthDomain));
- }
-
- HttpHost proxy = new HttpHost(proxyHost, proxyPort);
-
- localHttpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
- }
- // Set up authentication to use
- if (authentication != null)
- {
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("WEB: For "+myUrl+", discovered matching authentication credentials");
- localHttpClient.getCredentialsProvider().setCredentials(AuthScope.ANY,
- authentication.makeCredentialsObject(host));
- }
httpClient = localHttpClient;
- }
+ */
// Set the parameters we haven't keyed on (so these can change from request to request)
- httpClient.getParams().setIntParameter(CoreConnectionPNames.SO_TIMEOUT,socketTimeoutMilliseconds);
- httpClient.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT,connectionTimeoutMilliseconds);
- httpClient.getParams().setBooleanParameter(ClientPNames.HANDLE_REDIRECTS,redirectOK);
if (host != null)
{