You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/03/24 00:43:07 UTC

svn commit: r1580664 - /manifoldcf/branches/CONNECTORS-911/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java

Author: kwright
Date: Sun Mar 23 23:43:07 2014
New Revision: 1580664

URL: http://svn.apache.org/r1580664
Log:
Do some work, as yet incomplete, on revamping webconnector to be compatible with httpclient 4.3

Modified:
    manifoldcf/branches/CONNECTORS-911/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java

Modified: manifoldcf/branches/CONNECTORS-911/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-911/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1580664&r1=1580663&r2=1580664&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-911/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/branches/CONNECTORS-911/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Sun Mar 23 23:43:07 2014
@@ -33,15 +33,20 @@ import java.util.zip.GZIPInputStream;
 import java.util.concurrent.TimeUnit;
 import java.nio.charset.Charset;
 
-import org.apache.http.conn.ClientConnectionManager;
+import org.apache.http.conn.HttpClientConnectionManager;
 import org.apache.http.client.HttpClient;
+import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.protocol.HttpRequestExecutor;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.CredentialsProvider;
+import org.apache.http.impl.client.BasicCredentialsProvider;
+import org.apache.http.config.SocketConfig;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpPost;
 import org.apache.http.NameValuePair;
-import org.apache.http.impl.conn.PoolingClientConnectionManager;
-import org.apache.http.conn.scheme.Scheme;
-import org.apache.http.conn.ssl.SSLSocketFactory;
-import org.apache.http.conn.ssl.AllowAllHostnameVerifier;
+import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
 import org.apache.http.conn.ssl.BrowserCompatHostnameVerifier;
 import org.apache.http.HttpEntity;
 import org.apache.http.HttpResponse;
@@ -49,21 +54,13 @@ import org.apache.http.auth.AuthScope;
 import org.apache.http.auth.NTCredentials;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpRequestBase;
-import org.apache.http.impl.client.DefaultHttpClient;
-import org.apache.http.impl.client.AbstractHttpClient;
 import org.apache.http.impl.client.DefaultRedirectStrategy;
 import org.apache.http.util.EntityUtils;
-import org.apache.http.params.BasicHttpParams;
-import org.apache.http.params.HttpParams;
-import org.apache.http.params.CoreConnectionPNames;
 import org.apache.http.HttpStatus;
 import org.apache.http.HttpHost;
 import org.apache.http.Header;
 import org.apache.http.HeaderElement;
-import org.apache.http.conn.params.ConnRoutePNames;
 import org.apache.http.message.BasicHeader;
-import org.apache.http.client.params.ClientPNames;
-import org.apache.http.client.params.HttpClientParams;
 import org.apache.http.client.params.CookiePolicy;
 import org.apache.http.cookie.params.CookieSpecPNames;
 import org.apache.http.impl.cookie.BasicClientCookie;
@@ -257,9 +254,9 @@ public class ThrottledFetcher
     protected long expireTime = -1L;
 
     /** The http connection manager.  The pool is of size 1.  */
-    protected PoolingClientConnectionManager connManager = null;
+    protected HttpClientConnectionManager connManager = null;
     /** The http client object. */
-    protected AbstractHttpClient httpClient = null;
+    protected HttpClient httpClient = null;
     /** The method object */
     protected HttpRequestBase fetchMethod = null;
     /** The error trace, if any */
@@ -395,9 +392,8 @@ public class ThrottledFetcher
       throws ManifoldCFException, ServiceInterruption
     {
       // Set up scheme
-      SSLSocketFactory myFactory = new SSLSocketFactory(new InterruptibleSocketFactory(httpsSocketFactory,connectionTimeoutMilliseconds),
-        new AllowAllHostnameVerifier());
-      Scheme myHttpsProtocol = new Scheme("https", 443, myFactory);
+      SSLConnectionSocketFactory myFactory = new SSLConnectionSocketFactory(new InterruptibleSocketFactory(httpsSocketFactory,connectionTimeoutMilliseconds),
+        SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
 
       int hostPort;
       String displayedPort;
@@ -443,15 +439,9 @@ public class ThrottledFetcher
       
       if (connManager == null)
       {
-        PoolingClientConnectionManager localConnManager = new PoolingClientConnectionManager();
-        localConnManager.setMaxTotal(1);
-        localConnManager.setDefaultMaxPerRoute(1);
-        connManager = localConnManager;
+        connManager = new PoolingHttpClientConnectionManager();
       }
       
-      // Set up protocol registry
-      connManager.getSchemeRegistry().register(myHttpsProtocol);
-      
       long startTime = 0L;
       if (Logging.connectors.isDebugEnabled())
       {
@@ -459,10 +449,59 @@ public class ThrottledFetcher
         Logging.connectors.debug("WEB: Waiting for an HttpClient object");
       }
 
-      // If we already have an httpclient object, great.  Otherwise we have to get one, and initialize it with
-      // those parameters that aren't expected to change.
-      if (httpClient == null)
+      CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
+
+      // Set up authentication to use
+      if (authentication != null)
       {
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("WEB: For "+myUrl+", discovered matching authentication credentials");
+        credentialsProvider.setCredentials(AuthScope.ANY,
+          authentication.makeCredentialsObject(host));
+      }
+
+      RequestConfig.Builder requestBuilder = RequestConfig.custom()
+        .setCircularRedirectsAllowed(true)
+        .setSocketTimeout(socketTimeoutMilliseconds)
+        .setStaleConnectionCheckEnabled(true)
+        .setExpectContinueEnabled(true)
+        .setConnectTimeout(connectionTimeoutMilliseconds)
+        .setConnectionRequestTimeout(socketTimeoutMilliseconds)
+        .setRedirectsEnabled(redirectOK);
+
+      // If there's a proxy, set that too.
+      if (proxyHost != null && proxyHost.length() > 0)
+      {
+        // Configure proxy authentication
+        if (proxyAuthUsername != null && proxyAuthUsername.length() > 0)
+        {
+          credentialsProvider.setCredentials(
+            new AuthScope(proxyHost, proxyPort),
+            new NTCredentials(proxyAuthUsername, (proxyAuthPassword==null)?"":proxyAuthPassword, currentHost, (proxyAuthDomain==null)?"":proxyAuthDomain));
+        }
+
+        HttpHost proxy = new HttpHost(proxyHost, proxyPort);
+
+        requestBuilder.setProxy(proxy);
+      }
+
+      httpClient = HttpClients.custom()
+        .setConnectionManager(connectionManager)
+        .setMaxConnTotal(1)
+        .setMaxConnPerRoute(1)
+        .disableAutomaticRetries()
+        .setDefaultRequestConfig(requestBuilder.build())
+        .setDefaultSocketConfig(SocketConfig.custom()
+          .setTcpNoDelay(true)
+          .setSoTimeout(socketTimeoutMilliseconds)
+          .build())
+        .setDefaultCredentialsProvider(credentialsProvider)
+        .setSSLSocketFactory(myFactory)
+        .setRequestExecutor(new HttpRequestExecutor(socketTimeoutMilliseconds))
+        .setRedirectStrategy(new DefaultRedirectStrategy())
+        .build();
+
+        /*
         BasicHttpParams params = new BasicHttpParams();
         params.setParameter(ClientPNames.DEFAULT_HOST,fetchHost);
         params.setBooleanParameter(CoreConnectionPNames.TCP_NODELAY,true);
@@ -497,39 +536,13 @@ public class ThrottledFetcher
           }
         );
 
-        // If there's a proxy, set that too.
-        if (proxyHost != null && proxyHost.length() > 0)
-        {
-          // Configure proxy authentication
-          if (proxyAuthUsername != null && proxyAuthUsername.length() > 0)
-          {
-            localHttpClient.getCredentialsProvider().setCredentials(
-              new AuthScope(proxyHost, proxyPort),
-              new NTCredentials(proxyAuthUsername, (proxyAuthPassword==null)?"":proxyAuthPassword, currentHost, (proxyAuthDomain==null)?"":proxyAuthDomain));
-          }
-
-          HttpHost proxy = new HttpHost(proxyHost, proxyPort);
-
-          localHttpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
-        }
 
-        // Set up authentication to use
-        if (authentication != null)
-        {
-          if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("WEB: For "+myUrl+", discovered matching authentication credentials");
-          localHttpClient.getCredentialsProvider().setCredentials(AuthScope.ANY,
-            authentication.makeCredentialsObject(host));
-        }
           
         httpClient = localHttpClient;
-      }
+        */
 
 
       // Set the parameters we haven't keyed on (so these can change from request to request)
-      httpClient.getParams().setIntParameter(CoreConnectionPNames.SO_TIMEOUT,socketTimeoutMilliseconds);
-      httpClient.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT,connectionTimeoutMilliseconds);
-      httpClient.getParams().setBooleanParameter(ClientPNames.HANDLE_REDIRECTS,redirectOK);
 
       if (host != null)
       {