You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/11/21 18:55:30 UTC

svn commit: r1412214 - /manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java

Author: kwright
Date: Wed Nov 21 17:55:29 2012
New Revision: 1412214

URL: http://svn.apache.org/viewvc?rev=1412214&view=rev
Log:
Override BROWSER_COMPATIBILITY cookie setting with our more lax version.

Modified:
    manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java

Modified: manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1412214&r1=1412213&r2=1412214&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Wed Nov 21 17:55:29 2012
@@ -64,7 +64,15 @@ import org.apache.http.impl.cookie.Basic
 import org.apache.http.message.BasicNameValuePair;
 import org.apache.http.protocol.HTTP;
 import org.apache.http.client.entity.UrlEncodedFormEntity;
+import org.apache.http.cookie.CookieOrigin;
+import org.apache.http.cookie.ClientCookie;
+import org.apache.http.cookie.Cookie;
+import org.apache.http.impl.cookie.BasicPathHandler;
+import org.apache.http.impl.cookie.BrowserCompatSpec;
+import org.apache.http.cookie.CookieSpecFactory;
+import org.apache.http.cookie.CookieSpec;
 
+import org.apache.http.cookie.MalformedCookieException;
 import org.apache.http.conn.ConnectTimeoutException;
 import org.apache.http.client.RedirectException;
 import org.apache.http.client.CircularRedirectException;
@@ -1331,7 +1339,17 @@ public class ThrottledFetcher
 
         DefaultHttpClient localHttpClient = new DefaultHttpClient(connManager,params);
         localHttpClient.setRedirectStrategy(new DefaultRedirectStrategy());
-          
+        localHttpClient.getCookieSpecs().register(CookiePolicy.BROWSER_COMPATIBILITY, new CookieSpecFactory()
+          {
+
+            public CookieSpec newInstance(HttpParams params)
+            {
+              return new LaxBrowserCompatSpec();
+            }
+    
+          }
+        );
+
         // If there's a proxy, set that too.
         if (proxyHost != null && proxyHost.length() > 0)
         {
@@ -2332,6 +2350,28 @@ public class ThrottledFetcher
     }
   }
 
+  /** Lenient BrowserCompatSpec variant: its PATH attribute handler's validate() is a no-op, so cookie paths are not checked.  See CONNECTORS-97.
+  */
+  protected static class LaxBrowserCompatSpec extends BrowserCompatSpec
+  {
+
+    public LaxBrowserCompatSpec()
+    {
+      super();
+      registerAttribHandler(ClientCookie.PATH_ATTR, new BasicPathHandler()
+        {
+          @Override
+          public void validate(Cookie cookie, CookieOrigin origin) throws MalformedCookieException
+          {
+            // Intentionally empty: this lax spec performs no cookie path validation.
+          }
+              
+        }
+      );
+    }
+    
+  }
+
   /** This thread does the actual socket communication with the server.
   * It's set up so that it can be abandoned at shutdown time.
   *