You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/11/21 18:55:30 UTC
svn commit: r1412214 -
/manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Author: kwright
Date: Wed Nov 21 17:55:29 2012
New Revision: 1412214
URL: http://svn.apache.org/viewvc?rev=1412214&view=rev
Log:
Override BROWSER_COMPATIBILITY cookie setting with our more lax version.
Modified:
manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Modified: manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1412214&r1=1412213&r2=1412214&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/branches/CONNECTORS-120/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Wed Nov 21 17:55:29 2012
@@ -64,7 +64,15 @@ import org.apache.http.impl.cookie.Basic
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.client.entity.UrlEncodedFormEntity;
+import org.apache.http.cookie.CookieOrigin;
+import org.apache.http.cookie.ClientCookie;
+import org.apache.http.cookie.Cookie;
+import org.apache.http.impl.cookie.BasicPathHandler;
+import org.apache.http.impl.cookie.BrowserCompatSpec;
+import org.apache.http.cookie.CookieSpecFactory;
+import org.apache.http.cookie.CookieSpec;
+import org.apache.http.cookie.MalformedCookieException;
import org.apache.http.conn.ConnectTimeoutException;
import org.apache.http.client.RedirectException;
import org.apache.http.client.CircularRedirectException;
@@ -1331,7 +1339,17 @@ public class ThrottledFetcher
DefaultHttpClient localHttpClient = new DefaultHttpClient(connManager,params);
localHttpClient.setRedirectStrategy(new DefaultRedirectStrategy());
-
+ localHttpClient.getCookieSpecs().register(CookiePolicy.BROWSER_COMPATIBILITY, new CookieSpecFactory()
+ {
+
+ public CookieSpec newInstance(HttpParams params)
+ {
+ return new LaxBrowserCompatSpec();
+ }
+
+ }
+ );
+
// If there's a proxy, set that too.
if (proxyHost != null && proxyHost.length() > 0)
{
@@ -2332,6 +2350,28 @@ public class ThrottledFetcher
}
}
+ /** Variant of BrowserCompatSpec whose PATH attribute handler has a no-op validate(), so cookie paths are not checked when a cookie is validated. See CONNECTORS-97.
+ */
+ protected static class LaxBrowserCompatSpec extends BrowserCompatSpec
+ {
+
+ public LaxBrowserCompatSpec()
+ {
+ super();
+ registerAttribHandler(ClientCookie.PATH_ATTR, new BasicPathHandler()
+ {
+ @Override
+ public void validate(Cookie cookie, CookieOrigin origin) throws MalformedCookieException
+ {
+ // Intentionally empty: path validation is skipped, so this override never throws MalformedCookieException.
+ }
+
+ }
+ );
+ }
+
+ }
+
/** This thread does the actual socket communication with the server.
* It's set up so that it can be abandoned at shutdown time.
*