You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/12/04 13:37:37 UTC
svn commit: r1416915 - in
/manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler:
CookieManager.java ThrottledFetcher.java
Author: kwright
Date: Tue Dec 4 12:37:36 2012
New Revision: 1416915
URL: http://svn.apache.org/viewvc?rev=1416915&view=rev
Log:
Fix cookie handling by overriding the httpcomponents cookie store.
Modified:
manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java
manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Modified: manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java?rev=1416915&r1=1416914&r2=1416915&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java (original)
+++ manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java Tue Dec 4 12:37:36 2012
@@ -245,7 +245,8 @@ public class CookieManager extends org.a
Date expirationDate = c.getExpiryDate();
if (expirationDate != null)
map.put(expirationDateField,new Long(expirationDate.getTime()));
- map.put(discardField,booleanToString(!c.isPersistent()));
+ //map.put(discardField,booleanToString(!c.isPersistent()));
+ map.put(discardField,booleanToString(false));
String commentURL = c.getCommentURL();
if (commentURL != null && commentURL.length() > 0)
map.put(commentURLField,commentURL);
Modified: manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1416915&r1=1416914&r2=1416915&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Tue Dec 4 12:37:36 2012
@@ -72,11 +72,11 @@ import org.apache.http.impl.cookie.Basic
import org.apache.http.impl.cookie.BrowserCompatSpec;
import org.apache.http.cookie.CookieSpecFactory;
import org.apache.http.cookie.CookieSpec;
-import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.client.CookieStore;
import org.apache.http.protocol.HttpContext;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.client.protocol.ClientContext;
+import org.apache.http.cookie.CookieIdentityComparator;
import org.apache.http.cookie.MalformedCookieException;
import org.apache.http.conn.ConnectTimeoutException;
@@ -1495,7 +1495,7 @@ public class ThrottledFetcher
fetchMethod.setHeader(new BasicHeader("From",from));
// Use a custom cookie store
- CookieStore cookieStore = new BasicCookieStore();
+ CookieStore cookieStore = new OurBasicCookieStore();
// If we have any cookies to set, set them.
if (loginCookies != null)
{
@@ -1514,6 +1514,8 @@ public class ThrottledFetcher
// Copy out the current cookies, in case the fetch fails
lastFetchCookies = loginCookies;
+ //httpClient.setCookieStore(cookieStore);
+
// Create the thread
methodThread = new ExecuteMethodThread(this, httpClient, fetchMethod, cookieStore);
try
@@ -2432,8 +2434,7 @@ public class ThrottledFetcher
{
HttpContext context = new BasicHttpContext();
context.setAttribute(ClientContext.COOKIE_STORE,cookieStore);
-
- response = httpClient.execute(executeMethod, context);
+ response = httpClient.execute(executeMethod,context);
}
catch (java.net.SocketTimeoutException e)
{
@@ -2700,4 +2701,98 @@ public class ThrottledFetcher
}
+ protected static class OurBasicCookieStore implements CookieStore, Serializable {
+
+ private static final long serialVersionUID = -7581093305228232025L;
+
+ private final TreeSet<Cookie> cookies;
+
+ public OurBasicCookieStore() {
+ super();
+ this.cookies = new TreeSet<Cookie>(new CookieIdentityComparator());
+ }
+
+ /**
+ * Adds an {@link Cookie HTTP cookie}, replacing any existing equivalent cookie.
+ * No expiration check is made here: the cookie is stored even if it has already
+ * expired. A null cookie is ignored.
+ *
+ * @param cookie the {@link Cookie cookie} to be added; may be null
+ *
+ * @see #addCookies(Cookie[])
+ *
+ */
+ public synchronized void addCookie(Cookie cookie) {
+ if (cookie != null) {
+ // first remove any old cookie that is equivalent
+ cookies.remove(cookie);
+ cookies.add(cookie);
+ }
+ }
+
+ /**
+ * Adds an array of {@link Cookie HTTP cookies}. Cookies are added individually and
+ * in the given array order. Each one is passed to {@link #addCookie(Cookie)}, so no
+ * expiration check is made for any of them. A null array is ignored.
+ *
+ * @param cookies the {@link Cookie cookies} to be added; may be null
+ *
+ * @see #addCookie(Cookie)
+ *
+ */
+ public synchronized void addCookies(Cookie[] cookies) {
+ if (cookies != null) {
+ for (Cookie cooky : cookies) {
+ this.addCookie(cooky);
+ }
+ }
+ }
+
+ /**
+ * Returns a newly allocated list of the {@link Cookie cookies} that this
+ * store currently contains, in the iteration order of the underlying sorted set.
+ *
+ * @return a copy of the stored {@link Cookie cookies}; changing it does not affect this store.
+ */
+ public synchronized List<Cookie> getCookies() {
+ //create defensive copy so it won't be concurrently modified
+ return new ArrayList<Cookie>(cookies);
+ }
+
+ /**
+ * Removes all {@link Cookie cookies} in this store
+ * that have expired by the specified {@link java.util.Date date}.
+ *
+ * @param date the date to test each cookie against; if null, nothing is removed
+ * @return true if any cookies were purged, false otherwise (including for a null date).
+ * @see Cookie#isExpired(Date)
+ */
+ public synchronized boolean clearExpired(final Date date) {
+ if (date == null) {
+ return false;
+ }
+ boolean removed = false;
+ for (Iterator<Cookie> it = cookies.iterator(); it.hasNext();) {
+ if (it.next().isExpired(date)) {
+ it.remove();
+ removed = true;
+ }
+ }
+ return removed;
+ }
+
+ /**
+ * Clears all cookies.
+ */
+ public synchronized void clear() {
+ cookies.clear();
+ }
+
+ @Override
+ public synchronized String toString() {
+ return cookies.toString();
+ }
+
+ }
+
}