You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/12/04 13:37:37 UTC

svn commit: r1416915 - in /manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler: CookieManager.java ThrottledFetcher.java

Author: kwright
Date: Tue Dec  4 12:37:36 2012
New Revision: 1416915

URL: http://svn.apache.org/viewvc?rev=1416915&view=rev
Log:
Fix cookie handling by overriding the httpcomponents cookie store.

Modified:
    manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java
    manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java

Modified: manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java?rev=1416915&r1=1416914&r2=1416915&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java (original)
+++ manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/CookieManager.java Tue Dec  4 12:37:36 2012
@@ -245,7 +245,8 @@ public class CookieManager extends org.a
           Date expirationDate = c.getExpiryDate();
           if (expirationDate != null)
             map.put(expirationDateField,new Long(expirationDate.getTime()));
-          map.put(discardField,booleanToString(!c.isPersistent()));
+          //map.put(discardField,booleanToString(!c.isPersistent()));
+          map.put(discardField,booleanToString(false));
           String commentURL = c.getCommentURL();
           if (commentURL != null && commentURL.length() > 0)
             map.put(commentURLField,commentURL);

Modified: manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1416915&r1=1416914&r2=1416915&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/branches/CONNECTORS-120-1/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Tue Dec  4 12:37:36 2012
@@ -72,11 +72,11 @@ import org.apache.http.impl.cookie.Basic
 import org.apache.http.impl.cookie.BrowserCompatSpec;
 import org.apache.http.cookie.CookieSpecFactory;
 import org.apache.http.cookie.CookieSpec;
-import org.apache.http.impl.client.BasicCookieStore;
 import org.apache.http.client.CookieStore;
 import org.apache.http.protocol.HttpContext;
 import org.apache.http.protocol.BasicHttpContext;
 import org.apache.http.client.protocol.ClientContext;
+import org.apache.http.cookie.CookieIdentityComparator;
 
 import org.apache.http.cookie.MalformedCookieException;
 import org.apache.http.conn.ConnectTimeoutException;
@@ -1495,7 +1495,7 @@ public class ThrottledFetcher
       fetchMethod.setHeader(new BasicHeader("From",from));
         
       // Use a custom cookie store
-      CookieStore cookieStore = new BasicCookieStore();
+      CookieStore cookieStore = new OurBasicCookieStore();
       // If we have any cookies to set, set them.
       if (loginCookies != null)
       {
@@ -1514,6 +1514,8 @@ public class ThrottledFetcher
       // Copy out the current cookies, in case the fetch fails
       lastFetchCookies = loginCookies;
 
+      //httpClient.setCookieStore(cookieStore);
+      
       // Create the thread
       methodThread = new ExecuteMethodThread(this, httpClient, fetchMethod, cookieStore);
       try
@@ -2432,8 +2434,7 @@ public class ThrottledFetcher
               {
                 HttpContext context = new BasicHttpContext();
                 context.setAttribute(ClientContext.COOKIE_STORE,cookieStore);
-
-                response = httpClient.execute(executeMethod, context);
+                response = httpClient.execute(executeMethod,context);
               }
               catch (java.net.SocketTimeoutException e)
               {
@@ -2700,4 +2701,98 @@ public class ThrottledFetcher
 
   }
 
+  protected static class OurBasicCookieStore implements CookieStore, Serializable {
+
+    private static final long serialVersionUID = -7581093305228232025L;
+
+    private final TreeSet<Cookie> cookies;
+
+    public OurBasicCookieStore() {
+      super();
+      this.cookies = new TreeSet<Cookie>(new CookieIdentityComparator());
+    }
+
+    /**
+     * Adds an {@link Cookie HTTP cookie}, replacing any existing equivalent cookie.
+     * The cookie is stored even if it has already expired; no expiry check is
+     * made here. A null cookie is ignored.
+     *
+     * @param cookie the {@link Cookie cookie} to be added
+     *
+     * @see #addCookies(Cookie[])
+     *
+     */
+    public synchronized void addCookie(Cookie cookie) {
+      if (cookie != null) {
+        // first remove any old cookie that is equivalent
+        cookies.remove(cookie);
+        cookies.add(cookie);
+      }
+    }
+
+    /**
+     * Adds an array of {@link Cookie HTTP cookies}. Cookies are added individually and
+     * in the given array order. Expired cookies are stored too, since no expiry check
+     * is made; a null array is ignored.
+     *
+     * @param cookies the {@link Cookie cookies} to be added
+     *
+     * @see #addCookie(Cookie)
+     *
+     */
+    public synchronized void addCookies(Cookie[] cookies) {
+      if (cookies != null) {
+        for (Cookie cooky : cookies) {
+          this.addCookie(cooky);
+        }
+      }
+    }
+
+    /**
+     * Returns a new list holding a snapshot of the {@link Cookie cookies} that this
+     * HTTP state currently contains; changes to the list do not affect the store.
+     *
+     * @return a list of {@link Cookie cookies}.
+     */
+    public synchronized List<Cookie> getCookies() {
+      //create defensive copy so it won't be concurrently modified
+      return new ArrayList<Cookie>(cookies);
+    }
+
+    /**
+     * Removes all {@link Cookie cookies} in this HTTP state
+     * that have expired by the specified {@link java.util.Date date}.
+     *
+     * @param date the date to test expiry against; if null, nothing is removed
+     * @return true if any cookies were purged.
+     * @see Cookie#isExpired(Date)
+     */
+    public synchronized boolean clearExpired(final Date date) {
+      if (date == null) {
+        return false;
+      }
+      boolean removed = false;
+      for (Iterator<Cookie> it = cookies.iterator(); it.hasNext();) {
+        if (it.next().isExpired(date)) {
+          it.remove();
+            removed = true;
+        }
+      }
+      return removed;
+    }
+
+    /**
+     * Clears all cookies.
+     */
+    public synchronized void clear() {
+      cookies.clear();
+    }
+
+    @Override
+    public synchronized String toString() {
+      return cookies.toString();
+    }
+
+  }
+
 }