You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/04/26 12:48:18 UTC

svn commit: r1476129 - in /manifoldcf/trunk: ./ connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/

Author: kwright
Date: Fri Apr 26 10:48:17 2013
New Revision: 1476129

URL: http://svn.apache.org/r1476129
Log:
Real fix for CONNECTORS-679.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1476129&r1=1476128&r2=1476129&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Apr 26 10:48:17 2013
@@ -3,9 +3,10 @@ $Id$
 
 ======================= 1.2-dev =====================
 
-CONNECTORS-679: Web connector hangs on some JVMs due to faulty
-optimizations.  Added volatile qualifiers for some of the key variables
-that seem to be involved in this, and a test.
+CONNECTORS-679: Web connector hangs during throttling.  Reason
+appears to be that it is possible to interrupt the beginRead() method
+after it goes into "obtain estimate" mode.  Added code to make it clean
+up in that case.  Also applied to RSS connector.
 (Erlend Garåsen, Karl Wright)
 
 CONNECTORS-677: Close body streams where required.

Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java?rev=1476129&r1=1476128&r2=1476129&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java Fri Apr 26 10:48:17 2013
@@ -1123,34 +1123,51 @@ public class ThrottledFetcher
         }
       }
 
-      long waitTime = 0L;
-      synchronized (this)
+      // It is possible for the following code to get interrupted.  If that happens,
+      // we have to unstick the threads that are waiting on the estimate!
+      boolean finished = false;
+      try
       {
-        // Add these bytes to the estimated total
-        totalBytesRead += (long)byteCount;
+        long waitTime = 0L;
+        synchronized (this)
+        {
+          // Add these bytes to the estimated total
+          totalBytesRead += (long)byteCount;
 
-        // Estimate the time this read will take, and wait accordingly
-        long estimatedTime = (long)(rateEstimate * (double)byteCount);
+          // Estimate the time this read will take, and wait accordingly
+          long estimatedTime = (long)(rateEstimate * (double)byteCount);
 
-        // Figure out how long the total byte count should take, to meet the constraint
-        long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer);
+          // Figure out how long the total byte count should take, to meet the constraint
+          long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer);
 
-        // The wait time is the different between our desired end time, minus the estimated time to read the data, and the
-        // current time.  But it can't be negative.
-        waitTime = (desiredEndTime - estimatedTime) - currentTime;
-      }
+          // The wait time is the different between our desired end time, minus the estimated time to read the data, and the
+          // current time.  But it can't be negative.
+          waitTime = (desiredEndTime - estimatedTime) - currentTime;
+        }
+
+        if (waitTime > 0L)
+        {
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("RSS: Performing a read wait on server '"+serverName+"' of "+
+            new Long(waitTime).toString()+" ms.");
+          ManifoldCF.sleep(waitTime);
+        }
 
-      if (waitTime > 0L)
+        //if (Logging.connectors.isTraceEnabled())
+        //      Logging.connectors.trace("RSS: Begin read noted for '"+serverName+"'");
+        finished = true;
+      }
+      finally
       {
-        if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("RSS: Performing a read wait on server '"+serverName+"' of "+
-          new Long(waitTime).toString()+" ms.");
-        ManifoldCF.sleep(waitTime);
+        if (!finished)
+        {
+          if (estimateInProgress)
+          {
+            estimateInProgress = false;
+            firstChunkLock.notifyAll();
+          }
+        }
       }
-
-      //if (Logging.connectors.isTraceEnabled())
-      //      Logging.connectors.trace("RSS: Begin read noted for '"+serverName+"'");
-
     }
 
     /** Note the end of an individual read from the server.  Call this just after an individual read completes.

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1476129&r1=1476128&r2=1476129&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Fri Apr 26 10:48:17 2013
@@ -832,31 +832,48 @@ public class ThrottledFetcher
         }
       }
 
-      long waitTime = 0L;
-      synchronized (this)
+      // It is possible for the following code to get interrupted.  If that happens,
+      // we have to unstick the threads that are waiting on the estimate!
+      boolean finished = false;
+      try
       {
-        // Add these bytes to the estimated total
-        totalBytesRead += (long)byteCount;
+        long waitTime = 0L;
+        synchronized (this)
+        {
+          // Add these bytes to the estimated total
+          totalBytesRead += (long)byteCount;
 
-        // Estimate the time this read will take, and wait accordingly
-        long estimatedTime = (long)(rateEstimate * (double)byteCount);
+          // Estimate the time this read will take, and wait accordingly
+          long estimatedTime = (long)(rateEstimate * (double)byteCount);
 
-        // Figure out how long the total byte count should take, to meet the constraint
-        long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer);
+          // Figure out how long the total byte count should take, to meet the constraint
+          long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer);
 
-        // The wait time is the different between our desired end time, minus the estimated time to read the data, and the
-        // current time.  But it can't be negative.
-        waitTime = (desiredEndTime - estimatedTime) - currentTime;
-      }
+          // The wait time is the different between our desired end time, minus the estimated time to read the data, and the
+          // current time.  But it can't be negative.
+          waitTime = (desiredEndTime - estimatedTime) - currentTime;
+        }
 
-      if (waitTime > 0L)
+        if (waitTime > 0L)
+        {
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("WEB: Performing a read wait on bin '"+binName+"' of "+
+            new Long(waitTime).toString()+" ms.");
+          ManifoldCF.sleep(waitTime);
+        }
+        finished = true;
+      }
+      finally
       {
-        if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("WEB: Performing a read wait on bin '"+binName+"' of "+
-          new Long(waitTime).toString()+" ms.");
-        ManifoldCF.sleep(waitTime);
+        if (!finished)
+        {
+          if (estimateInProgress)
+          {
+            estimateInProgress = false;
+            firstChunkLock.notifyAll();
+          }
+        }
       }
-
     }
 
     /** Note the end of an individual read from the server.  Call this just after an individual read completes.