You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/04/26 12:48:18 UTC
svn commit: r1476129 - in /manifoldcf/trunk: ./
connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/
connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/
Author: kwright
Date: Fri Apr 26 10:48:17 2013
New Revision: 1476129
URL: http://svn.apache.org/r1476129
Log:
Real fix for CONNECTORS-679.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1476129&r1=1476128&r2=1476129&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Apr 26 10:48:17 2013
@@ -3,9 +3,10 @@ $Id$
======================= 1.2-dev =====================
-CONNECTORS-679: Web connector hangs on some JVMs due to faulty
-optimizations. Added volatile qualifiers for some of the key variables
-that seem to be involved in this, and a test.
+CONNECTORS-679: Web connector hangs during throttling. Reason
+appears to be that it is possible to interrupt the beginRead() method
+after it goes into "obtain estimate" mode. Added code to make it clean
+up in that case. Also applied to RSS connector.
(Erlend Garåsen, Karl Wright)
CONNECTORS-677: Close body streams where required.
Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java?rev=1476129&r1=1476128&r2=1476129&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ThrottledFetcher.java Fri Apr 26 10:48:17 2013
@@ -1123,34 +1123,51 @@ public class ThrottledFetcher
}
}
- long waitTime = 0L;
- synchronized (this)
+ // It is possible for the following code to get interrupted. If that happens,
+ // we have to unstick the threads that are waiting on the estimate!
+ boolean finished = false;
+ try
{
- // Add these bytes to the estimated total
- totalBytesRead += (long)byteCount;
+ long waitTime = 0L;
+ synchronized (this)
+ {
+ // Add these bytes to the estimated total
+ totalBytesRead += (long)byteCount;
- // Estimate the time this read will take, and wait accordingly
- long estimatedTime = (long)(rateEstimate * (double)byteCount);
+ // Estimate the time this read will take, and wait accordingly
+ long estimatedTime = (long)(rateEstimate * (double)byteCount);
- // Figure out how long the total byte count should take, to meet the constraint
- long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer);
+ // Figure out how long the total byte count should take, to meet the constraint
+ long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer);
- // The wait time is the different between our desired end time, minus the estimated time to read the data, and the
- // current time. But it can't be negative.
- waitTime = (desiredEndTime - estimatedTime) - currentTime;
- }
+ // The wait time is the difference between our desired end time, minus the estimated time to read the data, and the
+ // current time. But it can't be negative.
+ waitTime = (desiredEndTime - estimatedTime) - currentTime;
+ }
+
+ if (waitTime > 0L)
+ {
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("RSS: Performing a read wait on server '"+serverName+"' of "+
+ new Long(waitTime).toString()+" ms.");
+ ManifoldCF.sleep(waitTime);
+ }
- if (waitTime > 0L)
+ //if (Logging.connectors.isTraceEnabled())
+ // Logging.connectors.trace("RSS: Begin read noted for '"+serverName+"'");
+ finished = true;
+ }
+ finally
{
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("RSS: Performing a read wait on server '"+serverName+"' of "+
- new Long(waitTime).toString()+" ms.");
- ManifoldCF.sleep(waitTime);
+ if (!finished)
+ {
+ if (estimateInProgress)
+ {
+ estimateInProgress = false;
+ firstChunkLock.notifyAll();
+ }
+ }
}
-
- //if (Logging.connectors.isTraceEnabled())
- // Logging.connectors.trace("RSS: Begin read noted for '"+serverName+"'");
-
}
/** Note the end of an individual read from the server. Call this just after an individual read completes.
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1476129&r1=1476128&r2=1476129&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Fri Apr 26 10:48:17 2013
@@ -832,31 +832,48 @@ public class ThrottledFetcher
}
}
- long waitTime = 0L;
- synchronized (this)
+ // It is possible for the following code to get interrupted. If that happens,
+ // we have to unstick the threads that are waiting on the estimate!
+ boolean finished = false;
+ try
{
- // Add these bytes to the estimated total
- totalBytesRead += (long)byteCount;
+ long waitTime = 0L;
+ synchronized (this)
+ {
+ // Add these bytes to the estimated total
+ totalBytesRead += (long)byteCount;
- // Estimate the time this read will take, and wait accordingly
- long estimatedTime = (long)(rateEstimate * (double)byteCount);
+ // Estimate the time this read will take, and wait accordingly
+ long estimatedTime = (long)(rateEstimate * (double)byteCount);
- // Figure out how long the total byte count should take, to meet the constraint
- long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer);
+ // Figure out how long the total byte count should take, to meet the constraint
+ long desiredEndTime = seriesStartTime + (long)(((double)totalBytesRead) * minimumMillisecondsPerBytePerServer);
- // The wait time is the different between our desired end time, minus the estimated time to read the data, and the
- // current time. But it can't be negative.
- waitTime = (desiredEndTime - estimatedTime) - currentTime;
- }
+ // The wait time is the difference between our desired end time, minus the estimated time to read the data, and the
+ // current time. But it can't be negative.
+ waitTime = (desiredEndTime - estimatedTime) - currentTime;
+ }
- if (waitTime > 0L)
+ if (waitTime > 0L)
+ {
+ if (Logging.connectors.isDebugEnabled())
+ Logging.connectors.debug("WEB: Performing a read wait on bin '"+binName+"' of "+
+ new Long(waitTime).toString()+" ms.");
+ ManifoldCF.sleep(waitTime);
+ }
+ finished = true;
+ }
+ finally
{
- if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("WEB: Performing a read wait on bin '"+binName+"' of "+
- new Long(waitTime).toString()+" ms.");
- ManifoldCF.sleep(waitTime);
+ if (!finished)
+ {
+ if (estimateInProgress)
+ {
+ estimateInProgress = false;
+ firstChunkLock.notifyAll();
+ }
+ }
}
-
}
/** Note the end of an individual read from the server. Call this just after an individual read completes.