You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/12/16 23:42:41 UTC
svn commit: r1551389 - in /manifoldcf/trunk: CHANGES.txt
framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
Author: kwright
Date: Mon Dec 16 22:42:40 2013
New Revision: 1551389
URL: http://svn.apache.org/r1551389
Log:
Reset document schedule for pending documents on job start, always. Fix for CONNECTORS-841.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1551389&r1=1551388&r2=1551389&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Mon Dec 16 22:42:40 2013
@@ -3,6 +3,9 @@ $Id$
======================= 1.5-dev =====================
+CONNECTORS-841: Always reset document schedules on job start.
+(David Morana, Karl Wright)
+
CONNECTORS-839: Fix our CloudSolrServer usage to use multipart
post instead of putting everything in the URL.
(Alessandro Benedetti, Raymond Wiker, Karl Wright)
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1551389&r1=1551388&r2=1551389&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Mon Dec 16 22:42:40 2013
@@ -5845,7 +5845,10 @@ public class JobManager implements IJobM
// (3) If the connector has some other model, we look at the start time. A start
// time of 0 implies a full scan, while any other start time implies an incremental
// scan.
-
+
+ // Always reset document schedules for those documents already pending!
+ jobQueue.resetPendingDocumentSchedules(jobID);
+
// Complete connector model is told everything, so no delete phase.
if (connectorModel == IRepositoryConnector.MODEL_ADD_CHANGE_DELETE)
{
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java?rev=1551389&r1=1551388&r2=1551389&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java Mon Dec 16 22:42:40 2013
@@ -717,6 +717,28 @@ public class JobQueue extends org.apache
TrackerClass.noteJobChange(jobID,"Prepare full scan");
}
+ /** Reset the schedule (check time, check action, fail time, fail count) for all PENDINGPURGATORY and PENDING entries of a job.
+ *@param jobID is the job identifier.
+ */
+ public void resetPendingDocumentSchedules(Long jobID)
+ throws ManifoldCFException
+ {
+ HashMap map = new HashMap();
+ // Do not reset priorities here! They should all be blank at this point.
+ map.put(checkTimeField,new Long(0L));
+ map.put(checkActionField,actionToString(ACTION_RESCAN));
+ map.put(failTimeField,null);
+ map.put(failCountField,null);
+ ArrayList list = new ArrayList();
+ String query = buildConjunctionClause(list,new ClauseDescription[]{
+ new UnitaryClause(jobIDField,jobID),
+ new MultiClause(statusField,new Object[]{
+ statusToString(STATUS_PENDINGPURGATORY),
+ statusToString(STATUS_PENDING)})});
+ performUpdate(map,"WHERE "+query,list,null);
+ noteModifications(0,1,0);
+ }
+
/** For ADD_CHANGE_DELETE jobs where the specifications have been changed,
* we must reconsider every existing document. So reconsider them all.
*@param jobID is the job identifier.