You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/12/16 23:42:41 UTC

svn commit: r1551389 - in /manifoldcf/trunk: CHANGES.txt framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java

Author: kwright
Date: Mon Dec 16 22:42:40 2013
New Revision: 1551389

URL: http://svn.apache.org/r1551389
Log:
Reset document schedule for pending documents on job start, always.  Fix for CONNECTORS-841.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1551389&r1=1551388&r2=1551389&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Mon Dec 16 22:42:40 2013
@@ -3,6 +3,9 @@ $Id$
 
 ======================= 1.5-dev =====================
 
+CONNECTORS-841: Always reset document schedules on job start.
+(David Morana, Karl Wright)
+
 CONNECTORS-839: Fix our CloudSolrServer usage to use multipart
 post instead of putting everything in the URL.
 (Alessandro Benedetti, Raymond Wiker, Karl Wright)

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1551389&r1=1551388&r2=1551389&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Mon Dec 16 22:42:40 2013
@@ -5845,7 +5845,10 @@ public class JobManager implements IJobM
     // (3) If the connector has some other model, we look at the start time.  A start
     // time of 0 implies a full scan, while any other start time implies an incremental
     // scan.
-
+    
+    // Always reset document schedules for those documents already pending!
+    jobQueue.resetPendingDocumentSchedules(jobID);
+    
     // Complete connector model is told everything, so no delete phase.
     if (connectorModel == IRepositoryConnector.MODEL_ADD_CHANGE_DELETE)
     {

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java?rev=1551389&r1=1551388&r2=1551389&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java Mon Dec 16 22:42:40 2013
@@ -717,6 +717,28 @@ public class JobQueue extends org.apache
     TrackerClass.noteJobChange(jobID,"Prepare full scan");
   }
 
+  /** Reset schedule (check time, check action, fail time, fail count) for all PENDING and PENDINGPURGATORY entries of a job.
+  *@param jobID is the job identifier.
+  */
+  public void resetPendingDocumentSchedules(Long jobID)
+    throws ManifoldCFException
+  {
+    HashMap map = new HashMap();
+    // Do not reset priorities here!  They should all be blank at this point.
+    map.put(checkTimeField,new Long(0L));
+    map.put(checkActionField,actionToString(ACTION_RESCAN));
+    map.put(failTimeField,null);
+    map.put(failCountField,null);
+    ArrayList list = new ArrayList();
+    String query = buildConjunctionClause(list,new ClauseDescription[]{
+      new UnitaryClause(jobIDField,jobID),
+      new MultiClause(statusField,new Object[]{
+        statusToString(STATUS_PENDINGPURGATORY),
+        statusToString(STATUS_PENDING)})});
+    performUpdate(map,"WHERE "+query,list,null);
+    noteModifications(0,1,0);
+  }
+  
   /** For ADD_CHANGE_DELETE jobs where the specifications have been changed,
   * we must reconsider every existing document.  So reconsider them all.
   *@param jobID is the job identifier.