You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/11/10 21:17:12 UTC

svn commit: r1637969 - in /manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system: CrawlerAgent.java JobResetThread.java ManifoldCF.java StartupThread.java

Author: kwright
Date: Mon Nov 10 20:17:12 2014
New Revision: 1637969

URL: http://svn.apache.org/r1637969
Log:
Use reprioritization tracker to make sure reset happens as a consistent whole.

Modified:
    manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java
    manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java
    manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
    manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java

Modified: manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java?rev=1637969&r1=1637968&r2=1637969&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java (original)
+++ manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java Mon Nov 10 20:17:12 2014
@@ -153,7 +153,9 @@ public class CrawlerAgent implements IAg
     // to take care of are dangling ones that won't get done because the process that was doing them went
     // away.  BUT: somebody may have blown away lock info, in which case we won't know anything at all.
     // So we do everything in that case.
-    ManifoldCF.resetAllDocumentPriorities(threadContext);
+    
+    ManifoldCF.resetAllDocumentPriorities(threadContext,currentProcessID);
+
   }
   
   /** Cleanup after agents process.
@@ -170,14 +172,23 @@ public class CrawlerAgent implements IAg
   {
     IJobManager jobManager = JobManagerFactory.make(threadContext);
     jobManager.cleanupProcessData(cleanupProcessID);
-	  
-    /*
+    
+    // If one agents process was starting a reprioritization, it could have started the reprioritization sequence, but
+    // failed to complete it.  If so, we may need to reset/complete the reprioritization sequence, which is defined as:
+    // - Resetting prioritization parameters
+    // - Removing all existing document priorities
+    // These must go together in order for the reset to be correct.
+    
     IReprioritizationTracker rt = ReprioritizationTrackerFactory.make(threadContext);
     String reproID = rt.isSpecifiedProcessReprioritizing(cleanupProcessID);
     if (reproID != null)
     {
       // We have to take over the prioritization for the process, which apparently died
       // in the middle.
+      
+      jobManager.clearAllDocumentPriorities();
+      
+      /*
       IRepositoryConnectionManager connectionManager = RepositoryConnectionManagerFactory.make(threadContext);
 
       // Reprioritize all documents in the jobqueue, 1000 at a time
@@ -212,10 +223,10 @@ public class CrawlerAgent implements IAg
 
         Logging.threads.debug("Reprioritized "+Integer.toString(docs.length)+" not-yet-processed documents in "+new Long(System.currentTimeMillis()-startTime)+" ms");
       }
+      */
       
       rt.doneReprioritization(reproID);
     }
-    */
   }
 
   /** Start the agent.  This method should spin up the agent threads, and

Modified: manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java?rev=1637969&r1=1637968&r2=1637969&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java (original)
+++ manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java Mon Nov 10 20:17:12 2014
@@ -106,7 +106,7 @@ public class JobResetThread extends Thre
           {
             Logging.threads.debug("Job reset thread reprioritizing documents...");
 
-            ManifoldCF.resetAllDocumentPriorities(threadContext);
+            ManifoldCF.resetAllDocumentPriorities(threadContext,processID);
             
             Logging.threads.debug("Job reset thread done reprioritizing documents.");
 

Modified: manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java?rev=1637969&r1=1637968&r2=1637969&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java (original)
+++ manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java Mon Nov 10 20:17:12 2014
@@ -993,13 +993,25 @@ public class ManifoldCF extends org.apac
   /** Reset all (active) document priorities.  This operation may occur due to various externally-triggered
   * events, such a job abort, pause, resume, wait, or unwait.
   */
-  public static void resetAllDocumentPriorities(IThreadContext threadContext)
+  public static void resetAllDocumentPriorities(IThreadContext threadContext, String processID)
     throws ManifoldCFException
   {
+    // The reprioritization cycle is as follows now:
+    // (1) We reset the reprioritization tracker, which causes all bins to be reset, and locks reprioritization so that it is blocked;
+    // (2) We clear all document priorities;
+    // (3) We unlock reprioritization, so that it may proceed.
     IJobManager jobManager = JobManagerFactory.make(threadContext);
+    IReprioritizationTracker rt = ReprioritizationTrackerFactory.make(threadContext);
+
+    String reproID = IDFactory.make(threadContext);
+
+    rt.startReprioritization(System.currentTimeMillis(),processID,reproID);
+
     jobManager.clearAllDocumentPriorities();
 
-    /*
+    rt.doneReprioritization(reproID);
+
+  /*
     ILockManager lockManager = LockManagerFactory.make(threadContext);
     IRepositoryConnectionManager connectionManager = RepositoryConnectionManagerFactory.make(threadContext);
     IReprioritizationTracker rt = ReprioritizationTrackerFactory.make(threadContext);

Modified: manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java?rev=1637969&r1=1637968&r2=1637969&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java (original)
+++ manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java Mon Nov 10 20:17:12 2014
@@ -138,8 +138,9 @@ public class StartupThread extends Threa
                   jobManager.prepareJobScan(jobID,legalLinkTypes,hopcountMethod,
                     model,jobType == IJobDescription.TYPE_CONTINUOUS,lastSeedingVersion == null,
                     requestMinimum);
-                  ManifoldCF.resetAllDocumentPriorities(threadContext);
                   
+                  ManifoldCF.resetAllDocumentPriorities(threadContext,processID);
+
                   if (Logging.threads.isDebugEnabled())
                     Logging.threads.debug("Prepared job "+jobID.toString()+" for execution.");