You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/11/10 21:17:12 UTC
svn commit: r1637969 - in
/manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system:
CrawlerAgent.java JobResetThread.java ManifoldCF.java StartupThread.java
Author: kwright
Date: Mon Nov 10 20:17:12 2014
New Revision: 1637969
URL: http://svn.apache.org/r1637969
Log:
Use reprioritization tracker to make sure reset happens as a consistent whole.
Modified:
manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java
manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java
manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
Modified: manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java?rev=1637969&r1=1637968&r2=1637969&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java (original)
+++ manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/CrawlerAgent.java Mon Nov 10 20:17:12 2014
@@ -153,7 +153,9 @@ public class CrawlerAgent implements IAg
// to take care of are dangling ones that won't get done because the process that was doing them went
// away. BUT: somebody may have blown away lock info, in which case we won't know anything at all.
// So we do everything in that case.
- ManifoldCF.resetAllDocumentPriorities(threadContext);
+
+ ManifoldCF.resetAllDocumentPriorities(threadContext,currentProcessID);
+
}
/** Cleanup after agents process.
@@ -170,14 +172,23 @@ public class CrawlerAgent implements IAg
{
IJobManager jobManager = JobManagerFactory.make(threadContext);
jobManager.cleanupProcessData(cleanupProcessID);
-
- /*
+
+ // If an agents process died while reprioritizing, it may have started the reprioritization sequence but
+ // failed to complete it. If so, we need to reset/complete the reprioritization sequence, which is defined as:
+ // - Resetting prioritization parameters
+ // - Removing all existing document priorities
+ // These must go together in order for the reset to be correct.
+
IReprioritizationTracker rt = ReprioritizationTrackerFactory.make(threadContext);
String reproID = rt.isSpecifiedProcessReprioritizing(cleanupProcessID);
if (reproID != null)
{
// We have to take over the prioritization for the process, which apparently died
// in the middle.
+
+ jobManager.clearAllDocumentPriorities();
+
+ /*
IRepositoryConnectionManager connectionManager = RepositoryConnectionManagerFactory.make(threadContext);
// Reprioritize all documents in the jobqueue, 1000 at a time
@@ -212,10 +223,10 @@ public class CrawlerAgent implements IAg
Logging.threads.debug("Reprioritized "+Integer.toString(docs.length)+" not-yet-processed documents in "+new Long(System.currentTimeMillis()-startTime)+" ms");
}
+ */
rt.doneReprioritization(reproID);
}
- */
}
/** Start the agent. This method should spin up the agent threads, and
Modified: manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java?rev=1637969&r1=1637968&r2=1637969&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java (original)
+++ manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobResetThread.java Mon Nov 10 20:17:12 2014
@@ -106,7 +106,7 @@ public class JobResetThread extends Thre
{
Logging.threads.debug("Job reset thread reprioritizing documents...");
- ManifoldCF.resetAllDocumentPriorities(threadContext);
+ ManifoldCF.resetAllDocumentPriorities(threadContext,processID);
Logging.threads.debug("Job reset thread done reprioritizing documents.");
Modified: manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java?rev=1637969&r1=1637968&r2=1637969&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java (original)
+++ manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java Mon Nov 10 20:17:12 2014
@@ -993,13 +993,25 @@ public class ManifoldCF extends org.apac
/** Reset all (active) document priorities. This operation may occur due to various externally-triggered
* events, such as a job abort, pause, resume, wait, or unwait.
*/
- public static void resetAllDocumentPriorities(IThreadContext threadContext)
+ public static void resetAllDocumentPriorities(IThreadContext threadContext, String processID)
throws ManifoldCFException
{
+ // The reprioritization cycle is as follows now:
+ // (1) We reset the reprioritization tracker, which causes all bins to be reset, and locks reprioritization so that it is blocked;
+ // (2) We clear all document priorities;
+ // (3) We unlock reprioritization, so that it may proceed.
IJobManager jobManager = JobManagerFactory.make(threadContext);
+ IReprioritizationTracker rt = ReprioritizationTrackerFactory.make(threadContext);
+
+ String reproID = IDFactory.make(threadContext);
+
+ rt.startReprioritization(System.currentTimeMillis(),processID,reproID);
+
jobManager.clearAllDocumentPriorities();
- /*
+ rt.doneReprioritization(reproID);
+
+ /*
ILockManager lockManager = LockManagerFactory.make(threadContext);
IRepositoryConnectionManager connectionManager = RepositoryConnectionManagerFactory.make(threadContext);
IReprioritizationTracker rt = ReprioritizationTrackerFactory.make(threadContext);
Modified: manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java?rev=1637969&r1=1637968&r2=1637969&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java (original)
+++ manifoldcf/branches/CONNECTORS-1100/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java Mon Nov 10 20:17:12 2014
@@ -138,8 +138,9 @@ public class StartupThread extends Threa
jobManager.prepareJobScan(jobID,legalLinkTypes,hopcountMethod,
model,jobType == IJobDescription.TYPE_CONTINUOUS,lastSeedingVersion == null,
requestMinimum);
- ManifoldCF.resetAllDocumentPriorities(threadContext);
+ ManifoldCF.resetAllDocumentPriorities(threadContext,processID);
+
if (Logging.threads.isDebugEnabled())
Logging.threads.debug("Prepared job "+jobID.toString()+" for execution.");