You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/11/03 03:56:26 UTC
svn commit: r1636233 - in /manifoldcf/branches/dev_1x: ./ framework/
framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/
framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/
framework/pull-agent/src/main/java/...
Author: kwright
Date: Mon Nov 3 02:56:25 2014
New Revision: 1636233
URL: http://svn.apache.org/r1636233
Log:
Pull up fix for CONNECTORS-1091 from trunk.
Modified:
manifoldcf/branches/dev_1x/ (props changed)
manifoldcf/branches/dev_1x/framework/ (props changed)
manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java
manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
Merged /manifoldcf/trunk:r1636232
Propchange: manifoldcf/branches/dev_1x/framework/
------------------------------------------------------------------------------
Merged /manifoldcf/trunk/framework:r1636232
Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java Mon Nov 3 02:56:25 2014
@@ -215,16 +215,6 @@ public interface IJobManager
// These methods support the "set doc priority" thread
- /** Get a list of already-processed documents to reprioritize. Documents in all jobs will be
- * returned by this method. Up to n document descriptions will be returned.
- *@param currentTime is the current time stamp for this prioritization pass. Avoid
- * picking up any documents that are labeled with this timestamp or after.
- *@param n is the maximum number of document descriptions desired.
- *@return the document descriptions.
- */
- public DocumentDescription[] getNextAlreadyProcessedReprioritizationDocuments(long currentTime, int n)
- throws ManifoldCFException;
-
/** Get a list of not-yet-processed documents to reprioritize. Documents in all jobs will be
* returned by this method. Up to n document descriptions will be returned.
*@param currentTime is the current time stamp for this prioritization pass. Avoid
Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Mon Nov 3 02:56:25 2014
@@ -2039,57 +2039,6 @@ public class JobManager implements IJobM
// These methods support the reprioritization thread.
- /** Get a list of already-processed documents to reprioritize. Documents in all jobs will be
- * returned by this method. Up to n document descriptions will be returned.
- *@param currentTime is the current time stamp for this prioritization pass. Avoid
- * picking up any documents that are labeled with this timestamp or after.
- *@param n is the maximum number of document descriptions desired.
- *@return the document descriptions.
- */
- @Override
- public DocumentDescription[] getNextAlreadyProcessedReprioritizationDocuments(long currentTime, int n)
- throws ManifoldCFException
- {
- StringBuilder sb = new StringBuilder();
- ArrayList list = new ArrayList();
-
- // The desired query is:
- // SELECT docid FROM jobqueue WHERE prioritysettime < (currentTime) LIMIT (n)
-
- sb.append("SELECT ")
- .append(jobQueue.idField).append(",")
- .append(jobQueue.docHashField).append(",")
- .append(jobQueue.docIDField).append(",")
- .append(jobQueue.jobIDField)
- .append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ");
-
- sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{
- new MultiClause(jobQueue.statusField,new Object[]{
- jobQueue.statusToString(JobQueue.STATUS_COMPLETE),
- jobQueue.statusToString(JobQueue.STATUS_UNCHANGED),
- jobQueue.statusToString(JobQueue.STATUS_PURGATORY)}),
- new UnitaryClause(jobQueue.prioritySetField,"<",new Long(currentTime))})).append(" ");
-
- sb.append(database.constructOffsetLimitClause(0,n));
-
- IResultSet set = database.performQuery(sb.toString(),list,null,null,n,null);
-
- DocumentDescription[] rval = new DocumentDescription[set.getRowCount()];
-
- int i = 0;
- while (i < set.getRowCount())
- {
- IResultRow row = set.getRow(i);
- rval[i] =new DocumentDescription((Long)row.getValue(jobQueue.idField),
- (Long)row.getValue(jobQueue.jobIDField),
- (String)row.getValue(jobQueue.docHashField),
- (String)row.getValue(jobQueue.docIDField));
- i++;
- }
-
- return rval;
- }
-
/** Get a list of not-yet-processed documents to reprioritize. Documents in all jobs will be
* returned by this method. Up to n document descriptions will be returned.
*@param currentTime is the current time stamp for this prioritization pass. Avoid
@@ -2116,9 +2065,10 @@ public class JobManager implements IJobM
JobQueue.statusToString(jobQueue.STATUS_HOPCOUNTREMOVED),
JobQueue.statusToString(jobQueue.STATUS_PENDING),
JobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)}),
- new UnitaryClause(jobQueue.prioritySetField,"<",new Long(currentTime))})).append(" AND ")
- .append(jobQueue.checkActionField).append("=?").append(" AND ");
-
+ new UnitaryClause(jobQueue.prioritySetField,"<",new Long(currentTime))}));
+
+ sb.append(" AND ")
+ .append(jobQueue.checkActionField).append("=?");
list.add(jobQueue.actionToString(JobQueue.ACTION_RESCAN));
// Per CONNECTORS-290, we need to be leaving priorities blank for jobs that aren't using them,
@@ -2130,7 +2080,7 @@ public class JobManager implements IJobM
// expected to be short, because typically this state is the result of an installation procedure
// rather than willful action on the part of a user.
- sb.append("EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ")
+ sb.append(" AND EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ")
.append(database.buildConjunctionClause(list,new ClauseDescription[]{
new MultiClause("t1."+jobs.statusField,new Object[]{
Jobs.statusToString(Jobs.STATUS_STARTINGUP),
@@ -2148,7 +2098,8 @@ public class JobManager implements IJobM
// Analyze jobqueue tables unconditionally, since it's become much more sensitive in 8.3 than it used to be.
//jobQueue.unconditionallyAnalyzeTables();
- IResultSet set = database.performQuery(sb.toString(),list,null,null,n,null);
+ //IResultSet set = database.performQuery(sb.toString(),list,null,null,n,null);
+ IResultSet set = database.performQuery(sb.toString(),list,null,null);
DocumentDescription[] rval = new DocumentDescription[set.getRowCount()];
@@ -2745,7 +2696,7 @@ public class JobManager implements IJobM
.append(" t0 ").append(jobQueue.getGetNextDocumentsIndexHint()).append(" WHERE ");
sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{
- //new UnitaryClause(jobQueue.docPriorityField,">=",new Long(0L)),
+ new UnitaryClause("t0."+jobQueue.docPriorityField,"<",JobQueue.nullDocPriority), // Note: This is technically correct, but I need to confirm that it works OK for MySQL and HSQLDB
new MultiClause(jobQueue.statusField,
new Object[]{jobQueue.statusToString(JobQueue.STATUS_PENDING),
jobQueue.statusToString(JobQueue.STATUS_PENDINGPURGATORY)}),
Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java Mon Nov 3 02:56:25 2014
@@ -752,6 +752,7 @@ public class JobQueue extends org.apache
// Map COMPLETE to PENDINGPURGATORY
HashMap map = new HashMap();
map.put(statusField,statusToString(STATUS_PENDINGPURGATORY));
+ map.put(prioritySetField,new Long(0L));
// Do not reset priorities here! They should all be blank at this point.
map.put(checkTimeField,new Long(0L));
map.put(checkActionField,actionToString(ACTION_RESCAN));
@@ -809,6 +810,7 @@ public class JobQueue extends org.apache
// Map COMPLETE to PENDINGPURGATORY.
HashMap map = new HashMap();
map.put(statusField,statusToString(STATUS_PENDINGPURGATORY));
+ map.put(prioritySetField,new Long(0L));
// Do not reset priorities here! They should all be blank at this point.
map.put(checkTimeField,new Long(0L));
map.put(checkActionField,actionToString(ACTION_RESCAN));
Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java Mon Nov 3 02:56:25 2014
@@ -338,16 +338,18 @@ public class ReprioritizationTracker imp
{
byte[] timeData = lockManager.readData(trackerTimestampResource);
if (timeData == null || timeData.length != 8)
+ {
return null;
+ }
- long rval = ((long)timeData[0]) & 0xffL +
- (((long)timeData[1]) << 8) & 0xff00L +
- (((long)timeData[2]) << 16) & 0xff0000L +
- (((long)timeData[3]) << 24) & 0xff000000L +
- (((long)timeData[4]) << 32) & 0xff00000000L +
- (((long)timeData[5]) << 40) & 0xff0000000000L +
- (((long)timeData[6]) << 48) & 0xff000000000000L +
- (((long)timeData[7]) << 56) & 0xff00000000000000L;
+ long rval = (((long)timeData[0]) & 0xffL) +
+ ((((long)timeData[1]) << 8) & 0xff00L) +
+ ((((long)timeData[2]) << 16) & 0xff0000L) +
+ ((((long)timeData[3]) << 24) & 0xff000000L) +
+ ((((long)timeData[4]) << 32) & 0xff00000000L) +
+ ((((long)timeData[5]) << 40) & 0xff0000000000L) +
+ ((((long)timeData[6]) << 48) & 0xff000000000000L) +
+ ((((long)timeData[7]) << 56) & 0xff00000000000000L);
return new Long(rval);
}
@@ -372,6 +374,7 @@ public class ReprioritizationTracker imp
timeData[5] = (byte)((time >> 40) & 0xffL);
timeData[6] = (byte)((time >> 48) & 0xffL);
timeData[7] = (byte)((time >> 56) & 0xffL);
+
lockManager.writeData(trackerTimestampResource, timeData);
}
}
@@ -443,14 +446,14 @@ public class ReprioritizationTracker imp
byte[] data = lockManager.readData(trackerMinimumDepthResource);
if (data == null || data.length != 8)
return 0.0;
- long dataLong = ((long)data[0]) & 0xffL +
- (((long)data[1]) << 8) & 0xff00L +
- (((long)data[2]) << 16) & 0xff0000L +
- (((long)data[3]) << 24) & 0xff000000L +
- (((long)data[4]) << 32) & 0xff00000000L +
- (((long)data[5]) << 40) & 0xff0000000000L +
- (((long)data[6]) << 48) & 0xff000000000000L +
- (((long)data[7]) << 56) & 0xff00000000000000L;
+ long dataLong = (((long)data[0]) & 0xffL) +
+ ((((long)data[1]) << 8) & 0xff00L) +
+ ((((long)data[2]) << 16) & 0xff0000L) +
+ ((((long)data[3]) << 24) & 0xff000000L) +
+ ((((long)data[4]) << 32) & 0xff00000000L) +
+ ((((long)data[5]) << 40) & 0xff0000000000L) +
+ ((((long)data[6]) << 48) & 0xff000000000000L) +
+ ((((long)data[7]) << 56) & 0xff00000000000000L);
return Double.longBitsToDouble(dataLong);
}
Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java Mon Nov 3 02:56:25 2014
@@ -138,6 +138,8 @@ public class StartupThread extends Threa
jobManager.prepareJobScan(jobID,legalLinkTypes,hopcountMethod,
model,jobType == IJobDescription.TYPE_CONTINUOUS,lastSeedingVersion == null,
requestMinimum);
+ ManifoldCF.resetAllDocumentPriorities(threadContext,currentTime,processID);
+
if (Logging.threads.isDebugEnabled())
Logging.threads.debug("Prepared job "+jobID.toString()+" for execution.");