You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/11/03 03:56:26 UTC

svn commit: r1636233 - in /manifoldcf/branches/dev_1x: ./ framework/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/

Author: kwright
Date: Mon Nov  3 02:56:25 2014
New Revision: 1636233

URL: http://svn.apache.org/r1636233
Log:
Pull up the fix for CONNECTORS-1091 from trunk (merges trunk r1636232).

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/framework/   (props changed)
    manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
    manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
    manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
    manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java
    manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk:r1636232

Propchange: manifoldcf/branches/dev_1x/framework/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk/framework:r1636232

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java Mon Nov  3 02:56:25 2014
@@ -215,16 +215,6 @@ public interface IJobManager
 
   // These methods support the "set doc priority" thread
 
-  /** Get a list of already-processed documents to reprioritize.  Documents in all jobs will be
-  * returned by this method.  Up to n document descriptions will be returned.
-  *@param currentTime is the current time stamp for this prioritization pass.  Avoid
-  *  picking up any documents that are labeled with this timestamp or after.
-  *@param n is the maximum number of document descriptions desired.
-  *@return the document descriptions.
-  */
-  public DocumentDescription[] getNextAlreadyProcessedReprioritizationDocuments(long currentTime, int n)
-    throws ManifoldCFException;
-
   /** Get a list of not-yet-processed documents to reprioritize.  Documents in all jobs will be
   * returned by this method.  Up to n document descriptions will be returned.
   *@param currentTime is the current time stamp for this prioritization pass.  Avoid

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Mon Nov  3 02:56:25 2014
@@ -2039,57 +2039,6 @@ public class JobManager implements IJobM
 
   // These methods support the reprioritization thread.
 
-  /** Get a list of already-processed documents to reprioritize.  Documents in all jobs will be
-  * returned by this method.  Up to n document descriptions will be returned.
-  *@param currentTime is the current time stamp for this prioritization pass.  Avoid
-  *  picking up any documents that are labeled with this timestamp or after.
-  *@param n is the maximum number of document descriptions desired.
-  *@return the document descriptions.
-  */
-  @Override
-  public DocumentDescription[] getNextAlreadyProcessedReprioritizationDocuments(long currentTime, int n)
-    throws ManifoldCFException
-  {
-    StringBuilder sb = new StringBuilder();
-    ArrayList list = new ArrayList();
-
-    // The desired query is:
-    // SELECT docid FROM jobqueue WHERE prioritysettime < (currentTime) LIMIT (n)
-
-    sb.append("SELECT ")
-      .append(jobQueue.idField).append(",")
-      .append(jobQueue.docHashField).append(",")
-      .append(jobQueue.docIDField).append(",")
-      .append(jobQueue.jobIDField)
-      .append(" FROM ").append(jobQueue.getTableName()).append(" WHERE ");
-    
-    sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{
-      new MultiClause(jobQueue.statusField,new Object[]{
-        jobQueue.statusToString(JobQueue.STATUS_COMPLETE),
-        jobQueue.statusToString(JobQueue.STATUS_UNCHANGED),
-        jobQueue.statusToString(JobQueue.STATUS_PURGATORY)}),
-      new UnitaryClause(jobQueue.prioritySetField,"<",new Long(currentTime))})).append(" ");
-      
-    sb.append(database.constructOffsetLimitClause(0,n));
-
-    IResultSet set = database.performQuery(sb.toString(),list,null,null,n,null);
-
-    DocumentDescription[] rval = new DocumentDescription[set.getRowCount()];
-
-    int i = 0;
-    while (i < set.getRowCount())
-    {
-      IResultRow row = set.getRow(i);
-      rval[i] =new DocumentDescription((Long)row.getValue(jobQueue.idField),
-        (Long)row.getValue(jobQueue.jobIDField),
-        (String)row.getValue(jobQueue.docHashField),
-        (String)row.getValue(jobQueue.docIDField));
-      i++;
-    }
-
-    return rval;
-  }
-
   /** Get a list of not-yet-processed documents to reprioritize.  Documents in all jobs will be
   * returned by this method.  Up to n document descriptions will be returned.
   *@param currentTime is the current time stamp for this prioritization pass.  Avoid
@@ -2116,9 +2065,10 @@ public class JobManager implements IJobM
           JobQueue.statusToString(jobQueue.STATUS_HOPCOUNTREMOVED),
           JobQueue.statusToString(jobQueue.STATUS_PENDING),
           JobQueue.statusToString(jobQueue.STATUS_PENDINGPURGATORY)}),
-        new UnitaryClause(jobQueue.prioritySetField,"<",new Long(currentTime))})).append(" AND ")
-      .append(jobQueue.checkActionField).append("=?").append(" AND ");
-
+        new UnitaryClause(jobQueue.prioritySetField,"<",new Long(currentTime))}));
+    
+    sb.append(" AND ")
+      .append(jobQueue.checkActionField).append("=?");
     list.add(jobQueue.actionToString(JobQueue.ACTION_RESCAN));
 
     // Per CONNECTORS-290, we need to be leaving priorities blank for jobs that aren't using them,
@@ -2130,7 +2080,7 @@ public class JobManager implements IJobM
     // expected to be short, because typically this state is the result of an installation procedure
     // rather than willful action on the part of a user.
         
-    sb.append("EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ")
+    sb.append(" AND EXISTS(SELECT 'x' FROM ").append(jobs.getTableName()).append(" t1 WHERE ")
       .append(database.buildConjunctionClause(list,new ClauseDescription[]{
         new MultiClause("t1."+jobs.statusField,new Object[]{
           Jobs.statusToString(Jobs.STATUS_STARTINGUP),
@@ -2148,7 +2098,8 @@ public class JobManager implements IJobM
     // Analyze jobqueue tables unconditionally, since it's become much more sensitive in 8.3 than it used to be.
     //jobQueue.unconditionallyAnalyzeTables();
 
-    IResultSet set = database.performQuery(sb.toString(),list,null,null,n,null);
+    //IResultSet set = database.performQuery(sb.toString(),list,null,null,n,null);
+    IResultSet set = database.performQuery(sb.toString(),list,null,null);
 
     DocumentDescription[] rval = new DocumentDescription[set.getRowCount()];
 
@@ -2745,7 +2696,7 @@ public class JobManager implements IJobM
       .append(" t0 ").append(jobQueue.getGetNextDocumentsIndexHint()).append(" WHERE ");
       
     sb.append(database.buildConjunctionClause(list,new ClauseDescription[]{
-      //new UnitaryClause(jobQueue.docPriorityField,">=",new Long(0L)),
+      new UnitaryClause("t0."+jobQueue.docPriorityField,"<",JobQueue.nullDocPriority),  // Note: This is technically correct, but I need to confirm that it works OK for MySQL and HSQLDB
       new MultiClause(jobQueue.statusField,
         new Object[]{jobQueue.statusToString(JobQueue.STATUS_PENDING),
           jobQueue.statusToString(JobQueue.STATUS_PENDINGPURGATORY)}),

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java Mon Nov  3 02:56:25 2014
@@ -752,6 +752,7 @@ public class JobQueue extends org.apache
     // Map COMPLETE to PENDINGPURGATORY
     HashMap map = new HashMap();
     map.put(statusField,statusToString(STATUS_PENDINGPURGATORY));
+    map.put(prioritySetField,new Long(0L));
     // Do not reset priorities here!  They should all be blank at this point.
     map.put(checkTimeField,new Long(0L));
     map.put(checkActionField,actionToString(ACTION_RESCAN));
@@ -809,6 +810,7 @@ public class JobQueue extends org.apache
     // Map COMPLETE to PENDINGPURGATORY.
     HashMap map = new HashMap();
     map.put(statusField,statusToString(STATUS_PENDINGPURGATORY));
+    map.put(prioritySetField,new Long(0L));
     // Do not reset priorities here!  They should all be blank at this point.
     map.put(checkTimeField,new Long(0L));
     map.put(checkActionField,actionToString(ACTION_RESCAN));

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/reprioritizationtracker/ReprioritizationTracker.java Mon Nov  3 02:56:25 2014
@@ -338,16 +338,18 @@ public class ReprioritizationTracker imp
   {
     byte[] timeData = lockManager.readData(trackerTimestampResource);
     if (timeData == null || timeData.length != 8)
+    {
       return null;
+    }
     
-    long rval = ((long)timeData[0]) & 0xffL +
-      (((long)timeData[1]) << 8) & 0xff00L +
-      (((long)timeData[2]) << 16) & 0xff0000L +
-      (((long)timeData[3]) << 24) & 0xff000000L +
-      (((long)timeData[4]) << 32) & 0xff00000000L +
-      (((long)timeData[5]) << 40) & 0xff0000000000L +
-      (((long)timeData[6]) << 48) & 0xff000000000000L +
-      (((long)timeData[7]) << 56) & 0xff00000000000000L;
+    long rval = (((long)timeData[0]) & 0xffL) +
+      ((((long)timeData[1]) << 8) & 0xff00L) +
+      ((((long)timeData[2]) << 16) & 0xff0000L) +
+      ((((long)timeData[3]) << 24) & 0xff000000L) +
+      ((((long)timeData[4]) << 32) & 0xff00000000L) +
+      ((((long)timeData[5]) << 40) & 0xff0000000000L) +
+      ((((long)timeData[6]) << 48) & 0xff000000000000L) +
+      ((((long)timeData[7]) << 56) & 0xff00000000000000L);
     
     return new Long(rval);
   }
@@ -372,6 +374,7 @@ public class ReprioritizationTracker imp
       timeData[5] = (byte)((time >> 40) & 0xffL);
       timeData[6] = (byte)((time >> 48) & 0xffL);
       timeData[7] = (byte)((time >> 56) & 0xffL);
+      
       lockManager.writeData(trackerTimestampResource, timeData);
     }
   }
@@ -443,14 +446,14 @@ public class ReprioritizationTracker imp
     byte[] data = lockManager.readData(trackerMinimumDepthResource);
     if (data == null || data.length != 8)
       return 0.0;
-    long dataLong = ((long)data[0]) & 0xffL +
-      (((long)data[1]) << 8) & 0xff00L +
-      (((long)data[2]) << 16) & 0xff0000L +
-      (((long)data[3]) << 24) & 0xff000000L +
-      (((long)data[4]) << 32) & 0xff00000000L +
-      (((long)data[5]) << 40) & 0xff0000000000L +
-      (((long)data[6]) << 48) & 0xff000000000000L +
-      (((long)data[7]) << 56) & 0xff00000000000000L;
+    long dataLong = (((long)data[0]) & 0xffL) +
+      ((((long)data[1]) << 8) & 0xff00L) +
+      ((((long)data[2]) << 16) & 0xff0000L) +
+      ((((long)data[3]) << 24) & 0xff000000L) +
+      ((((long)data[4]) << 32) & 0xff00000000L) +
+      ((((long)data[5]) << 40) & 0xff0000000000L) +
+      ((((long)data[6]) << 48) & 0xff000000000000L) +
+      ((((long)data[7]) << 56) & 0xff00000000000000L);
 
     return Double.longBitsToDouble(dataLong);
   }

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java?rev=1636233&r1=1636232&r2=1636233&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java Mon Nov  3 02:56:25 2014
@@ -138,6 +138,8 @@ public class StartupThread extends Threa
                   jobManager.prepareJobScan(jobID,legalLinkTypes,hopcountMethod,
                     model,jobType == IJobDescription.TYPE_CONTINUOUS,lastSeedingVersion == null,
                     requestMinimum);
+                  ManifoldCF.resetAllDocumentPriorities(threadContext,currentTime,processID);
+                  
                   if (Logging.threads.isDebugEnabled())
                     Logging.threads.debug("Prepared job "+jobID.toString()+" for execution.");