You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/08/14 03:12:13 UTC

svn commit: r1513705 - in /manifoldcf/trunk: ./ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/

Author: kwright
Date: Wed Aug 14 01:12:13 2013
New Revision: 1513705

URL: http://svn.apache.org/r1513705
Log:
Fix for CONNECTORS-764.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1513705&r1=1513704&r2=1513705&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Wed Aug 14 01:12:13 2013
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 1.4-dev =====================
 
+CONNECTORS-764: HOPCOUNTREMOVED records need to be reset when
+a job's hopcount limits change.  It also makes sense to reset them
+when the set of documents is changed.
+
 CONNECTORS-750: Skip files when catching FileNotFoundException,
 e.g. access/permission denied files in FileConnector.
 (Shinichiro Abe)

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1513705&r1=1513704&r2=1513705&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Wed Aug 14 01:12:13 2013
@@ -5456,6 +5456,7 @@ public class JobManager implements IJobM
     boolean requestMinimum)
     throws ManifoldCFException
   {
+
     // (1) If the connector has MODEL_ADD_CHANGE_DELETE, then
     // we let the connector run the show; there's no purge phase, and therefore the
     // documents are left in a COMPLETED state if they don't show up in the list
@@ -5474,7 +5475,7 @@ public class JobManager implements IJobM
     if (connectorModel == IRepositoryConnector.MODEL_ADD_CHANGE_DELETE)
     {
       if (fromBeginningOfTime)
-        jobQueue.queueAllExisting(jobID);
+        queueAllExisting(jobID,legalLinkTypes);
       return;
     }
     
@@ -5483,7 +5484,7 @@ public class JobManager implements IJobM
     if (connectorModel == IRepositoryConnector.MODEL_CHAINED_ADD_CHANGE_DELETE)
     {
       if (fromBeginningOfTime)
-        jobQueue.queueAllExisting(jobID);
+        queueAllExisting(jobID,legalLinkTypes);
       else
         jobQueue.preparePartialScan(jobID);
       return;
@@ -5507,6 +5508,58 @@ public class JobManager implements IJobM
       jobQueue.prepareIncrementalScan(jobID);
   }
 
+  /** Queue all existing: calls jobQueue.queueAllExisting for the job inside a single database transaction, retrying the whole transaction whenever the database reports a transaction abort.
+  *@param jobID is the job id.
+  *@param legalLinkTypes are the link types allowed for the job; when this array is non-empty, jobQueue.reactivateHopcountRemovedRecords is called first, in the same transaction.
+  */
+  protected void queueAllExisting(Long jobID, String[] legalLinkTypes)
+    throws ManifoldCFException
+  {
+    while (true)
+    {
+      long sleepAmt = 0L; // stays 0 unless this attempt is aborted and must be retried
+      database.beginTransaction();
+      try
+      {
+        if (legalLinkTypes.length > 0) // reactivation is skipped when the job has no link types
+        {
+          jobQueue.reactivateHopcountRemovedRecords(jobID);
+        }
+
+        jobQueue.queueAllExisting(jobID);
+        TrackerClass.notePrecommit();
+        database.performCommit();
+        TrackerClass.noteCommit();
+        break; // committed; leave the retry loop (the finally clause below still runs)
+      }
+      catch (ManifoldCFException e)
+      {
+        database.signalRollback();
+        TrackerClass.noteRollback();
+        if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT) // an abort is retried; any other error code is rethrown
+        {
+          if (Logging.perf.isDebugEnabled())
+            Logging.perf.debug("Aborted transaction during queueAllExisting: "+e.getMessage());
+          sleepAmt = getRandomAmount();
+          continue;
+        }
+        throw e;
+      }
+      catch (Error e)
+      {
+        database.signalRollback();
+        TrackerClass.noteRollback();
+        throw e;
+      }
+      finally
+      {
+        database.endTransaction();
+        sleepFor(sleepAmt); // called on every exit path; sleepAmt is 0 unless an abort was caught above
+      }
+    }
+
+  }
+  
   /** Prepare for a full scan.
   *@param jobID is the job id.
   *@param legalLinkTypes are the link types allowed for the job.

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java?rev=1513705&r1=1513704&r2=1513705&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobQueue.java Wed Aug 14 01:12:13 2013
@@ -562,7 +562,7 @@ public class JobQueue extends org.apache
   public void prepareFullScan(Long jobID)
     throws ManifoldCFException
   {
-    // Delete PENDING entries
+    // Delete PENDING and HOPCOUNTREMOVED entries (they are treated the same)
     ArrayList list = new ArrayList();
     list.add(jobID);
     list.add(statusToString(STATUS_PENDING));
@@ -571,7 +571,9 @@ public class JobQueue extends org.apache
     list.clear();
     String query = buildConjunctionClause(list,new ClauseDescription[]{
       new UnitaryClause(jobIDField,jobID),
-      new UnitaryClause(statusField,statusToString(STATUS_PENDING))});
+      new MultiClause(statusField,new Object[]{
+        statusToString(STATUS_PENDING),
+        statusToString(STATUS_HOPCOUNTREMOVED)})});
     performDelete("WHERE "+query,list,null);
 
     // Turn PENDINGPURGATORY and COMPLETED into PURGATORY.
@@ -626,7 +628,7 @@ public class JobQueue extends org.apache
     noteModifications(0,1,0);
     // Do an analyze, otherwise our plans are going to be crap right off the bat
     unconditionallyAnalyzeTables();
-    }
+  }
     
   /** Prepare for a "partial" job.  This is called ONLY when the job is inactive.
   *

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java?rev=1513705&r1=1513704&r2=1513705&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java Wed Aug 14 01:12:13 2013
@@ -752,6 +752,33 @@ public class Jobs extends org.apache.man
                   isSame = false;
               }
 
+              if (isSame)
+              {
+                // Compare hopcount filter criteria.
+                Map filterRows = hopFilterManager.readRows(id);
+                Map newFilterRows = jobDescription.getHopCountFilters();
+                if (filterRows.size() != newFilterRows.size())
+                  isSame = false;
+                else
+                {
+                  for (String linkType : (Collection<String>)filterRows.keySet())
+                  {
+                    Integer oldCount = (Integer)filterRows.get(linkType);
+                    Integer newCount = (Integer)newFilterRows.get(linkType);
+                    if (oldCount == null || newCount == null)
+                    {
+                      isSame = false;
+                      break;
+                    }
+                    else if (oldCount.intValue() != newCount.intValue())
+                    {
+                      isSame = false;
+                      break;
+                    }
+                  }
+                }
+              }
+              
               if (!isSame)
                 values.put(lastCheckTimeField,null);