You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/08/07 21:26:36 UTC

svn commit: r1370460 - in /manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs: HopCount.java JobManager.java

Author: kwright
Date: Tue Aug  7 19:26:36 2012
New Revision: 1370460

URL: http://svn.apache.org/viewvc?rev=1370460&view=rev
Log:
Add logic to put documents in 'rescan' state if hopcount changes are detected during queuing

Modified:
    manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
    manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java

Modified: manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java?rev=1370460&r1=1370459&r2=1370460&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java (original)
+++ manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java Tue Aug  7 19:26:36 2012
@@ -364,20 +364,20 @@ public class HopCount extends org.apache
 
   /** Record a reference from source to target.  This reference will be marked as "new" or "existing".
   */
-  public void recordReference(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String targetDocumentIDHash, String linkType,
+  public boolean recordReference(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String targetDocumentIDHash, String linkType,
     int hopcountMethod)
     throws ManifoldCFException
   {
-    doRecord(jobID,legalLinkTypes,sourceDocumentIDHash,new String[]{targetDocumentIDHash},linkType,hopcountMethod);
+    return doRecord(jobID,legalLinkTypes,sourceDocumentIDHash,new String[]{targetDocumentIDHash},linkType,hopcountMethod)[0];
   }
 
   /** Record a set of references from source to target.  This reference will be marked as "new" or "existing".
   */
-  public void recordReferences(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String[] targetDocumentIDHashes, String linkType,
+  public boolean[] recordReferences(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String[] targetDocumentIDHashes, String linkType,
     int hopcountMethod)
     throws ManifoldCFException
   {
-    doRecord(jobID,legalLinkTypes,sourceDocumentIDHash,targetDocumentIDHashes,linkType,hopcountMethod);
+    return doRecord(jobID,legalLinkTypes,sourceDocumentIDHash,targetDocumentIDHashes,linkType,hopcountMethod);
   }
 
   /** Complete a recalculation pass for a set of source documents.  All child links that are not marked as "new"
@@ -390,13 +390,19 @@ public class HopCount extends org.apache
   }
 
   /** Do the work of recording source-target references. */
-  protected void doRecord(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String[] targetDocumentIDHashes, String linkType,
+  protected boolean[] doRecord(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String[] targetDocumentIDHashes, String linkType,
     int hopcountMethod)
     throws ManifoldCFException
   {
 
     // We have to both add the reference, AND invalidate appropriate cached hopcounts (if it is a NEW
     // link.)
+    boolean[] rval = new boolean[targetDocumentIDHashes.length];
+    for (int i = 0; i < rval.length; i++)
+    {
+      rval[i] = false;
+    }
+    
     beginTransaction();
     try
     {
@@ -404,6 +410,18 @@ public class HopCount extends org.apache
       if (newReferences.length > 0)
       {
         // There are added links.
+        
+        // First, note them in return value
+        Set<String> newSet = new HashSet<String>();
+        for (int i = 0; i < newReferences.length; i++)
+        {
+          newSet.add(newReferences[i]);
+        }
+        for (int i = 0; i < rval.length; i++)
+        {
+          if (newSet.contains(targetDocumentIDHashes[i]))
+            rval[i] = true;
+        }
 
         // The add causes hopcount records to be queued for processing (and created if they don't exist).
         // ALL the hopcount records for the target document ids must be queued, for all the link types
@@ -495,6 +513,7 @@ public class HopCount extends org.apache
         if (Logging.hopcount.isDebugEnabled())
           Logging.hopcount.debug("Done queueing "+Integer.toString(targetDocumentIDHashes.length)+" documents");
       }
+      return rval;
     }
     catch (ManifoldCFException e)
     {

Modified: manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1370460&r1=1370459&r2=1370460&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Tue Aug  7 19:26:36 2012
@@ -3891,6 +3891,10 @@ public class JobManager implements IJobM
         // Update all the carrydown data at once, for greatest efficiency.
         boolean[] carrydownChangesSeen = carryDown.recordCarrydownDataMultiple(jobID,parentIdentifierHash,reorderedDocIDHashes,dataNames,dataHashValues,dataValues);
 
+        boolean[] hopcountChangesSeen = null;
+        if (parentIdentifierHash != null && relationshipType != null)
+          hopcountChangesSeen = hopCount.recordReferences(jobID,legalLinkTypes,parentIdentifierHash,reorderedDocIDHashes,relationshipType,hopcountMethod);
+
         // Loop through the document id's again, and perform updates where needed
         boolean[] reorderedRval = new boolean[reorderedDocIDHashes.length];
 
@@ -3905,12 +3909,11 @@ public class JobManager implements IJobM
           else
             // It was an existing row; do the update logic
             reorderedRval[z] = jobQueue.updateExistingRecord(jr.getRecordID(),jr.getStatus(),jr.getCheckTimeValue(),
-            0L,currentTime,carrydownChangesSeen[z],reorderedDocumentPriorities[z],reorderedDocumentPrerequisites[z]);
+              0L,currentTime,carrydownChangesSeen[z] || (hopcountChangesSeen!=null && hopcountChangesSeen[z]),
+              reorderedDocumentPriorities[z],reorderedDocumentPrerequisites[z]);
           z++;
         }
 
-        if (parentIdentifierHash != null && relationshipType != null)
-          hopCount.recordReferences(jobID,legalLinkTypes,parentIdentifierHash,reorderedDocIDHashes,relationshipType,hopcountMethod);
 
         database.performCommit();