You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/08/13 14:48:59 UTC

svn commit: r1372402 - /manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java

Author: kwright
Date: Mon Aug 13 12:48:58 2012
New Revision: 1372402

URL: http://svn.apache.org/viewvc?rev=1372402&view=rev
Log:
Optimization: only requeue when we know there's been a significant change.  Part of CONNECTORS-501.

Modified:
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java?rev=1372402&r1=1372401&r2=1372402&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java Mon Aug 13 12:48:58 2012
@@ -411,18 +411,6 @@ public class HopCount extends org.apache
       {
         // There are added links.
         
-        // First, note them in return value
-        Set<String> newSet = new HashSet<String>();
-        for (int i = 0; i < newReferences.length; i++)
-        {
-          newSet.add(newReferences[i]);
-        }
-        for (int i = 0; i < rval.length; i++)
-        {
-          if (newSet.contains(targetDocumentIDHashes[i]) &&
-            (sourceDocumentIDHash==null || !sourceDocumentIDHash.equals(targetDocumentIDHashes[i])))
-            rval[i] = true;
-        }
 
         // The add causes hopcount records to be queued for processing (and created if they don't exist).
         // ALL the hopcount records for the target document ids must be queued, for all the link types
@@ -505,7 +493,19 @@ public class HopCount extends org.apache
         }
 
         // Now add these documents to the processing queue
-        addToProcessingQueue(jobID,legalLinkTypes,newReferences,estimates,sourceDocumentIDHash,linkType,hopcountMethod);
+        boolean[] hasChanged = addToProcessingQueue(jobID,legalLinkTypes,newReferences,estimates,sourceDocumentIDHash,linkType,hopcountMethod);
+
+        // Record in the return value which target documents addToProcessingQueue reported as changed
+        Map<String,Boolean> changeMap = new HashMap<String,Boolean>();
+        for (int i = 0; i < newReferences.length; i++)
+        {
+          changeMap.put(newReferences[i],new Boolean(hasChanged[i]));
+        }
+        for (int i = 0; i < rval.length; i++)
+        {
+          if (changeMap.get(targetDocumentIDHashes[i]).booleanValue())
+            rval[i] = true;
+        }
 
         if (Logging.hopcount.isDebugEnabled())
           Logging.hopcount.debug("Done queueing "+Integer.toString(targetDocumentIDHashes.length)+" documents");
@@ -790,8 +790,9 @@ public class HopCount extends org.apache
   *@param sourceDocumentIDHash is the source document identifier for the links from source to target documents.
   *@param linkType is the link type for this queue addition.
   *@param hopcountMethod is the desired method of managing hopcounts.
+  *@return a boolean array parallel to documentIDHashes; an entry is true when that document's distance may have changed.
   */
-  protected void addToProcessingQueue(Long jobID, String[] affectedLinkTypes, String[] documentIDHashes,
+  protected boolean[] addToProcessingQueue(Long jobID, String[] affectedLinkTypes, String[] documentIDHashes,
     Answer[] startingAnswers, String sourceDocumentIDHash, String linkType, int hopcountMethod)
     throws ManifoldCFException
   {
@@ -837,6 +838,12 @@ public class HopCount extends org.apache
       answerMap.put(affectedLinkTypes[u],startingAnswers[u]);
     }
 
+    boolean[] rval = new boolean[documentIDHashes.length];
+    for (int i = 0; i < rval.length; i++)
+    {
+      rval[i] = false;
+    }
+
     // Do this in a transaction
     beginTransaction();
     try
@@ -931,6 +938,8 @@ public class HopCount extends org.apache
                 " than new distance "+Integer.toString(newAnswerValue)+", so not queuing for job "+jobID);
               matchMap.remove(q);
             }
+            else
+              rval[i] = true;
           }
         }
       }
@@ -1009,6 +1018,7 @@ public class HopCount extends org.apache
       endTransaction();
     }
     noteModifications(0,documentIDHashes.length,0);
+    return rval;
   }
 
   /** Do the work of marking add-dep-dependent links in the hopcount table. */