You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/08/13 14:48:59 UTC
svn commit: r1372402 -
/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
Author: kwright
Date: Mon Aug 13 12:48:58 2012
New Revision: 1372402
URL: http://svn.apache.org/viewvc?rev=1372402&view=rev
Log:
Optimization: only requeue when we know there's been a significant change. Part of CONNECTORS-501.
Modified:
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java?rev=1372402&r1=1372401&r2=1372402&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java Mon Aug 13 12:48:58 2012
@@ -411,18 +411,6 @@ public class HopCount extends org.apache
{
// There are added links.
- // First, note them in return value
- Set<String> newSet = new HashSet<String>();
- for (int i = 0; i < newReferences.length; i++)
- {
- newSet.add(newReferences[i]);
- }
- for (int i = 0; i < rval.length; i++)
- {
- if (newSet.contains(targetDocumentIDHashes[i]) &&
- (sourceDocumentIDHash==null || !sourceDocumentIDHash.equals(targetDocumentIDHashes[i])))
- rval[i] = true;
- }
// The add causes hopcount records to be queued for processing (and created if they don't exist).
// ALL the hopcount records for the target document ids must be queued, for all the link types
@@ -505,7 +493,19 @@ public class HopCount extends org.apache
}
// Now add these documents to the processing queue
- addToProcessingQueue(jobID,legalLinkTypes,newReferences,estimates,sourceDocumentIDHash,linkType,hopcountMethod);
+ boolean[] hasChanged = addToProcessingQueue(jobID,legalLinkTypes,newReferences,estimates,sourceDocumentIDHash,linkType,hopcountMethod);
+
+ // Now that queueing is done, note in the return value which target documents may have changed
+ Map<String,Boolean> changeMap = new HashMap<String,Boolean>();
+ for (int i = 0; i < newReferences.length; i++)
+ {
+ changeMap.put(newReferences[i],new Boolean(hasChanged[i]));
+ }
+ for (int i = 0; i < rval.length; i++)
+ {
+ if (changeMap.get(targetDocumentIDHashes[i]).booleanValue())
+ rval[i] = true;
+ }
if (Logging.hopcount.isDebugEnabled())
Logging.hopcount.debug("Done queueing "+Integer.toString(targetDocumentIDHashes.length)+" documents");
@@ -790,8 +790,9 @@ public class HopCount extends org.apache
*@param sourceDocumentIDHash is the source document identifier for the links from source to target documents.
*@param linkType is the link type for this queue addition.
*@param hopcountMethod is the desired method of managing hopcounts.
+ *@return a boolean array parallel to documentIDHashes, in which element i is true if the distance for documentIDHashes[i] may have changed.
*/
- protected void addToProcessingQueue(Long jobID, String[] affectedLinkTypes, String[] documentIDHashes,
+ protected boolean[] addToProcessingQueue(Long jobID, String[] affectedLinkTypes, String[] documentIDHashes,
Answer[] startingAnswers, String sourceDocumentIDHash, String linkType, int hopcountMethod)
throws ManifoldCFException
{
@@ -837,6 +838,12 @@ public class HopCount extends org.apache
answerMap.put(affectedLinkTypes[u],startingAnswers[u]);
}
+ boolean[] rval = new boolean[documentIDHashes.length];
+ for (int i = 0; i < rval.length; i++)
+ {
+ rval[i] = false;
+ }
+
// Do this in a transaction
beginTransaction();
try
@@ -931,6 +938,8 @@ public class HopCount extends org.apache
" than new distance "+Integer.toString(newAnswerValue)+", so not queuing for job "+jobID);
matchMap.remove(q);
}
+ else
+ rval[i] = true;
}
}
}
@@ -1009,6 +1018,7 @@ public class HopCount extends org.apache
endTransaction();
}
noteModifications(0,documentIDHashes.length,0);
+ return rval;
}
/** Do the work of marking add-dep-dependent links in the hopcount table. */