You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/08/07 21:26:36 UTC
svn commit: r1370460 - in
/manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs:
HopCount.java JobManager.java
Author: kwright
Date: Tue Aug 7 19:26:36 2012
New Revision: 1370460
URL: http://svn.apache.org/viewvc?rev=1370460&view=rev
Log:
Add logic to put documents in 'rescan' state if hopcount changes are detected during queuing
Modified:
manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
Modified: manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java?rev=1370460&r1=1370459&r2=1370460&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java (original)
+++ manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java Tue Aug 7 19:26:36 2012
@@ -364,20 +364,20 @@ public class HopCount extends org.apache
/** Record a reference from source to target. This reference will be marked as "new" or "existing".
*/
- public void recordReference(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String targetDocumentIDHash, String linkType,
+ public boolean recordReference(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String targetDocumentIDHash, String linkType,
int hopcountMethod)
throws ManifoldCFException
{
- doRecord(jobID,legalLinkTypes,sourceDocumentIDHash,new String[]{targetDocumentIDHash},linkType,hopcountMethod);
+ return doRecord(jobID,legalLinkTypes,sourceDocumentIDHash,new String[]{targetDocumentIDHash},linkType,hopcountMethod)[0];
}
/** Record a set of references from source to target. This reference will be marked as "new" or "existing".
*/
- public void recordReferences(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String[] targetDocumentIDHashes, String linkType,
+ public boolean[] recordReferences(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String[] targetDocumentIDHashes, String linkType,
int hopcountMethod)
throws ManifoldCFException
{
- doRecord(jobID,legalLinkTypes,sourceDocumentIDHash,targetDocumentIDHashes,linkType,hopcountMethod);
+ return doRecord(jobID,legalLinkTypes,sourceDocumentIDHash,targetDocumentIDHashes,linkType,hopcountMethod);
}
/** Complete a recalculation pass for a set of source documents. All child links that are not marked as "new"
@@ -390,13 +390,19 @@ public class HopCount extends org.apache
}
/** Do the work of recording source-target references. */
- protected void doRecord(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String[] targetDocumentIDHashes, String linkType,
+ protected boolean[] doRecord(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String[] targetDocumentIDHashes, String linkType,
int hopcountMethod)
throws ManifoldCFException
{
// We have to both add the reference, AND invalidate appropriate cached hopcounts (if it is a NEW
// link.)
+ boolean[] rval = new boolean[targetDocumentIDHashes.length];
+ for (int i = 0; i < rval.length; i++)
+ {
+ rval[i] = false;
+ }
+
beginTransaction();
try
{
@@ -404,6 +410,18 @@ public class HopCount extends org.apache
if (newReferences.length > 0)
{
// There are added links.
+
+ // First, note them in return value
+ Set<String> newSet = new HashSet<String>();
+ for (int i = 0; i < newReferences.length; i++)
+ {
+ newSet.add(newReferences[i]);
+ }
+ for (int i = 0; i < rval.length; i++)
+ {
+ if (newSet.contains(targetDocumentIDHashes[i]))
+ rval[i] = true;
+ }
// The add causes hopcount records to be queued for processing (and created if they don't exist).
// ALL the hopcount records for the target document ids must be queued, for all the link types
@@ -495,6 +513,7 @@ public class HopCount extends org.apache
if (Logging.hopcount.isDebugEnabled())
Logging.hopcount.debug("Done queueing "+Integer.toString(targetDocumentIDHashes.length)+" documents");
}
+ return rval;
}
catch (ManifoldCFException e)
{
Modified: manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1370460&r1=1370459&r2=1370460&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Tue Aug 7 19:26:36 2012
@@ -3891,6 +3891,10 @@ public class JobManager implements IJobM
// Update all the carrydown data at once, for greatest efficiency.
boolean[] carrydownChangesSeen = carryDown.recordCarrydownDataMultiple(jobID,parentIdentifierHash,reorderedDocIDHashes,dataNames,dataHashValues,dataValues);
+ boolean[] hopcountChangesSeen = null;
+ if (parentIdentifierHash != null && relationshipType != null)
+ hopcountChangesSeen = hopCount.recordReferences(jobID,legalLinkTypes,parentIdentifierHash,reorderedDocIDHashes,relationshipType,hopcountMethod);
+
// Loop through the document id's again, and perform updates where needed
boolean[] reorderedRval = new boolean[reorderedDocIDHashes.length];
@@ -3905,12 +3909,11 @@ public class JobManager implements IJobM
else
// It was an existing row; do the update logic
reorderedRval[z] = jobQueue.updateExistingRecord(jr.getRecordID(),jr.getStatus(),jr.getCheckTimeValue(),
- 0L,currentTime,carrydownChangesSeen[z],reorderedDocumentPriorities[z],reorderedDocumentPrerequisites[z]);
+ 0L,currentTime,carrydownChangesSeen[z] || (hopcountChangesSeen!=null && hopcountChangesSeen[z]),
+ reorderedDocumentPriorities[z],reorderedDocumentPrerequisites[z]);
z++;
}
- if (parentIdentifierHash != null && relationshipType != null)
- hopCount.recordReferences(jobID,legalLinkTypes,parentIdentifierHash,reorderedDocIDHashes,relationshipType,hopcountMethod);
database.performCommit();