You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/25 14:55:01 UTC
svn commit: r1613423 - in /manifoldcf/trunk/framework:
agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Author: kwright
Date: Fri Jul 25 12:55:00 2014
New Revision: 1613423
URL: http://svn.apache.org/r1613423
Log:
Put in sanity checks for connector document disposition.
Modified:
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1613423&r1=1613422&r2=1613423&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Fri Jul 25 12:55:00 2014
@@ -3767,6 +3767,8 @@ public class IncrementalIngester extends
public int sendDocument(String documentURI, RepositoryDocument document)
throws ManifoldCFException, ServiceInterruption, IOException
{
+ if (documentProcessed)
+ throw new IllegalStateException("Document cannot have multiple dispositions");
int rval = activities.sendDocument(documentURI,document);
documentProcessed = true;
return rval;
@@ -3779,6 +3781,8 @@ public class IncrementalIngester extends
public void noDocument()
throws ManifoldCFException, ServiceInterruption
{
+ if (documentProcessed) // Sanity check: documentProcessed is already set, so a disposition was recorded earlier
+ throw new IllegalStateException("Document cannot have multiple dispositions");
activities.noDocument(); // Delegate the "no document" disposition to activities
documentProcessed = true; // Remember the disposition so that a further call trips the check above
}
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1613423&r1=1613422&r2=1613423&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Fri Jul 25 12:55:00 2014
@@ -1159,6 +1159,8 @@ public class WorkerThread extends Thread
// Whether a component was touched or not, keyed by document identifier.
// This does not include primary document. The set is keyed by component id hash.
protected final Map<String,Set<String>> touchedComponentSet = new HashMap<String,Set<String>>();
+ // This represents primary documents.
+ protected final Set<String> touchedPrimarySet = new HashSet<String>();
/** Constructor.
*@param jobManager is the job manager
@@ -1501,6 +1503,7 @@ public class WorkerThread extends Thread
{
String documentIdentifierHash = ManifoldCF.hash(documentIdentifier);
String componentIdentifierHash = computeComponentIDHash(componentIdentifier);
+ checkMultipleDispositions(documentIdentifier,componentIdentifier,componentIdentifierHash);
ingester.documentRecord(
pipelineSpecification.getBasicPipelineSpecification(),
connectionName,documentIdentifierHash,componentIdentifierHash,
@@ -1573,6 +1576,7 @@ public class WorkerThread extends Thread
String documentIdentifierHash = ManifoldCF.hash(documentIdentifier);
String componentIdentifierHash = computeComponentIDHash(componentIdentifier);
+ checkMultipleDispositions(documentIdentifier,componentIdentifier,componentIdentifierHash);
if (data != null)
{
@@ -1634,6 +1638,7 @@ public class WorkerThread extends Thread
// (by ignoring it and allowing it to be deleted later)
String documentIdentifierHash = ManifoldCF.hash(documentIdentifier);
String componentIdentifierHash = computeComponentIDHash(componentIdentifier);
+ checkMultipleDispositions(documentIdentifier,componentIdentifier,componentIdentifierHash);
ingester.documentNoData(
computePipelineSpecification(documentIdentifierHash,componentIdentifierHash),
@@ -1657,6 +1662,8 @@ public class WorkerThread extends Thread
public void removeDocument(String documentIdentifier)
throws ManifoldCFException, ServiceInterruption
{
+ checkMultipleDispositions(documentIdentifier,null,null);
+
String documentIdentifierHash = ManifoldCF.hash(documentIdentifier);
ingester.documentRemove(
pipelineSpecification.getBasicPipelineSpecification(),
@@ -1665,6 +1672,7 @@ public class WorkerThread extends Thread
// Note that we touched it, so it won't get checked
touchedSet.add(documentIdentifier);
+ touchComponentSet(documentIdentifier,null);
}
/** Retain existing document component. Use this method to signal that an already-existing
@@ -1678,10 +1686,11 @@ public class WorkerThread extends Thread
String componentIdentifier)
throws ManifoldCFException
{
- touchComponentSet(documentIdentifier,computeComponentIDHash(componentIdentifier));
+ String componentIdentifierHash = computeComponentIDHash(componentIdentifier);
+ checkMultipleDispositions(documentIdentifier,componentIdentifier,componentIdentifierHash);
+ touchComponentSet(documentIdentifier,componentIdentifierHash);
}
-
/** Delete the current document from the search engine index, while keeping track of the version information
* for it (to reduce churn).
* Use noDocument() above instead.
@@ -2096,10 +2105,33 @@ public class WorkerThread extends Thread
return ManifoldCF.createJobSpecificString(jobID,simpleString);
}
+ protected void checkMultipleDispositions(String documentIdentifier, String componentIdentifier, String componentIdentifierHash)
+ { // Sanity check: throws IllegalStateException when the document, or the given component of it, already has a disposition.
+ if (abortSet.contains(documentIdentifier))
+ throw new IllegalStateException("Multiple document dispositions not allowed: Abort cannot be combined with component disposition; document '"+documentIdentifier+"'");
+ if (documentDeletedSet.contains(documentIdentifier))
+ throw new IllegalStateException("Multiple document dispositions not allowed: Document delete cannot be combined with component disposition; document '"+documentIdentifier+"'");
+ if (componentIdentifierHash == null)
+ {
+ // Primary document (null component hash): tracked in touchedPrimarySet rather than touchedComponentSet
+ if (touchedPrimarySet.contains(documentIdentifier))
+ throw new IllegalStateException("Multiple document primary component dispositions not allowed: document '"+documentIdentifier+"'");
+ }
+ else
+ {
+ Set<String> components = touchedComponentSet.get(documentIdentifier); // null when the map has no entry for this document yet
+ if (components != null && components.contains(componentIdentifierHash)) // null guard: a first component disposition must not raise NullPointerException
+ throw new IllegalStateException("Multiple document component dispositions not allowed: document '"+documentIdentifier+"', component '"+componentIdentifier+"'");
+ }
+ }
+
protected void touchComponentSet(String documentIdentifier, String componentIdentifierHash)
{
if (componentIdentifierHash == null)
+ {
+ touchedPrimarySet.add(documentIdentifier);
return;
+ }
Set<String> components = touchedComponentSet.get(documentIdentifier);
if (components == null)
{