You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2012/08/07 23:35:05 UTC

svn commit: r1370546 - in /manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler: interfaces/IJobManager.java jobs/JobManager.java system/DocumentCleanupThread.java system/ExpireThread.java

Author: kwright
Date: Tue Aug  7 21:35:04 2012
New Revision: 1370546

URL: http://svn.apache.org/viewvc?rev=1370546&view=rev
Log:
Push the semantics of each variant of document deletion down into IJobManager.  This makes it possible to do different things based on context, which we'll need (in addition to document state) to decide how to deal with NEEDSRESCAN states.

Modified:
    manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
    manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
    manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/DocumentCleanupThread.java
    manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ExpireThread.java

Modified: manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java?rev=1370546&r1=1370545&r2=1370546&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java (original)
+++ manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java Tue Aug  7 21:35:04 2012
@@ -274,9 +274,13 @@ public interface IJobManager
   public void markDocumentCompleted(DocumentDescription documentDescription)
     throws ManifoldCFException;
 
-  /** Note deletion as result of document processing by a job thread of a document.
+  /** Delete from queue as a result of processing of an active document.
+  * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING,
+  * ACTIVENEEDSRESCAN, ACTIVESEEDINGNEEDSRESCAN.  The RESCAN variants are interpreted
+  * as meaning that the document should not be deleted, but should instead be popped back on the queue for
+  * a repeat processing attempt.
   *@param documentDescriptions are the set of description objects for the documents that were processed.
-  *@param hopcountMethod is one of complete, partial, or nevercomplete.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
   *@return the set of documents for which carrydown data was changed by this operation.  These documents are likely
   *  to be requeued as a result of the change.
   */
@@ -284,9 +288,13 @@ public interface IJobManager
     int hopcountMethod)
     throws ManifoldCFException;
 
-  /** Note deletion as result of document processing by a job thread of a document.
+  /** Delete from queue as a result of processing of an active document.
+  * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING,
+  * ACTIVENEEDSRESCAN, ACTIVESEEDINGNEEDSRESCAN.  The RESCAN variants are interpreted
+  * as meaning that the document should not be deleted, but should instead be popped back on the queue for
+  * a repeat processing attempt.
   *@param documentDescription is the description object for the document that was processed.
-  *@param hopcountMethod is one of complete, partial, or nevercomplete.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
   *@return the set of documents for which carrydown data was changed by this operation.  These documents are likely
   *  to be requeued as a result of the change.
   */
@@ -294,6 +302,56 @@ public interface IJobManager
     int hopcountMethod)
     throws ManifoldCFException;
 
+  /** Delete a set of active documents from the queue because they have expired.
+  * Each document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING, ACTIVENEEDSRESCAN,
+  * ACTIVESEEDINGNEEDSRESCAN.  Because the documents expired, a RESCAN state triggers no special activity.
+  *@param jobID is the identifier of the job; NOTE(review): presumably every listed document belongs to it - confirm.
+  *@param legalLinkTypes is the set of legal link types; NOTE(review): presumably those of the job's connector - confirm.
+  *@param documentDescriptions is the set of description objects for the documents that expired.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents whose carrydown data was changed by this operation; these are likely to be requeued.
+  */
+  public DocumentDescription[] markDocumentExpiredMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions,
+    int hopcountMethod)
+    throws ManifoldCFException;
+  
+  /** Delete a single active document from the queue because it has expired.
+  * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING, ACTIVENEEDSRESCAN,
+  * ACTIVESEEDINGNEEDSRESCAN.  Because the document expired, a RESCAN state triggers no special activity.
+  *@param jobID is the identifier of the job the document belongs to.
+  *@param legalLinkTypes is the set of legal link types; NOTE(review): presumably those of the job's connector - confirm.
+  *@param documentDescription is the description object for the document that expired.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents whose carrydown data was changed by this operation; these are likely to be requeued.
+  */
+  public DocumentDescription[] markDocumentExpired(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription,
+    int hopcountMethod)
+    throws ManifoldCFException;
+
+  /** Delete a set of unreachable documents from the queue as part of cleanup.
+  * Each document is expected to be in the PURGATORY state; there is never any need to reprocess it.
+  *@param jobID is the identifier of the job; NOTE(review): presumably every listed document belongs to it - confirm.
+  *@param legalLinkTypes is the set of legal link types; NOTE(review): presumably those of the job's connector - confirm.
+  *@param documentDescriptions is the set of description objects for the documents being cleaned up.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents whose carrydown data was changed by this operation; these are likely to be requeued.
+  */
+  public DocumentDescription[] markDocumentCleanedUpMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions,
+    int hopcountMethod)
+    throws ManifoldCFException;
+
+  /** Delete a single unreachable document from the queue as part of cleanup.
+  * The document is expected to be in the PURGATORY state; there is never any need to reprocess it.
+  *@param jobID is the identifier of the job the document belongs to.
+  *@param legalLinkTypes is the set of legal link types; NOTE(review): presumably those of the job's connector - confirm.
+  *@param documentDescription is the description object for the document being cleaned up.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents whose carrydown data was changed by this operation; these are likely to be requeued.
+  */
+  public DocumentDescription[] markDocumentCleanedUp(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription,
+    int hopcountMethod)
+    throws ManifoldCFException;
+
   /** Requeue a document set because of carrydown changes.
   * This method is called when carrydown data is modified for a set of documents.  The documents must be requeued for immediate reprocessing, even to the
   * extent that if one is *already* being processed, it will need to be done over again.

Modified: manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1370546&r1=1370545&r2=1370546&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Tue Aug  7 21:35:04 2012
@@ -2474,7 +2474,11 @@ public class JobManager implements IJobM
     markDocumentCompletedMultiple(new DocumentDescription[]{documentDescription});
   }
 
-  /** Note deletion as result of document processing by a job thread of a document.
+  /** Delete from queue as a result of processing of an active document.
+  * The document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING,
+  * ACTIVENEEDSRESCAN, ACTIVESEEDINGNEEDSRESCAN.  The RESCAN variants are interpreted
+  * as meaning that the document should not be deleted, but should instead be popped back on the queue for
+  * a repeat processing attempt.
   *@param documentDescriptions are the set of description objects for the documents that were processed.
   *@param hopcountMethod describes how to handle deletions for hopcount purposes.
   *@return the set of documents for which carrydown data was changed by this operation.  These documents are likely
@@ -2484,6 +2488,100 @@ public class JobManager implements IJobM
     int hopcountMethod)
     throws ManifoldCFException
   {
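+    // MHL: requeueing of documents in the RESCAN states is not implemented yet; every document is currently deleted.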
+    return doDeleteMultiple(jobID,legalLinkTypes,documentDescriptions,hopcountMethod);
+  }
+
+  /** Delete a single active document from the queue as a result of processing it.
+  * The document is expected to be ACTIVE, ACTIVESEEDING, ACTIVENEEDSRESCAN, or ACTIVESEEDINGNEEDSRESCAN.  The RESCAN
+  * variants are interpreted as meaning the document should not be deleted but put back on the queue for a repeat attempt.
+  * This is a convenience wrapper that hands a one-element array to markDocumentDeletedMultiple().
+  *@param jobID is the identifier of the job the document belongs to.
+  *@param legalLinkTypes is the set of legal link types; NOTE(review): presumably those of the job's connector - confirm.
+  *@param documentDescription is the description object for the document that was processed.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents whose carrydown data was changed by this operation; these are likely to be requeued.
+  */
+  public DocumentDescription[] markDocumentDeleted(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription,
+    int hopcountMethod)
+    throws ManifoldCFException
+  {
+    return markDocumentDeletedMultiple(jobID,legalLinkTypes,new DocumentDescription[]{documentDescription},hopcountMethod);
+  }
+
+  /** Delete a set of active documents from the queue because they have expired.
+  * Each document is expected to be in one of the active states: ACTIVE, ACTIVESEEDING, ACTIVENEEDSRESCAN,
+  * ACTIVESEEDINGNEEDSRESCAN.  A RESCAN state gets no special activity; this simply delegates to doDeleteMultiple().
+  *@param jobID is the identifier of the job; NOTE(review): presumably every listed document belongs to it - confirm.
+  *@param legalLinkTypes is the set of legal link types; NOTE(review): presumably those of the job's connector - confirm.
+  *@param documentDescriptions is the set of description objects for the documents that expired.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents whose carrydown data was changed by this operation; these are likely to be requeued.
+  */
+  public DocumentDescription[] markDocumentExpiredMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions,
+    int hopcountMethod)
+    throws ManifoldCFException
+  {
+    return doDeleteMultiple(jobID,legalLinkTypes,documentDescriptions,hopcountMethod);
+  }
+  
+  /** Delete a single active document from the queue because it has expired.
+  * The document is expected to be ACTIVE, ACTIVESEEDING, ACTIVENEEDSRESCAN, or ACTIVESEEDINGNEEDSRESCAN; a RESCAN state
+  * gets no special activity.  Convenience wrapper passing a one-element array to markDocumentExpiredMultiple().
+  *@param jobID is the identifier of the job the document belongs to.
+  *@param legalLinkTypes is the set of legal link types; NOTE(review): presumably those of the job's connector - confirm.
+  *@param documentDescription is the description object for the document that expired.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents whose carrydown data was changed by this operation; these are likely to be requeued.
+  */
+  public DocumentDescription[] markDocumentExpired(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription,
+    int hopcountMethod)
+    throws ManifoldCFException
+  {
+    return markDocumentExpiredMultiple(jobID,legalLinkTypes,new DocumentDescription[]{documentDescription},hopcountMethod);
+  }
+
+  /** Delete a set of unreachable documents from the queue as part of cleanup, by delegating to doDeleteMultiple().
+  * Each document is expected to be in the PURGATORY state; there is never any need to reprocess it.
+  *@param jobID is the identifier of the job; NOTE(review): presumably every listed document belongs to it - confirm.
+  *@param legalLinkTypes is the set of legal link types; NOTE(review): presumably those of the job's connector - confirm.
+  *@param documentDescriptions is the set of description objects for the documents being cleaned up.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents whose carrydown data was changed by this operation; these are likely to be requeued.
+  */
+  public DocumentDescription[] markDocumentCleanedUpMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions,
+    int hopcountMethod)
+    throws ManifoldCFException
+  {
+    return doDeleteMultiple(jobID,legalLinkTypes,documentDescriptions,hopcountMethod);
+  }
+
+  /** Delete a single unreachable document from the queue as part of cleanup, via markDocumentCleanedUpMultiple().
+  * The document is expected to be in the PURGATORY state; there is never any need to reprocess it.
+  *@param jobID is the identifier of the job the document belongs to.
+  *@param legalLinkTypes is the set of legal link types; NOTE(review): presumably those of the job's connector - confirm.
+  *@param documentDescription is the description object for the document being cleaned up.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents whose carrydown data was changed by this operation; these are likely to be requeued.
+  */
+  public DocumentDescription[] markDocumentCleanedUp(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription,
+    int hopcountMethod)
+    throws ManifoldCFException
+  {
+    return markDocumentCleanedUpMultiple(jobID,legalLinkTypes,new DocumentDescription[]{documentDescription},hopcountMethod);
+  }
+
+  /** Delete documents with no repercussions.  We don't have to worry about the current state of each document,
+  * since the document is definitely going away.
+  *@param documentDescriptions are the set of description objects for the documents that were processed.
+  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
+  *@return the set of documents for which carrydown data was changed by this operation.  These documents are likely
+  *  to be requeued as a result of the change.
+  */
+  protected DocumentDescription[] doDeleteMultiple(Long jobID, String[] legalLinkTypes, DocumentDescription[] documentDescriptions,
+    int hopcountMethod)
+    throws ManifoldCFException
+  {
     if (documentDescriptions.length == 0)
       return new DocumentDescription[0];
 
@@ -2684,18 +2782,6 @@ public class JobManager implements IJobM
     }
   }
 
-  /** Note deletion as result of document processing by a job thread of a document.
-  *@param documentDescription is the description object for the document that was processed.
-  *@param hopcountMethod describes how to handle deletions for hopcount purposes.
-  *@return the set of documents for which carrydown data was changed by this operation.  These documents are likely
-  *  to be requeued as a result of the change.
-  */
-  public DocumentDescription[] markDocumentDeleted(Long jobID, String[] legalLinkTypes, DocumentDescription documentDescription,
-    int hopcountMethod)
-    throws ManifoldCFException
-  {
-    return markDocumentDeletedMultiple(jobID,legalLinkTypes,new DocumentDescription[]{documentDescription},hopcountMethod);
-  }
 
 
   /** Requeue a document for further processing in the future.

Modified: manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/DocumentCleanupThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/DocumentCleanupThread.java?rev=1370546&r1=1370545&r2=1370546&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/DocumentCleanupThread.java (original)
+++ manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/DocumentCleanupThread.java Tue Aug  7 21:35:04 2012
@@ -232,7 +232,7 @@ public class DocumentCleanupThread exten
                   Long jobID = ddd.getJobID();
                   int hopcountMethod = ((Integer)hopcountMethods.get(k)).intValue();
                   String[] legalLinkTypes = (String[])arrayRelationshipTypes.get(k);
-                  DocumentDescription[] requeueCandidates = jobManager.markDocumentDeleted(jobID,legalLinkTypes,ddd,hopcountMethod);
+                  DocumentDescription[] requeueCandidates = jobManager.markDocumentCleanedUp(jobID,legalLinkTypes,ddd,hopcountMethod);
                   // Use the common method for doing the requeuing
                   ManifoldCF.requeueDocumentsDueToCarrydown(jobManager,requeueCandidates,
                     connector,connection,queueTracker,currentTime);

Modified: manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ExpireThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ExpireThread.java?rev=1370546&r1=1370545&r2=1370546&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ExpireThread.java (original)
+++ manifoldcf/branches/CONNECTORS-501/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ExpireThread.java Tue Aug  7 21:35:04 2012
@@ -235,7 +235,7 @@ public class ExpireThread extends Thread
                   Long jobID = ddd.getJobID();
                   int hopcountMethod = ((Integer)hopcountMethods.get(k)).intValue();
                   String[] legalLinkTypes = (String[])arrayRelationshipTypes.get(k);
-                  DocumentDescription[] requeueCandidates = jobManager.markDocumentDeleted(jobID,legalLinkTypes,ddd,hopcountMethod);
+                  DocumentDescription[] requeueCandidates = jobManager.markDocumentExpired(jobID,legalLinkTypes,ddd,hopcountMethod);
                   // Use the common method for doing the requeuing
                   ManifoldCF.requeueDocumentsDueToCarrydown(jobManager,requeueCandidates,
                     connector,connection,queueTracker,currentTime);