You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/11/26 01:01:26 UTC

svn commit: r1641727 - in /manifoldcf/branches/dev_1x: ./ framework/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/ framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/

Author: kwright
Date: Wed Nov 26 00:01:26 2014
New Revision: 1641727

URL: http://svn.apache.org/r1641727
Log:
Pull up fix for CONNECTORS-1115 from trunk.

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/CHANGES.txt
    manifoldcf/branches/dev_1x/framework/   (props changed)
    manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java
    manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk:r1641724

Modified: manifoldcf/branches/dev_1x/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/CHANGES.txt?rev=1641727&r1=1641726&r2=1641727&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/CHANGES.txt (original)
+++ manifoldcf/branches/dev_1x/CHANGES.txt Wed Nov 26 00:01:26 2014
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 1.8-dev =====================
 
+CONNECTORS-1115: Add ability to retain all components of a document,
+so that individual ones do not need to be specified.
+(Markus Schuch, Karl Wright)
+
 CONNECTORS-1114: SQL exception calling removeDocument().
 (Markus Schuch, Karl Wright)
 

Propchange: manifoldcf/branches/dev_1x/framework/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk/framework:r1641724

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java?rev=1641727&r1=1641726&r2=1641727&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java Wed Nov 26 00:01:26 2014
@@ -224,6 +224,14 @@ public interface IProcessActivity extend
     String componentIdentifier)
     throws ManifoldCFException;
 
+  /** Retain all existing document components of a primary document.  Use this method to signal that
+  * no document components need to be reindexed.  The default behavior is to remove
+  * components that are not mentioned during processing.
+  *@param documentIdentifier is the document's identifier.
+  */
+  public void retainAllComponentDocument(String documentIdentifier)
+    throws ManifoldCFException;
+
   /** Record a document version, WITHOUT reindexing it, or removing it.  (Other
   * documents with the same URL, however, will still be removed.)  This is
   * useful if the version string changes but the document contents are known not

Modified: manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1641727&r1=1641726&r2=1641727&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/branches/dev_1x/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Wed Nov 26 00:01:26 2014
@@ -1157,6 +1157,8 @@ public class WorkerThread extends Thread
     protected final Set<String> documentDeletedSet = new HashSet<String>();
     
     // Whether a component was touched or not, keyed by document identifier.
+    // An entry in allComponentsSet (below) means that *all* components of that document are to be retained.
+    protected final Set<String> allComponentsSet = new HashSet<String>();
     // This does not include primary document.  The set is keyed by component id hash.
     protected final Map<String,Set<String>> touchedComponentSet = new HashMap<String,Set<String>>();
     // This represents primary documents.
@@ -1229,6 +1231,8 @@ public class WorkerThread extends Thread
     public boolean wasDocumentComponentTouched(String documentIdentifier,
       String componentIdentifierHash)
     {
+      if (allComponentsSet.contains(documentIdentifier))
+        return true;
       Set<String> components = touchedComponentSet.get(documentIdentifier);
       if (components == null)
         return false;
@@ -1711,6 +1715,19 @@ public class WorkerThread extends Thread
       noDocument(documentIdentifier,version);
     }
 
+    /** Retain all existing document components of a primary document.  Use this method to signal that
+    * no document components need to be reindexed; by default, components not mentioned during processing are removed.
+    * Throws IllegalStateException if the document was already aborted, deleted, or given an individual component disposition.
+    *@param documentIdentifier is the document's identifier.
+    */
+    @Override
+    public void retainAllComponentDocument(String documentIdentifier)
+      throws ManifoldCFException
+    {
+      checkAllComponentsMultipleDispositions(documentIdentifier);
+      touchAllComponentsSet(documentIdentifier);
+    }
+
     /** Delete the specified document from the search engine index, and from the status table.  This
     *  method does NOT keep track of version
     * information for the document and thus can lead to "churn", whereby the same document is queued, processed,
@@ -2124,6 +2141,17 @@ public class WorkerThread extends Thread
       return ManifoldCF.createJobSpecificString(jobID,simpleString);
     }
 
+    protected void checkAllComponentsMultipleDispositions(String documentIdentifier)
+    {
+      if (abortSet.contains(documentIdentifier))
+        throw new IllegalStateException("Multiple document dispositions not allowed: Abort cannot be combined with component disposition; document '"+documentIdentifier+"'");
+      if (documentDeletedSet.contains(documentIdentifier))
+        throw new IllegalStateException("Multiple document dispositions not allowed: Document delete cannot be combined with component disposition; document '"+documentIdentifier+"'");
+      Set<String> components = touchedComponentSet.get(documentIdentifier);
+      if (components != null && components.size() > 0)
+        throw new IllegalStateException("Multiple document dispositions not allowed: Retain all components cannot be combined with individual component disposition; document '"+documentIdentifier+"'");
+    }
+    
     protected void checkMultipleDispositions(String documentIdentifier, String componentIdentifier, String componentIdentifierHash)
     {
       if (abortSet.contains(documentIdentifier))
@@ -2138,12 +2166,19 @@ public class WorkerThread extends Thread
       }
       else
       {
+        if (allComponentsSet.contains(documentIdentifier))
+          throw new IllegalStateException("Multiple document component dispositions not allowed: document '"+documentIdentifier+"', component '"+componentIdentifier+"'");
         Set<String> components = touchedComponentSet.get(documentIdentifier);
         if (components != null && components.contains(componentIdentifierHash))
           throw new IllegalStateException("Multiple document component dispositions not allowed: document '"+documentIdentifier+"', component '"+componentIdentifier+"'");
       }
     }
     
+    protected void touchAllComponentsSet(String documentIdentifier)
+    {
+      allComponentsSet.add(documentIdentifier);
+    }
+    
     protected void touchComponentSet(String documentIdentifier, String componentIdentifierHash)
     {
       if (componentIdentifierHash == null)