You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/18 22:06:12 UTC

svn commit: r1611786 - in /manifoldcf/branches/CONNECTORS-989/framework: agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/ agents/src/main/java/org/apache/manifoldcf/agents/interfaces/ pull-agent/src/main/java/org/apache/manifoldcf/c...

Author: kwright
Date: Fri Jul 18 20:06:11 2014
New Revision: 1611786

URL: http://svn.apache.org/r1611786
Log:
Introduce DocumentIngestStatusSet

Added:
    manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java   (with props)
Modified:
    manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
    manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
    manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecificationWithVersions.java
    manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocument.java
    manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
    manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java

Modified: manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1611786&r1=1611785&r2=1611786&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Fri Jul 18 20:06:11 2014
@@ -1512,8 +1512,8 @@ public class IncrementalIngester extends
           authorityName = "";
         int indexValue = position.intValue();
         // MHL
-        rval.addStatus(identifierClasses[indexValue],identifierHashes[indexValue],null,outputConnectionName,
-          new DocumentIngestStatus(lastVersion,lastTransformationVersion,lastOutputVersion,paramVersion,authorityName));
+        rval.addStatus(identifierClasses[indexValue],identifierHashes[indexValue],outputConnectionName,
+          null,new DocumentIngestStatus(lastVersion,lastTransformationVersion,lastOutputVersion,paramVersion,authorityName));
       }
     }
   }

Added: manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java?rev=1611786&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java (added)
+++ manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java Fri Jul 18 20:06:11 2014
@@ -0,0 +1,68 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.util.*;
+
+/** Holds the ingest status of a primary document together with the statuses of its component documents, keyed by component identifier hash.
+*/
+public class DocumentIngestStatusSet
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  protected DocumentIngestStatus primary = null; // status of the primary document; stays null until one is added
+  protected final Map<String,DocumentIngestStatus> components = new HashMap<String,DocumentIngestStatus>(); // component identifier hash -> component status
+  
+  /** Constructor.  Creates an empty set: no primary status and no component statuses. */
+  public DocumentIngestStatusSet()
+  {
+  }
+  
+  /** Add a document status, replacing any status previously stored for the same target.
+  *@param componentHash is the component identifier hash, or null to record the status of the primary document.
+  *@param status is the document ingest status to store.
+  */
+  public void addDocumentStatus(String componentHash, DocumentIngestStatus status)
+  {
+    if (componentHash == null) // a null hash designates the primary document
+      primary = status;
+    else
+      components.put(componentHash,status); // overwrites any earlier entry for this hash
+  }
+  
+  /** Get the status of the primary document.
+  *@return the primary status, or null if none has been set.
+  */
+  public DocumentIngestStatus getPrimary()
+  {
+    return primary;
+  }
+  
+  /** Get the status for a component, or for the primary document when no component is named.
+  *@param componentHash is the component identifier hash, or null to request the primary document's status.
+  *@return the matching status, or null if no status is stored for that component (or for the primary document).
+  */
+  public DocumentIngestStatus getComponent(String componentHash)
+  {
+    if (componentHash == null) // mirrors addDocumentStatus: a null hash means the primary document
+      return primary;
+    return components.get(componentHash); // null when the hash has no entry
+  }
+}

Propchange: manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java?rev=1611786&r1=1611785&r2=1611786&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java (original)
+++ manifoldcf/branches/CONNECTORS-989/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java Fri Jul 18 20:06:11 2014
@@ -28,7 +28,7 @@ public class IngestStatuses
 {
   public static final String _rcsid = "@(#)$Id$";
 
-  protected final Map<OutputKey,Map<String,DocumentIngestStatus>> statuses = new HashMap<OutputKey,Map<String,DocumentIngestStatus>>();
+  protected final Map<OutputKey,DocumentIngestStatusSet> statuses = new HashMap<OutputKey,DocumentIngestStatusSet>();
   
   public IngestStatuses()
   {
@@ -37,50 +37,32 @@ public class IngestStatuses
   /** Add a status record.
   *@param documentClass is the document class.
   *@param documentIDHash is the document id's hash value.
-  *@param componentIDHash is the component id hash value, if any.
   *@param outputConnectionName is the output connection name.
-  *@param status is the status record.
+  *@param componentIDHash is the component ID hash value.
+  *@param status is the status.
   */
-  public void addStatus(String documentClass, String documentIDHash, String componentIDHash, String outputConnectionName, DocumentIngestStatus status)
+  public void addStatus(String documentClass, String documentIDHash, String outputConnectionName,
+    String componentIDHash, DocumentIngestStatus status)
   {
-    if (componentIDHash == null)
-      componentIDHash = "";
     OutputKey ok = new OutputKey(documentClass,documentIDHash,outputConnectionName);
-    Map<String,DocumentIngestStatus> map = statuses.get(ok);
-    if (map == null)
+    DocumentIngestStatusSet set = statuses.get(ok);
+    if (set == null)
     {
-      map = new HashMap<String,DocumentIngestStatus>();
-      statuses.put(ok,map);
+      set = new DocumentIngestStatusSet();
+      statuses.put(ok,set);
     }
-    map.put(componentIDHash,status);
-  }
-  
-  /** Get the set of component hashes for a given output.
-  *@param documentClass is the document class.
-  *@param documentIDHash is the document id's hash value.
-  *@param outputConnectionName is the output connection name.
-  *@return the set of component hashes (empty string meaning no component).
-  */
-  public Set<String> componentIterator(String documentClass, String documentIDHash, String outputConnectionName)
-  {
-    Map<String,DocumentIngestStatus> map = statuses.get(new OutputKey(documentClass,documentIDHash,outputConnectionName));
-    if (map == null)
-      return new HashSet<String>();
-    return map.keySet();
+    set.addDocumentStatus(componentIDHash,status);
   }
   
   /** Retrieve a status record.
   *@param documentClass is the document class.
   *@param documentIDHash is the document id's hash value.
-  *@param componentIDHash is the component id hash value, if any.
   *@param outputConnectionName is the output connection name.
-  *@return the status record, if record.
+  *@return the status set for the given document and output connection, or null if none exists.
   */
-  public DocumentIngestStatus getStatus(String documentClass, String documentIDHash, String componentIDHash, String outputConnectionName)
+  public DocumentIngestStatusSet getStatus(String documentClass, String documentIDHash, String outputConnectionName)
   {
-    if (componentIDHash == null)
-      componentIDHash = "";
-    return statuses.get(new OutputKey(documentClass,documentIDHash,outputConnectionName)).get(componentIDHash);
+    return statuses.get(new OutputKey(documentClass,documentIDHash,outputConnectionName));
   }
 
   protected static class OutputKey

Modified: manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecificationWithVersions.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecificationWithVersions.java?rev=1611786&r1=1611785&r2=1611786&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecificationWithVersions.java (original)
+++ manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecificationWithVersions.java Fri Jul 18 20:06:11 2014
@@ -49,7 +49,11 @@ public class PipelineSpecificationWithVe
   protected DocumentIngestStatus getStatus(int index)
   {
     IPipelineSpecificationBasic basic = pipelineSpecification.getBasicPipelineSpecification();
-    return queuedDocument.getLastIngestedStatus(basic.getStageConnectionName(basic.getOutputStage(index)));
+    // MHL
+    DocumentIngestStatusSet set = queuedDocument.getLastIngestedStatus(basic.getStageConnectionName(basic.getOutputStage(index)));
+    if (set == null)
+      return null;
+    return set.getPrimary();
   }
   
   /** For a given output index, return a document version string.

Modified: manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocument.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocument.java?rev=1611786&r1=1611785&r2=1611786&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocument.java (original)
+++ manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocument.java Fri Jul 18 20:06:11 2014
@@ -39,7 +39,7 @@ public class QueuedDocument
   /** The document description. */
   protected final DocumentDescription documentDescription;
   /** The last ingested status, null meaning "never ingested". */
-  protected final Map<String,DocumentIngestStatus> lastIngestedStatus;
+  protected final Map<String,DocumentIngestStatusSet> lastIngestedStatus;
   /** The binnames for the document, according to the connector */
   protected final String[] binNames;
   /** This flag indicates whether the document has been processed or not. */
@@ -50,7 +50,7 @@ public class QueuedDocument
   *@param lastIngestedStatus is the document's last ingested status.
   *@param binNames are the bins associated with the document.
   */
-  public QueuedDocument(DocumentDescription documentDescription, Map<String,DocumentIngestStatus> lastIngestedStatus, String[] binNames)
+  public QueuedDocument(DocumentDescription documentDescription, Map<String,DocumentIngestStatusSet> lastIngestedStatus, String[] binNames)
   {
     this.documentDescription = documentDescription;
     this.lastIngestedStatus = lastIngestedStatus;
@@ -69,7 +69,7 @@ public class QueuedDocument
   *@param outputConnectionName is the name of the output connection.
   *@return the last ingested status for that output, or null if not found.
   */
-  public DocumentIngestStatus getLastIngestedStatus(String outputConnectionName)
+  public DocumentIngestStatusSet getLastIngestedStatus(String outputConnectionName)
   {
     if (lastIngestedStatus == null)
       return null;

Modified: manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java?rev=1611786&r1=1611785&r2=1611786&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java (original)
+++ manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java Fri Jul 18 20:06:11 2014
@@ -257,10 +257,9 @@ public class StufferThread extends Threa
             for (int j = 0; j < pipelineSpecifications[i].getOutputCount(); j++)
             {
               String outputName = pipelineSpecifications[i].getStageConnectionName(pipelineSpecifications[i].getOutputStage(j));
-              // MHL
-              DocumentIngestStatus status = statuses.getStatus(documentClasses[i],documentIDHashes[i],null,outputName);
-              if (status != null)
-                versions[i].put(outputName,status);
+              DocumentIngestStatusSet statusSet = statuses.getStatus(documentClasses[i],documentIDHashes[i],outputName);
+              if (statusSet != null)
+                versions[i].put(outputName,statusSet);
             }
           }
 
@@ -335,7 +334,7 @@ public class StufferThread extends Threa
               binNames = new String[]{""};
             }
 
-            QueuedDocument qd = new QueuedDocument(descs[i],(Map<String,DocumentIngestStatus>)versions[i],binNames);
+            QueuedDocument qd = new QueuedDocument(descs[i],(Map<String,DocumentIngestStatusSet>)versions[i],binNames);
 
             // Grab the arraylist that's there, or create it.
             List<QueuedDocument> set = documentSets.get(jobID);

Modified: manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1611786&r1=1611785&r2=1611786&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/branches/CONNECTORS-989/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Fri Jul 18 20:06:11 2014
@@ -2409,13 +2409,16 @@ public class WorkerThread extends Thread
     *@param documentIdentifier is the document identifier.
     *@return the document version string, or null if the document was never previously indexed.
     */
+    @Override
     public String getIndexedVersionString(String documentIdentifier)
     {
       QueuedDocument qd = map.get(documentIdentifier);
-      DocumentIngestStatus status = qd.getLastIngestedStatus(lastOutputConnectionName);
-      if (status == null)
+      DocumentIngestStatusSet status = qd.getLastIngestedStatus(lastOutputConnectionName);
+      // MHL
+      if (status == null || status.getPrimary() == null)
         return null;
-      return status.getDocumentVersion();
+      // MHL
+      return status.getPrimary().getDocumentVersion();
     }
 
   }