You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/06 14:38:33 UTC

svn commit: r1608195 - in /manifoldcf/trunk/framework: agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/ agents/src/main/java/org/apache/manifoldcf/agents/interfaces/ pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/

Author: kwright
Date: Sun Jul  6 12:38:33 2014
New Revision: 1608195

URL: http://svn.apache.org/r1608195
Log:
Revamp IncrementalIngester API DocumentIngestStatus methods

Added:
    manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java   (with props)
Removed:
    manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/OutputKey.java
Modified:
    manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
    manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1608195&r1=1608194&r2=1608195&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Sun Jul  6 12:38:33 2014
@@ -1377,7 +1377,7 @@ public class IncrementalIngester extends
   */
   @Override
   public void getPipelineDocumentIngestDataMultiple(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic[] pipelineSpecificationBasics,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException
@@ -1422,7 +1422,7 @@ public class IncrementalIngester extends
   */
   @Override
   public void getPipelineDocumentIngestDataMultiple(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic pipelineSpecificationBasic,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException
@@ -1480,7 +1480,7 @@ public class IncrementalIngester extends
   *@param clause is the in clause for the query.
   *@param list is the parameter list for the query.
   */
-  protected void getPipelineDocumentIngestDataChunk(Map<OutputKey,DocumentIngestStatus> rval, Map<String,Integer> map, String[] outputConnectionNames, List<String> list,
+  protected void getPipelineDocumentIngestDataChunk(IngestStatuses rval, Map<String,Integer> map, String[] outputConnectionNames, List<String> list,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException
   {
@@ -1519,7 +1519,7 @@ public class IncrementalIngester extends
         if (authorityName == null)
           authorityName = "";
         int indexValue = position.intValue();
-        rval.put(new OutputKey(identifierClasses[indexValue],identifierHashes[indexValue],outputConnectionName),
+        rval.addStatus(identifierClasses[indexValue],identifierHashes[indexValue],outputConnectionName,
           new DocumentIngestStatus(lastVersion,lastTransformationVersion,lastOutputVersion,paramVersion,authorityName));
       }
     }
@@ -1533,7 +1533,7 @@ public class IncrementalIngester extends
   */
   @Override
   public void getPipelineDocumentIngestData(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic pipelineSpecificationBasic,
     String identifierClass, String identifierHash)
     throws ManifoldCFException
@@ -1949,54 +1949,6 @@ public class IncrementalIngester extends
       new MultiClause(outputConnNameField,outputConnectionNames)});
   }
   
-  /** Get a chunk of document ingest data records.
-  *@param rval is the document ingest status array where the data should be put.
-  *@param map is the map from id to index.
-  *@param clause is the in clause for the query.
-  *@param list is the parameter list for the query.
-  */
-  protected void getDocumentIngestDataChunk(DocumentIngestStatus[] rval, Map<String,Integer> map, String outputConnectionName, List<String> list)
-    throws ManifoldCFException
-  {
-    ArrayList newList = new ArrayList();
-    String query = buildConjunctionClause(newList,new ClauseDescription[]{
-      new MultiClause(docKeyField,list),
-      new UnitaryClause(outputConnNameField,outputConnectionName)});
-      
-    // Get the primary records associated with this hash value
-    IResultSet set = performQuery("SELECT "+idField+","+docKeyField+","+lastVersionField+","+lastOutputVersionField+","+authorityNameField+","+forcedParamsField+
-      " FROM "+getTableName()+" WHERE "+query,newList,null,null);
-
-    // Now, go through the original request once more, this time building the result
-    for (int i = 0; i < set.getRowCount(); i++)
-    {
-      IResultRow row = set.getRow(i);
-      String docHash = row.getValue(docKeyField).toString();
-      Integer position = map.get(docHash);
-      if (position != null)
-      {
-        Long id = (Long)row.getValue(idField);
-        String lastVersion = (String)row.getValue(lastVersionField);
-        if (lastVersion == null)
-          lastVersion = "";
-        String lastTransformationVersion = (String)row.getValue(lastTransformationVersionField);
-        if (lastTransformationVersion == null)
-          lastTransformationVersion = "";
-        String lastOutputVersion = (String)row.getValue(lastOutputVersionField);
-        if (lastOutputVersion == null)
-          lastOutputVersion = "";
-        String paramVersion = (String)row.getValue(forcedParamsField);
-        if (paramVersion == null)
-          paramVersion = "";
-        String authorityName = (String)row.getValue(authorityNameField);
-        if (authorityName == null)
-          authorityName = "";
-        int indexValue = position.intValue();
-        rval[indexValue] = new DocumentIngestStatus(
-          lastVersion,lastTransformationVersion,lastOutputVersion,paramVersion,authorityName);
-      }
-    }
-  }
 
   // Protected methods
 

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java?rev=1608195&r1=1608194&r2=1608195&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java Sun Jul  6 12:38:33 2014
@@ -264,7 +264,7 @@ public interface IIncrementalIngester
   *@param identifierHashes is the array of document identifier hashes to look up.
   */
   public void getPipelineDocumentIngestDataMultiple(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic[] pipelineSpecificationBasics,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException;
@@ -276,7 +276,7 @@ public interface IIncrementalIngester
   *@param identifierHashes is the array of document identifier hashes to look up.
   */
   public void getPipelineDocumentIngestDataMultiple(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic pipelineSpecificationBasic,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException;
@@ -288,7 +288,7 @@ public interface IIncrementalIngester
   *@param identifierHash is the hash of the id of the document.
   */
   public void getPipelineDocumentIngestData(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic pipelineSpecificationBasic,
     String identifierClass, String identifierHash)
     throws ManifoldCFException;

Added: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java?rev=1608195&view=auto
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java (added)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java Sun Jul  6 12:38:33 2014
@@ -0,0 +1,107 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.util.*;
+
+/** This object is part of the IIncrementalIngester API.
+* It is an accumulator and organizer of DocumentIngestStatus records.
+*/
+public class IngestStatuses
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  protected final Map<OutputKey,DocumentIngestStatus> statuses = new HashMap<OutputKey,DocumentIngestStatus>();
+  
+  public IngestStatuses()
+  {
+  }
+  
+  /** Add a status record.
+  *@param documentClass is the document class.
+  *@param documentIDHash is the document id's hash value.
+  *@param outputConnectionName is the output connection name.
+  *@param status is the status record.
+  */
+  public void addStatus(String documentClass, String documentIDHash, String outputConnectionName, DocumentIngestStatus status)
+  {
+    statuses.put(new OutputKey(documentClass,documentIDHash,outputConnectionName),status);
+  }
+  
+  /** Retrieve a status record.
+  *@param documentClass is the document class.
+  *@param documentIDHash is the document id's hash value.
+  *@param outputConnectionName is the output connection name.
+  *@return the status record if one was added for this key, or null if there is none.
+  */
+  public DocumentIngestStatus getStatus(String documentClass, String documentIDHash, String outputConnectionName)
+  {
+    return statuses.get(new OutputKey(documentClass,documentIDHash,outputConnectionName));
+  }
+
+  protected static class OutputKey
+  {
+    protected final String documentClass;
+    protected final String documentIDHash;
+    protected final String outputConnectionName;
+    
+    /** Constructor */
+    public OutputKey(String documentClass, String documentIDHash, String outputConnectionName)
+    {
+      // Identifying information
+      this.documentClass = documentClass;
+      this.documentIDHash = documentIDHash;
+      this.outputConnectionName = outputConnectionName;
+    }
+
+    /** Get the document class */
+    public String getDocumentClass()
+    {
+      return documentClass;
+    }
+    
+    /** Get the document ID hash */
+    public String getDocumentIDHash()
+    {
+      return documentIDHash;
+    }
+    
+    /** Get the output connection name */
+    public String getOutputConnectionName()
+    {
+      return outputConnectionName;
+    }
+    
+    public int hashCode()
+    {
+      return documentClass.hashCode() + documentIDHash.hashCode() + outputConnectionName.hashCode();
+    }
+    
+    public boolean equals(Object o)
+    {
+      if (!(o instanceof OutputKey))
+        return false;
+      OutputKey dis = (OutputKey)o;
+      return dis.documentClass.equals(documentClass) &&
+        dis.documentIDHash.equals(documentIDHash) &&
+        dis.outputConnectionName.equals(outputConnectionName);
+    }
+  }
+}

Propchange: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java?rev=1608195&r1=1608194&r2=1608195&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java Sun Jul  6 12:38:33 2014
@@ -248,7 +248,7 @@ public class StufferThread extends Threa
 
           }
 
-          Map<OutputKey,DocumentIngestStatus> statuses = new HashMap<OutputKey,DocumentIngestStatus>();
+          IngestStatuses statuses = new IngestStatuses();
           ingester.getPipelineDocumentIngestDataMultiple(statuses,pipelineSpecifications,documentClasses,documentIDHashes);
           // Break apart the result.
           for (int i = 0; i < descs.length; i++)
@@ -257,8 +257,7 @@ public class StufferThread extends Threa
             for (int j = 0; j < pipelineSpecifications[i].getOutputCount(); j++)
             {
               String outputName = pipelineSpecifications[i].getStageConnectionName(pipelineSpecifications[i].getOutputStage(j));
-              OutputKey key = new OutputKey(documentClasses[i],documentIDHashes[i],outputName);
-              DocumentIngestStatus status = statuses.get(key);
+              DocumentIngestStatus status = statuses.getStatus(documentClasses[i],documentIDHashes[i],outputName);
               if (status != null)
                 versions[i].put(outputName,status);
             }