You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/06 14:38:33 UTC
svn commit: r1608195 - in /manifoldcf/trunk/framework:
agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/
agents/src/main/java/org/apache/manifoldcf/agents/interfaces/
pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/
Author: kwright
Date: Sun Jul 6 12:38:33 2014
New Revision: 1608195
URL: http://svn.apache.org/r1608195
Log:
Revamp IncrementalIngester API DocumentIngestStatus methods
Added:
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java (with props)
Removed:
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/OutputKey.java
Modified:
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1608195&r1=1608194&r2=1608195&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Sun Jul 6 12:38:33 2014
@@ -1377,7 +1377,7 @@ public class IncrementalIngester extends
*/
@Override
public void getPipelineDocumentIngestDataMultiple(
- Map<OutputKey,DocumentIngestStatus> rval,
+ IngestStatuses rval,
IPipelineSpecificationBasic[] pipelineSpecificationBasics,
String[] identifierClasses, String[] identifierHashes)
throws ManifoldCFException
@@ -1422,7 +1422,7 @@ public class IncrementalIngester extends
*/
@Override
public void getPipelineDocumentIngestDataMultiple(
- Map<OutputKey,DocumentIngestStatus> rval,
+ IngestStatuses rval,
IPipelineSpecificationBasic pipelineSpecificationBasic,
String[] identifierClasses, String[] identifierHashes)
throws ManifoldCFException
@@ -1480,7 +1480,7 @@ public class IncrementalIngester extends
*@param clause is the in clause for the query.
*@param list is the parameter list for the query.
*/
- protected void getPipelineDocumentIngestDataChunk(Map<OutputKey,DocumentIngestStatus> rval, Map<String,Integer> map, String[] outputConnectionNames, List<String> list,
+ protected void getPipelineDocumentIngestDataChunk(IngestStatuses rval, Map<String,Integer> map, String[] outputConnectionNames, List<String> list,
String[] identifierClasses, String[] identifierHashes)
throws ManifoldCFException
{
@@ -1519,7 +1519,7 @@ public class IncrementalIngester extends
if (authorityName == null)
authorityName = "";
int indexValue = position.intValue();
- rval.put(new OutputKey(identifierClasses[indexValue],identifierHashes[indexValue],outputConnectionName),
+ rval.addStatus(identifierClasses[indexValue],identifierHashes[indexValue],outputConnectionName,
new DocumentIngestStatus(lastVersion,lastTransformationVersion,lastOutputVersion,paramVersion,authorityName));
}
}
@@ -1533,7 +1533,7 @@ public class IncrementalIngester extends
*/
@Override
public void getPipelineDocumentIngestData(
- Map<OutputKey,DocumentIngestStatus> rval,
+ IngestStatuses rval,
IPipelineSpecificationBasic pipelineSpecificationBasic,
String identifierClass, String identifierHash)
throws ManifoldCFException
@@ -1949,54 +1949,6 @@ public class IncrementalIngester extends
new MultiClause(outputConnNameField,outputConnectionNames)});
}
- /** Get a chunk of document ingest data records.
- *@param rval is the document ingest status array where the data should be put.
- *@param map is the map from id to index.
- *@param clause is the in clause for the query.
- *@param list is the parameter list for the query.
- */
- protected void getDocumentIngestDataChunk(DocumentIngestStatus[] rval, Map<String,Integer> map, String outputConnectionName, List<String> list)
- throws ManifoldCFException
- {
- ArrayList newList = new ArrayList();
- String query = buildConjunctionClause(newList,new ClauseDescription[]{
- new MultiClause(docKeyField,list),
- new UnitaryClause(outputConnNameField,outputConnectionName)});
-
- // Get the primary records associated with this hash value
- IResultSet set = performQuery("SELECT "+idField+","+docKeyField+","+lastVersionField+","+lastOutputVersionField+","+authorityNameField+","+forcedParamsField+
- " FROM "+getTableName()+" WHERE "+query,newList,null,null);
-
- // Now, go through the original request once more, this time building the result
- for (int i = 0; i < set.getRowCount(); i++)
- {
- IResultRow row = set.getRow(i);
- String docHash = row.getValue(docKeyField).toString();
- Integer position = map.get(docHash);
- if (position != null)
- {
- Long id = (Long)row.getValue(idField);
- String lastVersion = (String)row.getValue(lastVersionField);
- if (lastVersion == null)
- lastVersion = "";
- String lastTransformationVersion = (String)row.getValue(lastTransformationVersionField);
- if (lastTransformationVersion == null)
- lastTransformationVersion = "";
- String lastOutputVersion = (String)row.getValue(lastOutputVersionField);
- if (lastOutputVersion == null)
- lastOutputVersion = "";
- String paramVersion = (String)row.getValue(forcedParamsField);
- if (paramVersion == null)
- paramVersion = "";
- String authorityName = (String)row.getValue(authorityNameField);
- if (authorityName == null)
- authorityName = "";
- int indexValue = position.intValue();
- rval[indexValue] = new DocumentIngestStatus(
- lastVersion,lastTransformationVersion,lastOutputVersion,paramVersion,authorityName);
- }
- }
- }
// Protected methods
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java?rev=1608195&r1=1608194&r2=1608195&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java Sun Jul 6 12:38:33 2014
@@ -264,7 +264,7 @@ public interface IIncrementalIngester
*@param identifierHashes is the array of document identifier hashes to look up.
*/
public void getPipelineDocumentIngestDataMultiple(
- Map<OutputKey,DocumentIngestStatus> rval,
+ IngestStatuses rval,
IPipelineSpecificationBasic[] pipelineSpecificationBasics,
String[] identifierClasses, String[] identifierHashes)
throws ManifoldCFException;
@@ -276,7 +276,7 @@ public interface IIncrementalIngester
*@param identifierHashes is the array of document identifier hashes to look up.
*/
public void getPipelineDocumentIngestDataMultiple(
- Map<OutputKey,DocumentIngestStatus> rval,
+ IngestStatuses rval,
IPipelineSpecificationBasic pipelineSpecificationBasic,
String[] identifierClasses, String[] identifierHashes)
throws ManifoldCFException;
@@ -288,7 +288,7 @@ public interface IIncrementalIngester
*@param identifierHash is the hash of the id of the document.
*/
public void getPipelineDocumentIngestData(
- Map<OutputKey,DocumentIngestStatus> rval,
+ IngestStatuses rval,
IPipelineSpecificationBasic pipelineSpecificationBasic,
String identifierClass, String identifierHash)
throws ManifoldCFException;
Added: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java?rev=1608195&view=auto
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java (added)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java Sun Jul 6 12:38:33 2014
@@ -0,0 +1,107 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.util.*;
+
+/** This object is part of the IIncrementalIngester API.
+* It is an accumulator of DocumentIngestStatus records, keyed by document class, document ID hash, and output connection name.
+*/
+public class IngestStatuses
+{
+ public static final String _rcsid = "@(#)$Id$";
+
+ protected final Map<OutputKey,DocumentIngestStatus> statuses = new HashMap<OutputKey,DocumentIngestStatus>();
+
+ public IngestStatuses()
+ {
+ }
+
+ /** Add a status record, replacing any record previously stored under the same key.
+ *@param documentClass is the document class.
+ *@param documentIDHash is the document id's hash value.
+ *@param outputConnectionName is the output connection name.
+ *@param status is the status record.
+ */
+ public void addStatus(String documentClass, String documentIDHash, String outputConnectionName, DocumentIngestStatus status)
+ {
+ statuses.put(new OutputKey(documentClass,documentIDHash,outputConnectionName),status);
+ }
+
+ /** Retrieve a status record.
+ *@param documentClass is the document class.
+ *@param documentIDHash is the document id's hash value.
+ *@param outputConnectionName is the output connection name.
+ *@return the status record, or null if no record is stored under this key.
+ */
+ public DocumentIngestStatus getStatus(String documentClass, String documentIDHash, String outputConnectionName)
+ {
+ return statuses.get(new OutputKey(documentClass,documentIDHash,outputConnectionName));
+ }
+
+ protected static class OutputKey
+ {
+ protected final String documentClass;
+ protected final String documentIDHash;
+ protected final String outputConnectionName;
+
+ /** Constructor. Arguments are expected to be non-null: hashCode() and equals() dereference them without null checks. */
+ public OutputKey(String documentClass, String documentIDHash, String outputConnectionName)
+ {
+ // Identifying information; together these three values form the map key
+ this.documentClass = documentClass;
+ this.documentIDHash = documentIDHash;
+ this.outputConnectionName = outputConnectionName;
+ }
+
+ /** Get the document class */
+ public String getDocumentClass()
+ {
+ return documentClass;
+ }
+
+ /** Get the document ID hash */
+ public String getDocumentIDHash()
+ {
+ return documentIDHash;
+ }
+
+ /** Get the output connection name */
+ public String getOutputConnectionName()
+ {
+ return outputConnectionName;
+ }
+
+ public int hashCode()
+ {
+ return documentClass.hashCode() + documentIDHash.hashCode() + outputConnectionName.hashCode();
+ }
+
+ public boolean equals(Object o)
+ {
+ if (!(o instanceof OutputKey))
+ return false;
+ OutputKey dis = (OutputKey)o;
+ return dis.documentClass.equals(documentClass) &&
+ dis.documentIDHash.equals(documentIDHash) &&
+ dis.outputConnectionName.equals(outputConnectionName);
+ }
+ }
+}
Propchange: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
------------------------------------------------------------------------------
svn:keywords = Id
Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java?rev=1608195&r1=1608194&r2=1608195&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java Sun Jul 6 12:38:33 2014
@@ -248,7 +248,7 @@ public class StufferThread extends Threa
}
- Map<OutputKey,DocumentIngestStatus> statuses = new HashMap<OutputKey,DocumentIngestStatus>();
+ IngestStatuses statuses = new IngestStatuses();
ingester.getPipelineDocumentIngestDataMultiple(statuses,pipelineSpecifications,documentClasses,documentIDHashes);
// Break apart the result.
for (int i = 0; i < descs.length; i++)
@@ -257,8 +257,7 @@ public class StufferThread extends Threa
for (int j = 0; j < pipelineSpecifications[i].getOutputCount(); j++)
{
String outputName = pipelineSpecifications[i].getStageConnectionName(pipelineSpecifications[i].getOutputStage(j));
- OutputKey key = new OutputKey(documentClasses[i],documentIDHashes[i],outputName);
- DocumentIngestStatus status = statuses.get(key);
+ DocumentIngestStatus status = statuses.getStatus(documentClasses[i],documentIDHashes[i],outputName);
if (status != null)
versions[i].put(outputName,status);
}