You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/05 20:53:02 UTC
svn commit: r1608109 [1/2] - in /manifoldcf/trunk:
connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/
connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation...
Author: kwright
Date: Sat Jul 5 18:53:01 2014
New Revision: 1608109
URL: http://svn.apache.org/r1608109
Log:
Introduce DocumentVersions and VersionContext. WARNING: API change!!
Added:
manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java (with props)
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java (with props)
Modified:
manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
manifoldcf/trunk/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java
manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
manifoldcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java
manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputConnector.java
manifoldcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java
manifoldcf/trunk/connectors/nulltransformation/connector/src/main/java/org/apache/manifoldcf/agents/transformation/nullconnector/NullConnector.java
manifoldcf/trunk/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerConnector.java
manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineSpecification.java
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IRepositoryConnector.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecification.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingThread.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Modified: manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java (original)
+++ manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java Sat Jul 5 18:53:01 2014
@@ -52,6 +52,7 @@ import org.apache.http.util.EntityUtils;
import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
import org.apache.manifoldcf.agents.interfaces.IOutputNotifyActivity;
import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
+import org.apache.manifoldcf.agents.interfaces.IOutputCheckActivity;
import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
import org.apache.manifoldcf.agents.output.BaseOutputConnector;
@@ -68,6 +69,7 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.core.interfaces.SpecificationNode;
import org.apache.manifoldcf.core.interfaces.BinaryInput;
import org.apache.manifoldcf.core.interfaces.TempFileInput;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
import org.apache.manifoldcf.agents.system.ManifoldCF;
import org.apache.manifoldcf.agents.system.Logging;
@@ -316,7 +318,7 @@ public class AmazonCloudSearchConnector
*@return true if the mime type is indexable by this connector.
*/
@Override
- public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
+ public boolean checkMimeTypeIndexable(VersionContext outputDescription, String mimeType, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption
{
return acceptableMimeTypes.contains(mimeType.toLowerCase(Locale.ROOT));
@@ -337,7 +339,7 @@ public class AmazonCloudSearchConnector
*@return the document status (accepted or permanently rejected).
*/
@Override
- public int addOrReplaceDocumentWithException(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+ public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
throws ManifoldCFException, ServiceInterruption, IOException
{
// Establish a session
Modified: manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java (original)
+++ manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java Sat Jul 5 18:53:01 2014
@@ -55,11 +55,11 @@ public class DocumentFilter extends org.
* the document will not need to be sent again to the output data store.
*/
@Override
- public String getPipelineDescription(Specification os)
+ public VersionContext getPipelineDescription(Specification os)
throws ManifoldCFException, ServiceInterruption
{
SpecPacker sp = new SpecPacker(os);
- return sp.toPackedString();
+ return new VersionContext(sp.toPackedString(),params,os);
}
/** Detect if a mime type is indexable or not. This method is used by participating repository connectors to pre-filter the number of
@@ -69,10 +69,10 @@ public class DocumentFilter extends org.
*@return true if the mime type is indexable by this connector.
*/
@Override
- public boolean checkMimeTypeIndexable(String outputDescription, String mimeType, IOutputCheckActivity activities)
+ public boolean checkMimeTypeIndexable(VersionContext outputDescription, String mimeType, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption
{
- SpecPacker sp = new SpecPacker(outputDescription);
+ SpecPacker sp = new SpecPacker(outputDescription.getVersionString());
if (sp.checkMimeType(mimeType))
return super.checkMimeTypeIndexable(outputDescription, mimeType, activities);
else
@@ -80,9 +80,9 @@ public class DocumentFilter extends org.
}
@Override
- public boolean checkLengthIndexable(String outputDescription, long length, IOutputCheckActivity activities)
+ public boolean checkLengthIndexable(VersionContext outputDescription, long length, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption {
- SpecPacker sp = new SpecPacker(outputDescription);
+ SpecPacker sp = new SpecPacker(outputDescription.getVersionString());
if (sp.checkLengthIndexable(length))
return super.checkLengthIndexable(outputDescription, length, activities);
else
@@ -90,9 +90,9 @@ public class DocumentFilter extends org.
}
@Override
- public boolean checkURLIndexable(String outputDescription, String url, IOutputCheckActivity activities)
+ public boolean checkURLIndexable(VersionContext outputDescription, String url, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption {
- SpecPacker sp = new SpecPacker(outputDescription);
+ SpecPacker sp = new SpecPacker(outputDescription.getVersionString());
if (sp.checkURLIndexable(url))
return super.checkURLIndexable(outputDescription, url, activities);
else
@@ -114,7 +114,7 @@ public class DocumentFilter extends org.
*@return the document status (accepted or permanently rejected).
*/
@Override
- public int addOrReplaceDocumentWithException(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+ public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
throws ManifoldCFException, ServiceInterruption, IOException
{
return activities.sendDocument(documentURI, document, authorityNameString);
Modified: manifoldcf/trunk/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java (original)
+++ manifoldcf/trunk/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java Sat Jul 5 18:53:01 2014
@@ -46,6 +46,7 @@ import org.apache.commons.io.FilenameUti
import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
import org.apache.manifoldcf.agents.interfaces.IOutputNotifyActivity;
import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
+import org.apache.manifoldcf.agents.interfaces.IOutputCheckActivity;
import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
import org.apache.manifoldcf.agents.output.BaseOutputConnector;
@@ -59,6 +60,7 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.core.interfaces.IThreadContext;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
import org.json.JSONException;
import org.json.JSONObject;
@@ -412,26 +414,26 @@ public class ElasticSearchConnector exte
}
@Override
- public String getPipelineDescription(Specification os)
+ public VersionContext getPipelineDescription(Specification os)
throws ManifoldCFException
{
ElasticSearchSpecs specs = new ElasticSearchSpecs(getSpecNode(os));
- return specs.toJson().toString();
+ return new VersionContext(specs.toJson().toString(),params,os);
}
@Override
- public boolean checkLengthIndexable(String outputDescription, long length)
+ public boolean checkLengthIndexable(VersionContext outputDescription, long length, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption
{
- ElasticSearchSpecs specs = getSpecsCache(outputDescription);
+ ElasticSearchSpecs specs = getSpecsCache(outputDescription.getVersionString());
long maxFileSize = specs.getMaxFileSize();
if (length > maxFileSize)
return false;
- return super.checkLengthIndexable(outputDescription, length);
+ return super.checkLengthIndexable(outputDescription, length, activities);
}
@Override
- public boolean checkDocumentIndexable(String outputDescription, File localFile)
+ public boolean checkDocumentIndexable(VersionContext outputDescription, File localFile, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption
{
// No filtering here; we don't look inside the file and don't know its extension. That's done via the url
@@ -446,10 +448,10 @@ public class ElasticSearchConnector exte
*@return true if the file is indexable.
*/
@Override
- public boolean checkURLIndexable(String outputDescription, String url)
+ public boolean checkURLIndexable(VersionContext outputDescription, String url, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption
{
- ElasticSearchSpecs specs = getSpecsCache(outputDescription);
+ ElasticSearchSpecs specs = getSpecsCache(outputDescription.getVersionString());
return specs.checkExtension(FilenameUtils.getExtension(url));
}
Modified: manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java (original)
+++ manifoldcf/trunk/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java Sat Jul 5 18:53:01 2014
@@ -51,6 +51,7 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.core.interfaces.IThreadContext;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
import org.json.JSONException;
public class FileOutputConnector extends BaseOutputConnector {
@@ -144,9 +145,9 @@ public class FileOutputConnector extends
* the document will not need to be sent again to the output data store.
*/
@Override
- public String getPipelineDescription(Specification spec) throws ManifoldCFException, ServiceInterruption {
+ public VersionContext getPipelineDescription(Specification spec) throws ManifoldCFException, ServiceInterruption {
FileOutputSpecs specs = new FileOutputSpecs(getSpecNode(spec));
- return specs.toJson().toString();
+ return new VersionContext(specs.toJson().toString(),params,spec);
}
/** Add (or replace) a document in the output data store using the connector.
@@ -164,7 +165,7 @@ public class FileOutputConnector extends
*@return the document status (accepted or permanently rejected).
*/
@Override
- public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities) throws ManifoldCFException, ServiceInterruption {
+ public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities) throws ManifoldCFException, ServiceInterruption, IOException {
// Establish a session
getSession();
@@ -173,7 +174,7 @@ public class FileOutputConnector extends
FileOutputSpecs specs = null;
StringBuffer path = new StringBuffer();
try {
- specs = new FileOutputSpecs(outputDescription);
+ specs = new FileOutputSpecs(outputDescription.getVersionString());
/*
* make file path
Modified: manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java (original)
+++ manifoldcf/trunk/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java Sat Jul 5 18:53:01 2014
@@ -60,11 +60,12 @@ public class ForcedMetadataConnector ext
*@return a string, of unlimited length, which uniquely describes configuration and specification in such a way that
* if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
*/
- public String getPipelineDescription(Specification spec)
+ @Override
+ public VersionContext getPipelineDescription(Specification spec)
throws ManifoldCFException, ServiceInterruption
{
SpecPacker sp = new SpecPacker(spec);
- return sp.toPackedString();
+ return new VersionContext(sp.toPackedString(),params,spec);
}
/** Add (or replace) a document in the output data store using the connector.
@@ -83,11 +84,12 @@ public class ForcedMetadataConnector ext
*@return the document status (accepted or permanently rejected).
*@throws IOException only if there's a stream error reading the document data.
*/
- public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+ @Override
+ public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
throws ManifoldCFException, ServiceInterruption, IOException
{
// Unpack the forced metadata
- SpecPacker sp = new SpecPacker(pipelineDescription);
+ SpecPacker sp = new SpecPacker(pipelineDescription.getVersionString());
// We have to create a copy of the Repository Document, since we might be rearranging things
RepositoryDocument docCopy = document.duplicate();
docCopy.clearFields();
Modified: manifoldcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java (original)
+++ manifoldcf/trunk/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java Sat Jul 5 18:53:01 2014
@@ -238,7 +238,7 @@ public class GTSConnector extends org.ap
* the document will not need to be sent again to the output data store.
*/
@Override
- public String getPipelineDescription(Specification spec)
+ public VersionContext getPipelineDescription(Specification spec)
throws ManifoldCFException, ServiceInterruption
{
// The information we want in this string is:
@@ -282,7 +282,7 @@ public class GTSConnector extends org.ap
// From here on down, unpacking is unnecessary.
sb.append(ingestURI);
- return sb.toString();
+ return new VersionContext(sb.toString(),params,spec);
}
/** Add (or replace) a document in the output data store using the connector.
Modified: manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputConnector.java (original)
+++ manifoldcf/trunk/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputConnector.java Sat Jul 5 18:53:01 2014
@@ -49,6 +49,7 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.core.interfaces.IThreadContext;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
import org.json.JSONException;
public class HDFSOutputConnector extends BaseOutputConnector {
@@ -249,9 +250,9 @@ public class HDFSOutputConnector extends
* the document will not need to be sent again to the output data store.
*/
@Override
- public String getPipelineDescription(Specification spec) throws ManifoldCFException, ServiceInterruption {
+ public VersionContext getPipelineDescription(Specification spec) throws ManifoldCFException, ServiceInterruption {
HDFSOutputSpecs specs = new HDFSOutputSpecs(getSpecNode(spec));
- return specs.toJson().toString();
+ return new VersionContext(specs.toJson().toString(),params,spec);
}
/** Add (or replace) a document in the output data store using the connector.
Modified: manifoldcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java (original)
+++ manifoldcf/trunk/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java Sat Jul 5 18:53:01 2014
@@ -22,6 +22,7 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.agents.interfaces.*;
import java.util.*;
+import java.io.*;
/** This is a null output connector. It eats all output and simply logs the events.
*/
@@ -109,10 +110,10 @@ public class NullConnector extends org.a
* the document will not need to be sent again to the output data store.
*/
@Override
- public String getPipelineDescription(Specification spec)
+ public VersionContext getPipelineDescription(Specification spec)
throws ManifoldCFException, ServiceInterruption
{
- return "";
+ return new VersionContext("",params,spec);
}
/** Add (or replace) a document in the output data store using the connector.
@@ -130,8 +131,8 @@ public class NullConnector extends org.a
*@return the document status (accepted or permanently rejected).
*/
@Override
- public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
- throws ManifoldCFException, ServiceInterruption
+ public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+ throws ManifoldCFException, ServiceInterruption, IOException
{
// Establish a session
getSession();
Modified: manifoldcf/trunk/connectors/nulltransformation/connector/src/main/java/org/apache/manifoldcf/agents/transformation/nullconnector/NullConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/nulltransformation/connector/src/main/java/org/apache/manifoldcf/agents/transformation/nullconnector/NullConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/nulltransformation/connector/src/main/java/org/apache/manifoldcf/agents/transformation/nullconnector/NullConnector.java (original)
+++ manifoldcf/trunk/connectors/nulltransformation/connector/src/main/java/org/apache/manifoldcf/agents/transformation/nullconnector/NullConnector.java Sat Jul 5 18:53:01 2014
@@ -62,7 +62,7 @@ public class NullConnector extends org.a
*@throws IOException only if there's a stream error reading the document data.
*/
@Override
- public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+ public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
throws ManifoldCFException, ServiceInterruption, IOException
{
long startTime = System.currentTimeMillis();
Modified: manifoldcf/trunk/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerConnector.java (original)
+++ manifoldcf/trunk/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerConnector.java Sat Jul 5 18:53:01 2014
@@ -59,6 +59,7 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.core.interfaces.IThreadContext;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
import org.apache.manifoldcf.core.system.Logging;
import org.json.JSONException;
import org.json.JSONObject;
@@ -387,10 +388,10 @@ public class OpenSearchServerConnector e
}
@Override
- public String getPipelineDescription(Specification os)
+ public VersionContext getPipelineDescription(Specification os)
throws ManifoldCFException {
OpenSearchServerSpecs specs = new OpenSearchServerSpecs(getSpecNode(os));
- return specs.toJson().toString();
+ return new VersionContext(specs.toJson().toString(),params,os);
}
@Override
Modified: manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java (original)
+++ manifoldcf/trunk/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java Sat Jul 5 18:53:01 2014
@@ -45,6 +45,7 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.core.interfaces.KeystoreManagerFactory;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
/** This is the output connector for SOLR. Currently, no frills.
@@ -491,12 +492,12 @@ public class SolrConnector extends org.a
* the document will not need to be sent again to the output data store.
*/
@Override
- public String getPipelineDescription(Specification spec)
+ public VersionContext getPipelineDescription(Specification spec)
throws ManifoldCFException, ServiceInterruption
{
getSession();
SpecPacker sp = new SpecPacker(spec);
- return sp.toPackedString();
+ return new VersionContext(sp.toPackedString(),params,spec);
}
private final static Set<String> acceptableMimeTypes = new HashSet<String>();
@@ -514,6 +515,7 @@ public class SolrConnector extends org.a
*@param mimeType is the mime type of the document.
*@return true if the mime type is indexable by this connector.
*/
+ @Override
public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
throws ManifoldCFException, ServiceInterruption
{
@@ -535,6 +537,7 @@ public class SolrConnector extends org.a
*@param length is the length of the document.
*@return true if the file is indexable.
*/
+ @Override
public boolean checkLengthIndexable(String outputDescription, long length)
throws ManifoldCFException, ServiceInterruption
{
Modified: manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java (original)
+++ manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformer/tika/TikaExtractor.java Sat Jul 5 18:53:01 2014
@@ -74,11 +74,11 @@ public class TikaExtractor extends org.a
* the document will not need to be sent again to the output data store.
*/
@Override
- public String getPipelineDescription(Specification os)
+ public VersionContext getPipelineDescription(Specification os)
throws ManifoldCFException, ServiceInterruption
{
SpecPacker sp = new SpecPacker(os);
- return sp.toPackedString();
+ return new VersionContext(sp.toPackedString(),params,os);
}
// We intercept checks pertaining to the document format and send modified checks further down
@@ -90,7 +90,7 @@ public class TikaExtractor extends org.a
*@param checkActivity is an object including the activities that can be performed by this method.
*@return true if the mime type can be accepted by this connector.
*/
- public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+ public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
throws ManifoldCFException, ServiceInterruption
{
// We should see what Tika will transform
@@ -108,7 +108,7 @@ public class TikaExtractor extends org.a
*@return true if the file is acceptable, false if not.
*/
@Override
- public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+ public boolean checkDocumentIndexable(VersionContext pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
throws ManifoldCFException, ServiceInterruption
{
// Document contents are not germane anymore, unless it looks like Tika won't accept them.
@@ -124,7 +124,7 @@ public class TikaExtractor extends org.a
*@return true if the file is acceptable, false if not.
*/
@Override
- public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+ public boolean checkLengthIndexable(VersionContext pipelineDescription, long length, IOutputCheckActivity checkActivity)
throws ManifoldCFException, ServiceInterruption
{
// Always true
@@ -148,14 +148,14 @@ public class TikaExtractor extends org.a
*@throws IOException only if there's a stream error reading the document data.
*/
@Override
- public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+ public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
throws ManifoldCFException, ServiceInterruption, IOException
{
// First, make sure downstream pipeline will now accept text/plain;charset=utf-8
if (!activities.checkMimeTypeIndexable("text/plain;charset=utf-8"))
return DOCUMENTSTATUS_REJECTED;
- SpecPacker sp = new SpecPacker(pipelineDescription);
+ SpecPacker sp = new SpecPacker(pipelineDescription.getVersionString());
// Tika's API reads from an input stream and writes to an output Writer.
// Since a RepositoryDocument includes readers and inputstreams exclusively, AND all downstream
Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1608109&r1=1608108&r2=1608109&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Sat Jul 5 18:53:01 2014
@@ -232,23 +232,6 @@ public class IncrementalIngester extends
}
/** Check if a mime type is indexable.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param outputDescription is the output description string.
- *@param mimeType is the mime type to check.
- *@return true if the mimeType is indexable.
- */
- @Override
- @Deprecated
- public boolean checkMimeTypeIndexable(String outputConnectionName, String outputDescription, String mimeType)
- throws ManifoldCFException, ServiceInterruption
- {
- return checkMimeTypeIndexable(
- new RuntPipelineSpecification(outputConnectionName,outputDescription),
- mimeType,null);
- }
-
-
- /** Check if a mime type is indexable.
*@param pipelineSpecification is the pipeline specification.
*@param mimeType is the mime type to check.
*@param activity are the activities available to this method.
@@ -277,22 +260,6 @@ public class IncrementalIngester extends
}
/** Check if a file is indexable.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param outputDescription is the output description string.
- *@param localFile is the local file to check.
- *@return true if the local file is indexable.
- */
- @Override
- @Deprecated
- public boolean checkDocumentIndexable(String outputConnectionName, String outputDescription, File localFile)
- throws ManifoldCFException, ServiceInterruption
- {
- return checkDocumentIndexable(
- new RuntPipelineSpecification(outputConnectionName,outputDescription),
- localFile,null);
- }
-
- /** Check if a file is indexable.
*@param pipelineSpecification is the pipeline specification.
*@param localFile is the local file to check.
*@param activity are the activities available to this method.
@@ -322,23 +289,6 @@ public class IncrementalIngester extends
/** Pre-determine whether a document's length is indexable by this connector. This method is used by participating repository connectors
* to help filter out documents that are too long to be indexable.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param outputDescription is the output description string.
- *@param length is the length of the document.
- *@return true if the file is indexable.
- */
- @Override
- @Deprecated
- public boolean checkLengthIndexable(String outputConnectionName, String outputDescription, long length)
- throws ManifoldCFException, ServiceInterruption
- {
- return checkLengthIndexable(
- new RuntPipelineSpecification(outputConnectionName,outputDescription),
- length,null);
- }
-
- /** Pre-determine whether a document's length is indexable by this connector. This method is used by participating repository connectors
- * to help filter out documents that are too long to be indexable.
*@param pipelineSpecification is the pipeline specification.
*@param length is the length of the document.
*@param activity are the activities available to this method.
@@ -368,23 +318,6 @@ public class IncrementalIngester extends
/** Pre-determine whether a document's URL is indexable by this connector. This method is used by participating repository connectors
* to help filter out documents that are not indexable.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param outputDescription is the output description string.
- *@param url is the url of the document.
- *@return true if the file is indexable.
- */
- @Override
- @Deprecated
- public boolean checkURLIndexable(String outputConnectionName, String outputDescription, String url)
- throws ManifoldCFException, ServiceInterruption
- {
- return checkURLIndexable(
- new RuntPipelineSpecification(outputConnectionName,outputDescription),
- url,null);
- }
-
- /** Pre-determine whether a document's URL is indexable by this connector. This method is used by participating repository connectors
- * to help filter out documents that not indexable.
*@param pipelineSpecification is the pipeline specification.
*@param url is the url of the document.
*@param activity are the activities available to this method.
@@ -517,21 +450,8 @@ public class IncrementalIngester extends
*@param spec is the output specification.
*@return the description string.
*/
- @Deprecated
@Override
- public String getOutputDescription(String outputConnectionName, OutputSpecification spec)
- throws ManifoldCFException, ServiceInterruption
- {
- return getOutputDescription(outputConnectionName,(Specification)spec);
- }
-
- /** Get an output version string for a document.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param spec is the output specification.
- *@return the description string.
- */
- @Override
- public String getOutputDescription(String outputConnectionName, Specification spec)
+ public VersionContext getOutputDescription(String outputConnectionName, Specification spec)
throws ManifoldCFException, ServiceInterruption
{
IOutputConnection connection = connectionManager.load(outputConnectionName);
@@ -555,7 +475,8 @@ public class IncrementalIngester extends
*@param spec is the transformation specification.
*@return the description string.
*/
- public String getTransformationDescription(String transformationConnectionName, Specification spec)
+ @Override
+ public VersionContext getTransformationDescription(String transformationConnectionName, Specification spec)
throws ManifoldCFException, ServiceInterruption
{
ITransformationConnection connection = transformationConnectionManager.load(transformationConnectionName);
@@ -652,7 +573,7 @@ public class IncrementalIngester extends
if (newStage == -1)
break;
stageNames[stageCount] = basicSpecification.getStageConnectionName(newStage);
- stageDescriptions[stageCount] = pipelineSpecification.getStageDescriptionString(newStage);
+ stageDescriptions[stageCount] = pipelineSpecification.getStageDescriptionString(newStage).getVersionString();
stageCount++;
currentStage = newStage;
}
@@ -690,31 +611,6 @@ public class IncrementalIngester extends
/** Record a document version, but don't ingest it.
* The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
* ServiceInterruption is thrown if this action must be rescheduled.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
- *@param identifierHash is the hashed document identifier.
- *@param documentVersion is the document version.
- *@param recordTime is the time at which the recording took place, in milliseconds since epoch.
- *@param activities is the object used in case a document needs to be removed from the output index as the result of this operation.
- */
- @Override
- @Deprecated
- public void documentRecord(String outputConnectionName,
- String identifierClass, String identifierHash,
- String documentVersion,
- long recordTime, IOutputActivity activities)
- throws ManifoldCFException, ServiceInterruption
- {
- documentRecord(
- new RuntPipelineSpecificationBasic(outputConnectionName),
- identifierClass, identifierHash,
- documentVersion,
- recordTime, activities);
- }
-
- /** Record a document version, but don't ingest it.
- * The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
- * ServiceInterruption is thrown if this action must be rescheduled.
*@param pipelineSpecificationBasic is the basic pipeline specification needed.
*@param identifierClass is the name of the space in which the identifier hash should be interpreted.
*@param identifierHash is the hashed document identifier.
@@ -844,185 +740,6 @@ public class IncrementalIngester extends
* method also REMOVES ALL OLD METADATA. When complete, the index will contain only the metadata
* described by the RepositoryDocument object passed to this method.
* ServiceInterruption is thrown if the document ingestion must be rescheduled.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
- *@param identifierHash is the hashed document identifier.
- *@param documentVersion is the document version.
- *@param outputVersion is the output version string constructed from the output specification by the output connector.
- *@param authorityName is the name of the authority associated with the document, if any.
- *@param data is the document data. The data is closed after ingestion is complete.
- *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
- *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
- *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
- *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
- */
- @Override
- @Deprecated
- public boolean documentIngest(String outputConnectionName,
- String identifierClass, String identifierHash,
- String documentVersion,
- String outputVersion,
- String authorityName,
- RepositoryDocument data,
- long ingestTime, String documentURI,
- IOutputActivity activities)
- throws ManifoldCFException, ServiceInterruption
- {
- return documentIngest(outputConnectionName,
- identifierClass,
- identifierHash,
- documentVersion,
- outputVersion,
- null,
- authorityName,
- data,
- ingestTime,
- documentURI,
- activities);
- }
-
- /** Ingest a document.
- * This ingests the document, and notes it. If this is a repeat ingestion of the document, this
- * method also REMOVES ALL OLD METADATA. When complete, the index will contain only the metadata
- * described by the RepositoryDocument object passed to this method.
- * ServiceInterruption is thrown if the document ingestion must be rescheduled.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
- *@param identifierHash is the hashed document identifier.
- *@param documentVersion is the document version.
- *@param parameterVersion is the forced parameter version.
- *@param outputVersion is the output version string constructed from the output specification by the output connector.
- *@param authorityName is the name of the authority associated with the document, if any.
- *@param data is the document data. The data is closed after ingestion is complete.
- *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
- *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
- *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
- *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
- */
- @Override
- @Deprecated
- public boolean documentIngest(String outputConnectionName,
- String identifierClass, String identifierHash,
- String documentVersion,
- String outputVersion,
- String parameterVersion,
- String authorityName,
- RepositoryDocument data,
- long ingestTime, String documentURI,
- IOutputActivity activities)
- throws ManifoldCFException, ServiceInterruption
- {
- try
- {
- return documentIngest(
- new RuntPipelineSpecificationWithVersions(outputConnectionName,outputVersion,
- "","","","",""),
- identifierClass, identifierHash,
- documentVersion,
- parameterVersion,
- authorityName,
- data,
- ingestTime, documentURI,
- activities);
- }
- catch (IOException e)
- {
- handleIOException(e,"fetching");
- return false;
- }
- }
-
- // Standard handling for IOExceptions from reading data
- protected final static long interruptionRetryTime = 5L*60L*1000L;
- protected static void handleIOException(IOException e, String context)
- throws ManifoldCFException, ServiceInterruption
- {
- if ((e instanceof InterruptedIOException) && (!(e instanceof java.net.SocketTimeoutException)))
- throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
-
- long currentTime = System.currentTimeMillis();
-
- if (e instanceof java.net.ConnectException)
- {
- // Server isn't up at all. Try for a brief time then give up.
- String message = "Server could not be contacted during "+context+": "+e.getMessage();
- Logging.ingest.warn(message,e);
- throw new ServiceInterruption(message,
- e,
- currentTime + interruptionRetryTime,
- -1L,
- 3,
- true);
- }
-
- if (e instanceof java.net.SocketTimeoutException)
- {
- String message2 = "Socket timeout exception during "+context+": "+e.getMessage();
- Logging.ingest.warn(message2,e);
- throw new ServiceInterruption(message2,
- e,
- currentTime + interruptionRetryTime,
- currentTime + 20L * 60000L,
- -1,
- false);
- }
-
- if (e.getClass().getName().equals("java.net.SocketException"))
- {
- // In the past we would have treated this as a straight document rejection, and
- // treated it in the same manner as a 400. The reasoning is that the server can
- // perfectly legally send out a 400 and drop the connection immediately thereafter,
- // this a race condition.
- // However, Solr 4.0 (or the Jetty version that the example runs on) seems
- // to have a bug where it drops the connection when two simultaneous documents come in
- // at the same time. This is the final version of Solr 4.0 so we need to deal with
- // this.
- if (e.getMessage().toLowerCase(Locale.ROOT).indexOf("broken pipe") != -1 ||
- e.getMessage().toLowerCase(Locale.ROOT).indexOf("connection reset") != -1 ||
- e.getMessage().toLowerCase(Locale.ROOT).indexOf("target server failed to respond") != -1)
- {
- // Treat it as a service interruption, but with a limited number of retries.
- // In that way we won't burden the user with a huge retry interval; it should
- // give up fairly quickly, and yet NOT give up if the error was merely transient
- String message = "Server dropped connection during "+context+": "+e.getMessage();
- Logging.ingest.warn(message,e);
- throw new ServiceInterruption(message,
- e,
- currentTime + interruptionRetryTime,
- -1L,
- 3,
- false);
- }
-
- // Other socket exceptions are service interruptions - but if we keep getting them, it means
- // that a socket timeout is probably set too low to accept this particular document. So
- // we retry for a while, then skip the document.
- String message2 = "Socket exception during "+context+": "+e.getMessage();
- Logging.ingest.warn(message2,e);
- throw new ServiceInterruption(message2,
- e,
- currentTime + interruptionRetryTime,
- currentTime + 20L * 60000L,
- -1,
- false);
- }
-
- // Otherwise, no idea what the trouble is, so presume that retries might fix it.
- String message3 = "IO exception during "+context+": "+e.getMessage();
- Logging.ingest.warn(message3,e);
- throw new ServiceInterruption(message3,
- e,
- currentTime + interruptionRetryTime,
- currentTime + 2L * 60L * 60000L,
- -1,
- true);
- }
-
- /** Ingest a document.
- * This ingests the document, and notes it. If this is a repeat ingestion of the document, this
- * method also REMOVES ALL OLD METADATA. When complete, the index will contain only the metadata
- * described by the RepositoryDocument object passed to this method.
- * ServiceInterruption is thrown if the document ingestion must be rescheduled.
*@param pipelineSpecificationWithVersions is the pipeline specification with already-fetched output versioning information.
*@param identifierClass is the name of the space in which the identifier hash should be interpreted.
*@param identifierHash is the hashed document identifier.
@@ -1075,24 +792,6 @@ public class IncrementalIngester extends
}
}
- /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
- * versions agreed).
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
- *@param identifierHashes are the set of document identifier hashes.
- *@param checkTime is the time at which the check took place, in milliseconds since epoch.
- */
- @Override
- @Deprecated
- public void documentCheckMultiple(String outputConnectionName,
- String[] identifierClasses, String[] identifierHashes,
- long checkTime)
- throws ManifoldCFException
- {
- documentCheckMultiple(new RuntPipelineSpecificationBasic(outputConnectionName),
- identifierClasses,identifierHashes,checkTime);
- }
-
protected static String[] extractOutputConnectionNames(IPipelineSpecificationBasic pipelineSpecificationBasic)
{
String[] rval = new String[pipelineSpecificationBasic.getOutputCount()];
@@ -1191,24 +890,6 @@ public class IncrementalIngester extends
/** Note the fact that we checked a document (and found that it did not need to be ingested, because the
* versions agreed).
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
- *@param identifierHash is the hashed document identifier.
- *@param checkTime is the time at which the check took place, in milliseconds since epoch.
- */
- @Override
- @Deprecated
- public void documentCheck(String outputConnectionName,
- String identifierClass, String identifierHash,
- long checkTime)
- throws ManifoldCFException
- {
- documentCheck(new RuntPipelineSpecificationBasic(outputConnectionName),
- identifierClass,identifierHash,checkTime);
- }
-
- /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
- * versions agreed).
*@param pipelineSpecificationBasic is a basic pipeline specification.
*@param identifierClass is the name of the space in which the identifier hash should be interpreted.
*@param identifierHash is the hashed document identifier.
@@ -1247,28 +928,6 @@ public class IncrementalIngester extends
/** Delete multiple documents from the search engine index.
- *@param outputConnectionNames are the names of the output connections associated with this action.
- *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
- *@param identifierHashes is tha array of document identifier hashes if the documents.
- *@param activities is the object to use to log the details of the ingestion attempt. May be null.
- */
- @Override
- @Deprecated
- public void documentDeleteMultiple(String[] outputConnectionNames,
- String[] identifierClasses, String[] identifierHashes,
- IOutputRemoveActivity activities)
- throws ManifoldCFException, ServiceInterruption
- {
- IPipelineSpecificationBasic[] pipelineSpecs = new IPipelineSpecificationBasic[outputConnectionNames.length];
- for (int i = 0; i < pipelineSpecs.length; i++)
- {
- pipelineSpecs[i] = new RuntPipelineSpecificationBasic(outputConnectionNames[i]);
- }
- documentDeleteMultiple(pipelineSpecs,
- identifierClasses,identifierHashes,activities);
- }
-
- /** Delete multiple documents from the search engine index.
*@param pipelineSpecificationBasics are the pipeline specifications associated with the documents.
*@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
*@param identifierHashes is the array of document identifier hashes of the documents.
@@ -1315,23 +974,6 @@ public class IncrementalIngester extends
}
/** Delete multiple documents from the search engine index.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
- *@param identifierHashes is tha array of document identifier hashes if the documents.
- *@param activities is the object to use to log the details of the ingestion attempt. May be null.
- */
- @Override
- @Deprecated
- public void documentDeleteMultiple(String outputConnectionName,
- String[] identifierClasses, String[] identifierHashes,
- IOutputRemoveActivity activities)
- throws ManifoldCFException, ServiceInterruption
- {
- documentDeleteMultiple(new RuntPipelineSpecificationBasic(outputConnectionName),
- identifierClasses,identifierHashes,activities);
- }
-
- /** Delete multiple documents from the search engine index.
*@param pipelineSpecificationBasic is the basic pipeline specification.
*@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
*@param identifierHashes is the array of document identifier hashes of the documents.
@@ -1658,23 +1300,6 @@ public class IncrementalIngester extends
}
/** Delete a document from the search engine index.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
- *@param identifierHash is the hash of the id of the document.
- *@param activities is the object to use to log the details of the ingestion attempt. May be null.
- */
- @Override
- @Deprecated
- public void documentDelete(String outputConnectionName,
- String identifierClass, String identifierHash,
- IOutputRemoveActivity activities)
- throws ManifoldCFException, ServiceInterruption
- {
- documentDelete(new RuntPipelineSpecificationBasic(outputConnectionName),
- identifierClass,identifierHash,activities);
- }
-
- /** Delete a document from the search engine index.
*@param pipelineSpecificationBasic is the basic pipeline specification.
*@param identifierClass is the name of the space in which the identifier hash should be interpreted.
*@param identifierHash is the hash of the id of the document.
@@ -1917,135 +1542,6 @@ public class IncrementalIngester extends
new String[]{identifierClass},new String[]{identifierHash});
}
- /** Look up ingestion data for a SET of documents.
- *@param outputConnectionNames are the names of the output connections associated with this action.
- *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
- *@param identifierHashes is the array of document identifier hashes to look up.
- *@return the array of document data. Null will come back for any identifier that doesn't
- * exist in the index.
- */
- @Override
- @Deprecated
- public DocumentIngestStatus[] getDocumentIngestDataMultiple(String[] outputConnectionNames,
- String[] identifierClasses, String[] identifierHashes)
- throws ManifoldCFException
- {
- // Segregate request by connection names
- Map<String,List<Integer>> keyMap = new HashMap<String,List<Integer>>();
- for (int i = 0; i < outputConnectionNames.length; i++)
- {
- String outputConnectionName = outputConnectionNames[i];
- List<Integer> list = keyMap.get(outputConnectionName);
- if (list == null)
- {
- list = new ArrayList<Integer>();
- keyMap.put(outputConnectionName,list);
- }
- list.add(new Integer(i));
- }
-
- // Create the return array.
- DocumentIngestStatus[] rval = new DocumentIngestStatus[outputConnectionNames.length];
- Iterator<String> iter = keyMap.keySet().iterator();
- while (iter.hasNext())
- {
- String outputConnectionName = iter.next();
- List<Integer> list = keyMap.get(outputConnectionName);
- String[] localIdentifierClasses = new String[list.size()];
- String[] localIdentifierHashes = new String[list.size()];
- for (int i = 0; i < localIdentifierClasses.length; i++)
- {
- int index = list.get(i).intValue();
- localIdentifierClasses[i] = identifierClasses[index];
- localIdentifierHashes[i] = identifierHashes[index];
- }
- DocumentIngestStatus[] localRval = getDocumentIngestDataMultiple(outputConnectionName,localIdentifierClasses,localIdentifierHashes);
- for (int i = 0; i < localRval.length; i++)
- {
- int index = list.get(i).intValue();
- rval[index] = localRval[i];
- }
- }
- return rval;
- }
-
- /** Look up ingestion data for a SET of documents.
- *@param outputConnectionName is the names of the output connection associated with this action.
- *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
- *@param identifierHashes is the array of document identifier hashes to look up.
- *@return the array of document data. Null will come back for any identifier that doesn't
- * exist in the index.
- */
- @Override
- @Deprecated
- public DocumentIngestStatus[] getDocumentIngestDataMultiple(String outputConnectionName,
- String[] identifierClasses, String[] identifierHashes)
- throws ManifoldCFException
- {
- // Build the return array
- DocumentIngestStatus[] rval = new DocumentIngestStatus[identifierHashes.length];
-
- // Build a map, so we can convert an identifier into an array index.
- Map<String,Integer> indexMap = new HashMap<String,Integer>();
- for (int i = 0; i < identifierHashes.length; i++)
- {
- indexMap.put(makeKey(identifierClasses[i],identifierHashes[i]),new Integer(i));
- rval[i] = null;
- }
-
- beginTransaction();
- try
- {
- List<String> list = new ArrayList<String>();
- int maxCount = maxClauseDocumentIngestDataChunk(outputConnectionName);
- int j = 0;
- Iterator<String> iter = indexMap.keySet().iterator();
- while (iter.hasNext())
- {
- if (j == maxCount)
- {
- getDocumentIngestDataChunk(rval,indexMap,outputConnectionName,list);
- j = 0;
- list.clear();
- }
- list.add(iter.next());
- j++;
- }
- if (j > 0)
- getDocumentIngestDataChunk(rval,indexMap,outputConnectionName,list);
- return rval;
- }
- catch (ManifoldCFException e)
- {
- signalRollback();
- throw e;
- }
- catch (Error e)
- {
- signalRollback();
- throw e;
- }
- finally
- {
- endTransaction();
- }
- }
-
- /** Look up ingestion data for a documents.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
- *@param identifierHash is the hash of the id of the document.
- *@return the current document's ingestion data, or null if the document is not currently ingested.
- */
- @Override
- @Deprecated
- public DocumentIngestStatus getDocumentIngestData(String outputConnectionName,
- String identifierClass, String identifierHash)
- throws ManifoldCFException
- {
- return getDocumentIngestDataMultiple(outputConnectionName,new String[]{identifierClass},new String[]{identifierHash})[0];
- }
-
/** Calculate the average time interval between changes for a document.
* This is based on the data gathered for the document.
*@param pipelineSpecificationBasic is the basic pipeline specification.
@@ -2053,6 +1549,7 @@ public class IncrementalIngester extends
*@param identifierHashes are the hashes of the ids of the documents.
*@return the number of milliseconds between changes, or 0 if this cannot be calculated.
*/
+ @Override
public long[] getDocumentUpdateIntervalMultiple(
IPipelineSpecificationBasic pipelineSpecificationBasic,
String[] identifierClasses, String[] identifierHashes)
@@ -2127,39 +1624,6 @@ public class IncrementalIngester extends
new String[]{identifierClass},new String[]{identifierHash})[0];
}
- /** Calculate the average time interval between changes for a document.
- * This is based on the data gathered for the document.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
- *@param identifierHash is the hash of the id of the document.
- *@return the number of milliseconds between changes, or 0 if this cannot be calculated.
- */
- @Override
- @Deprecated
- public long getDocumentUpdateInterval(String outputConnectionName,
- String identifierClass, String identifierHash)
- throws ManifoldCFException
- {
- return getDocumentUpdateIntervalMultiple(outputConnectionName,new String[]{identifierClass},new String[]{identifierHash})[0];
- }
-
- /** Calculate the average time interval between changes for a document.
- * This is based on the data gathered for the document.
- *@param outputConnectionName is the name of the output connection associated with this action.
- *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
- *@param identifierHashes is the hashes of the ids of the documents.
- *@return the number of milliseconds between changes, or 0 if this cannot be calculated.
- */
- @Override
- @Deprecated
- public long[] getDocumentUpdateIntervalMultiple(String outputConnectionName,
- String[] identifierClasses, String[] identifierHashes)
- throws ManifoldCFException
- {
- return getDocumentUpdateIntervalMultiple(new RuntPipelineSpecificationBasic(outputConnectionName),
- identifierClasses,identifierHashes);
- }
-
/** Calculate the number of clauses.
*/
protected int maxClauseGetIntervals(String[] outputConnectionNames)
@@ -3100,12 +2564,12 @@ public class IncrementalIngester extends
public static class PipelineCheckEntryPoint
{
protected final IPipelineConnector pipelineConnector;
- protected final String pipelineDescriptionString;
+ protected final VersionContext pipelineDescriptionString;
protected final IOutputCheckActivity checkActivity;
public PipelineCheckEntryPoint(
IPipelineConnector pipelineConnector,
- String pipelineDescriptionString,
+ VersionContext pipelineDescriptionString,
IOutputCheckActivity checkActivity)
{
this.pipelineConnector= pipelineConnector;
@@ -3317,12 +2781,12 @@ public class IncrementalIngester extends
public static class PipelineAddEntryPoint
{
protected final IPipelineConnector pipelineConnector;
- protected final String pipelineDescriptionString;
+ protected final VersionContext pipelineDescriptionString;
protected final IOutputAddActivity addActivity;
protected final boolean isActive;
public PipelineAddEntryPoint(IPipelineConnector pipelineConnector,
- String pipelineDescriptionString,
+ VersionContext pipelineDescriptionString,
IOutputAddActivity addActivity,
boolean isActive)
{
@@ -3382,7 +2846,7 @@ public class IncrementalIngester extends
protected final IOutputActivity activity;
public OutputAddEntryPoint(IOutputConnector outputConnector,
- String outputDescriptionString,
+ VersionContext outputDescriptionString,
IOutputActivity activity,
boolean isActive,
String outputConnectionName,
@@ -3517,13 +2981,13 @@ public class IncrementalIngester extends
// next version comes along, and will be deleted if called for also.
noteDocumentIngest(outputConnectionName,docKey,null,null,null,null,null,ingestTime,documentURI,documentURIHash);
int result = super.addOrReplaceDocumentWithException(documentURI, document, authorityNameString);
- noteDocumentIngest(outputConnectionName,docKey,documentVersion,transformationVersion,pipelineDescriptionString,parameterVersion,authorityNameString,ingestTime,documentURI,documentURIHash);
+ noteDocumentIngest(outputConnectionName,docKey,documentVersion,transformationVersion,pipelineDescriptionString.getVersionString(),parameterVersion,authorityNameString,ingestTime,documentURI,documentURIHash);
return result;
}
// If we get here, it means we are noting that the document was examined, but that no change was required. This is signaled
// to noteDocumentIngest by having the null documentURI.
- noteDocumentIngest(outputConnectionName,docKey,documentVersion,transformationVersion,pipelineDescriptionString,parameterVersion,authorityNameString,ingestTime,null,null);
+ noteDocumentIngest(outputConnectionName,docKey,documentVersion,transformationVersion,pipelineDescriptionString.getVersionString(),parameterVersion,authorityNameString,ingestTime,null,null);
return IPipelineConnector.DOCUMENTSTATUS_ACCEPTED;
}
finally
@@ -3636,9 +3100,9 @@ public class IncrementalIngester extends
/** Pipeline specification for backwards-compatible methods without pipelines */
protected static class RuntPipelineSpecification extends RuntPipelineSpecificationBasic implements IPipelineSpecification
{
- protected final String outputDescriptionString;
+ protected final VersionContext outputDescriptionString;
- public RuntPipelineSpecification(String outputConnectionName, String outputDescriptionString)
+ public RuntPipelineSpecification(String outputConnectionName, VersionContext outputDescriptionString)
{
super(outputConnectionName);
this.outputDescriptionString = outputDescriptionString;
@@ -3658,7 +3122,7 @@ public class IncrementalIngester extends
*@return the description string that stage.
*/
@Override
- public String getStageDescriptionString(int stage)
+ public VersionContext getStageDescriptionString(int stage)
{
if (stage == 0)
return outputDescriptionString;
@@ -3676,7 +3140,7 @@ public class IncrementalIngester extends
protected final String oldTransformationVersion;
protected final String oldAuthorityNameString;
- public RuntPipelineSpecificationWithVersions(String outputConnectionName, String outputDescriptionString,
+ public RuntPipelineSpecificationWithVersions(String outputConnectionName, VersionContext outputDescriptionString,
String oldDocumentVersion, String oldParameterVersion, String oldOutputVersion, String oldTransformationVersion,
String oldAuthorityNameString)
{