You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/08 12:20:49 UTC
svn commit: r1630062 - in /manifoldcf/branches/CONNECTORS-1067/framework:
agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/
agents/src/main/java/org/apache/manifoldcf/agents/interfaces/
agents/src/main/java/org/apache/manifoldcf/agen...
Author: kwright
Date: Wed Oct 8 10:20:48 2014
New Revision: 1630062
URL: http://svn.apache.org/r1630062
Log:
Add date check method to agents part of the world
Modified:
manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java
manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
manifoldcf/branches/CONNECTORS-1067/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Modified: manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1630062&r1=1630061&r2=1630062&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Wed Oct 8 10:20:48 2014
@@ -227,6 +227,34 @@ public class IncrementalIngester extends
return pipelineSpecificationBasic.getStageConnectionName(pipelineSpecificationBasic.getOutputStage(0));
}
+ /** Check if a date is indexable.
+ *@param pipelineSpecification is the pipeline specification.
+ *@param date is the date to check.
+ *@param activity are the activities available to this method.
+ *@return true if the date is indexable.
+ */
+ @Override
+ public boolean checkDateIndexable(
+ IPipelineSpecification pipelineSpecification,
+ Date date,
+ IOutputCheckActivity activity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ PipelineObject pipeline = pipelineGrab(
+ new PipelineConnections(pipelineSpecification));
+ if (pipeline == null)
+ // A connector is not installed; treat this as a service interruption.
+ throw new ServiceInterruption("One or more connectors are not installed",0L);
+ try
+ {
+ return pipeline.checkDateIndexable(date,activity);
+ }
+ finally
+ {
+ pipeline.release();
+ }
+ }
+
/** Check if a mime type is indexable.
*@param pipelineSpecification is the pipeline specification.
*@param mimeType is the mime type to check.
@@ -2485,6 +2513,18 @@ public class IncrementalIngester extends
addActivities.noDocument();
}
+ /** Detect if a date is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param date is the date of the document.
+ *@return true if the date can be accepted by the downstream connection.
+ */
+ @Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return addActivities.checkDateIndexable(date);
+ }
+
/** Detect if a mime type is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param mimeType is the mime type of the document.
@@ -2562,7 +2602,14 @@ public class IncrementalIngester extends
this.transformationConnectors = transformationConnectors;
this.outputConnectors = outputConnectors;
}
-
+
+ public boolean checkDateIndexable(Date date, IOutputCheckActivity finalActivity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ PipelineCheckFanout entryPoint = buildCheckPipeline(finalActivity);
+ return entryPoint.checkDateIndexable(date);
+ }
+
public boolean checkMimeTypeIndexable(String mimeType, IOutputCheckActivity finalActivity)
throws ManifoldCFException, ServiceInterruption
{
@@ -2824,6 +2871,19 @@ public class IncrementalIngester extends
}
@Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ // OR all results
+ for (PipelineCheckEntryPoint p : entryPoints)
+ {
+ if (p.checkDateIndexable(date))
+ return true;
+ }
+ return false;
+ }
+
+ @Override
public boolean checkMimeTypeIndexable(String mimeType)
throws ManifoldCFException, ServiceInterruption
{
@@ -2894,6 +2954,12 @@ public class IncrementalIngester extends
this.checkActivity = checkActivity;
}
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return pipelineConnector.checkDateIndexable(pipelineDescriptionString,date,checkActivity);
+ }
+
public boolean checkMimeTypeIndexable(String mimeType)
throws ManifoldCFException, ServiceInterruption
{
@@ -2948,6 +3014,19 @@ public class IncrementalIngester extends
}
@Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ // OR all results
+ for (PipelineAddEntryPoint p : entryPoints)
+ {
+ if (p.checkDateIndexable(date))
+ return true;
+ }
+ return false;
+ }
+
+ @Override
public boolean checkMimeTypeIndexable(String mimeType)
throws ManifoldCFException, ServiceInterruption
{
@@ -3137,6 +3216,12 @@ public class IncrementalIngester extends
{
return isActive;
}
+
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return pipelineConnector.checkDateIndexable(pipelineDescriptionString,date,addActivity);
+ }
public boolean checkMimeTypeIndexable(String mimeType)
throws ManifoldCFException, ServiceInterruption
@@ -3782,6 +3867,18 @@ public class IncrementalIngester extends
activities.recordActivity(startTime,activityType,dataSize,entityURI,resultCode,resultDescription);
}
+ /** Detect if a date is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param date is the date of the document.
+ *@return true if the document described by the date can be accepted by the downstream connection.
+ */
+ @Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return activities.checkDateIndexable(date);
+ }
+
/** Detect if a mime type is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param mimeType is the mime type of the document.
Modified: manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java?rev=1630062&r1=1630061&r2=1630062&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java Wed Oct 8 10:20:48 2014
@@ -90,6 +90,18 @@ public interface IIncrementalIngester
public VersionContext getTransformationDescription(String transformationConnectionName, Specification spec)
throws ManifoldCFException, ServiceInterruption;
+ /** Check if a document date is indexable.
+ *@param pipelineSpecification is the pipeline specification.
+ *@param date is the date to check
+ *@param activity are the activities available to this method.
+ *@return true if the document with that date is indexable.
+ */
+ public boolean checkDateIndexable(
+ IPipelineSpecification pipelineSpecification,
+ Date date,
+ IOutputCheckActivity activity)
+ throws ManifoldCFException, ServiceInterruption;
+
/** Check if a mime type is indexable.
*@param pipelineSpecification is the pipeline specification.
*@param mimeType is the mime type to check.
Modified: manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java?rev=1630062&r1=1630061&r2=1630062&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java Wed Oct 8 10:20:48 2014
@@ -22,6 +22,7 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.agents.interfaces.*;
import java.io.*;
+import java.util.*;
/** This interface abstracts from the activities that a transformation connector can do
when checking a document.
@@ -30,6 +31,14 @@ public interface IOutputCheckActivity
{
public static final String _rcsid = "@(#)$Id$";
+ /** Detect if a document date is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param date is the date of the document.
+ *@return true if the document with that date can be accepted by the downstream connection.
+ */
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption;
+
/** Detect if a mime type is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param mimeType is the mime type of the document.
Modified: manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java?rev=1630062&r1=1630061&r2=1630062&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java Wed Oct 8 10:20:48 2014
@@ -57,6 +57,16 @@ public interface IPipelineConnector exte
public VersionContext getPipelineDescription(Specification spec)
throws ManifoldCFException, ServiceInterruption;
+ /** Detect if a document date is acceptable or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param pipelineDescription is the document's pipeline version string, for this connection.
+ *@param date is the date of the document.
+ *@param checkActivity is an object including the activities that can be performed by this method.
+ *@return true if the document with that date can be accepted by this connector.
+ */
+ public boolean checkDateIndexable(VersionContext pipelineDescription, Date date, IOutputCheckActivity checkActivity)
+ throws ManifoldCFException, ServiceInterruption;
+
/** Detect if a mime type is acceptable or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param pipelineDescription is the document's pipeline version string, for this connection.
Modified: manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java?rev=1630062&r1=1630061&r2=1630062&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java Wed Oct 8 10:20:48 2014
@@ -81,6 +81,20 @@ public abstract class BaseOutputConnecto
// The base implementation does nothing here.
}
+ /** Detect if a document date is acceptable or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param pipelineDescription is the document's pipeline version string, for this connection.
+ *@param date is the date of the document.
+ *@param checkActivity is an object including the activities that can be performed by this method.
+ *@return true if the document with that date can be accepted by this connector.
+ */
+ @Override
+ public boolean checkDateIndexable(VersionContext pipelineDescription, Date date, IOutputCheckActivity checkActivity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return true;
+ }
+
/** Detect if a mime type is acceptable or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param pipelineDescription is the document's pipeline version string, for this connection.
Modified: manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java?rev=1630062&r1=1630061&r2=1630062&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java Wed Oct 8 10:20:48 2014
@@ -70,6 +70,20 @@ public abstract class BaseTransformation
return false;
}
+ /** Detect if a document date is acceptable or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param pipelineDescription is the document's pipeline version string, for this connection.
+ *@param date is the date of the document.
+ *@param checkActivity is an object including the activities that can be performed by this method.
+ *@return true if the document with that date can be accepted by this connector.
+ */
+ @Override
+ public boolean checkDateIndexable(VersionContext pipelineDescription, Date date, IOutputCheckActivity checkActivity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return checkActivity.checkDateIndexable(date);
+ }
+
/** Detect if a mime type is acceptable or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param pipelineDescription is the document's pipeline version string, for this connection.
Modified: manifoldcf/branches/CONNECTORS-1067/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1630062&r1=1630061&r2=1630062&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Wed Oct 8 10:20:48 2014
@@ -2318,6 +2318,18 @@ public class WorkerThread extends Thread
{
}
+ /** Detect if a date is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param date is the document's date
+ *@return true if the document with that date can be accepted by the downstream connection.
+ */
+ @Override
+ public boolean checkDateIndexable(Date date)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return false;
+ }
+
/** Detect if a mime type is acceptable downstream or not. This method is used to determine whether it makes sense to fetch a document
* in the first place.
*@param mimeType is the mime type of the document.