You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/09 00:54:19 UTC
svn commit: r1630247 -
/manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
Author: kwright
Date: Wed Oct 8 22:54:18 2014
New Revision: 1630247
URL: http://svn.apache.org/r1630247
Log:
Do hard checks for documents
Modified:
manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
Modified: manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java?rev=1630247&r1=1630246&r2=1630247&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java (original)
+++ manifoldcf/branches/CONNECTORS-1068/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java Wed Oct 8 22:54:18 2014
@@ -63,6 +63,29 @@ public class DocumentFilter extends org.
return new VersionContext(sp.toPackedString(),params,os);
}
+ /** Detect if a document date is acceptable or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ * The decision is delegated to the SpecPacker-based overload of this method, using a SpecPacker built from the
+ * specification carried by outputDescription.
+ *@param outputDescription is the document's output version.
+ *@param date is the date of the document.
+ *@param activities is an object including the activities that can be performed by this method.
+ *@return true if the document with that date can be accepted by this connector.
+ */
+ @Override
+ public boolean checkDateIndexable(VersionContext outputDescription, Date date, IOutputCheckActivity activities)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ SpecPacker sp = new SpecPacker(outputDescription.getSpecification());
+ return checkDateIndexable(sp, outputDescription, date, activities);
+ }
+
+ protected boolean checkDateIndexable(SpecPacker sp, VersionContext outputDescription, Date date, IOutputCheckActivity activities)
+ throws ManifoldCFException, ServiceInterruption { // Date check against an already-built SpecPacker, so a caller can reuse one SpecPacker for several checks.
+ if (sp.checkDate(date)) // this filter's own date rule accepts the date
+ return super.checkDateIndexable(outputDescription, date, activities); // so the superclass check decides the final answer
+ else // this filter's own date rule rejects the date; the superclass is not consulted
+ return false;
+ }
+
/** Detect if a mime type is indexable or not. This method is used by participating repository connectors to pre-filter the number of
* unusable documents that will be passed to this output connector.
*@param outputDescription is the document's output version.
@@ -74,6 +97,11 @@ public class DocumentFilter extends org.
throws ManifoldCFException, ServiceInterruption
{
SpecPacker sp = new SpecPacker(outputDescription.getSpecification());
+ return checkMimeTypeIndexable(sp, outputDescription, mimeType, activities);
+ }
+
+ protected boolean checkMimeTypeIndexable(SpecPacker sp, VersionContext outputDescription, String mimeType, IOutputCheckActivity activities)
+ throws ManifoldCFException, ServiceInterruption {
if (sp.checkMimeType(mimeType))
return super.checkMimeTypeIndexable(outputDescription, mimeType, activities);
else
@@ -84,6 +112,11 @@ public class DocumentFilter extends org.
public boolean checkLengthIndexable(VersionContext outputDescription, long length, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption {
SpecPacker sp = new SpecPacker(outputDescription.getSpecification()); // build the SpecPacker from the specification carried by the version context
+ return checkLengthIndexable(sp, outputDescription, length, activities); // delegate to the SpecPacker-based overload, which performs the actual length check
+ }
+
+ protected boolean checkLengthIndexable(SpecPacker sp, VersionContext outputDescription, long length, IOutputCheckActivity activities)
+ throws ManifoldCFException, ServiceInterruption {
if (sp.checkLengthIndexable(length))
return super.checkLengthIndexable(outputDescription, length, activities);
else
@@ -94,6 +127,11 @@ public class DocumentFilter extends org.
public boolean checkURLIndexable(VersionContext outputDescription, String url, IOutputCheckActivity activities)
throws ManifoldCFException, ServiceInterruption {
SpecPacker sp = new SpecPacker(outputDescription.getSpecification()); // build the SpecPacker from the specification carried by the version context
+ return checkURLIndexable(sp, outputDescription, url, activities); // delegate to the SpecPacker-based overload, which performs the actual URL check
+ }
+
+ protected boolean checkURLIndexable(SpecPacker sp, VersionContext outputDescription, String url, IOutputCheckActivity activities)
+ throws ManifoldCFException, ServiceInterruption {
if (sp.checkURLIndexable(url))
return super.checkURLIndexable(outputDescription, url, activities);
else
@@ -103,9 +141,6 @@ public class DocumentFilter extends org.
/** Add (or replace) a document in the output data store using the connector.
* This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
* necessary.
- * The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the
- * output description, since that was what was partly used to determine if output should be taking place. So it may be necessary for this method to decode
- * an output description string in order to determine what should be done.
*@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
* and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
*@param outputDescription is the description string that was constructed for this document by the getOutputDescription() method.
@@ -118,6 +153,15 @@ public class DocumentFilter extends org.
public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
throws ManifoldCFException, ServiceInterruption, IOException
{
+ // Hard filtering (in case connectors don't call check methods above): re-apply the URL, length, mime type and
+ // modified-date checks to the document itself, sharing one SpecPacker across all four. The || chain stops at the first failing check.
+ SpecPacker sp = new SpecPacker(outputDescription.getSpecification());
+ if (!checkURLIndexable(sp, outputDescription, documentURI, activities) ||
+ !checkLengthIndexable(sp, outputDescription, document.getBinaryLength(), activities) ||
+ !checkMimeTypeIndexable(sp, outputDescription, document.getMimeType(), activities) ||
+ !checkDateIndexable(sp, outputDescription, document.getModifiedDate(), activities)) {
+ activities.noDocument(); // NOTE(review): presumably signals that no document is passed onward -- confirm against IOutputAddActivity
+ return DOCUMENTSTATUS_REJECTED; // at least one check failed, so the document is reported as rejected
+ }
return activities.sendDocument(documentURI, document); // all checks passed: hand the document to activities.sendDocument and return its status
}
@@ -433,6 +477,11 @@ public class DocumentFilter extends org.
return true;
}
+ public boolean checkDate(Date date) {
+ // MHL (presumably "more here later"): placeholder -- no date rule is evaluated yet, so every date is accepted.
+ return true;
+ }
+
public boolean checkMimeType(String mimeType) {
if (mimeType == null)
mimeType = "application/unknown";