You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/10 09:11:44 UTC
svn commit: r1630671 - /manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
Author: kwright
Date: Fri Oct 10 07:11:44 2014
New Revision: 1630671
URL: http://svn.apache.org/r1630671
Log:
Add logging and activity recording to document filter. Part of CONNECTORS-1068.
Modified:
manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
Modified: manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java?rev=1630671&r1=1630670&r2=1630671&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java (original)
+++ manifoldcf/trunk/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java Fri Oct 10 07:11:44 2014
@@ -38,7 +38,21 @@ public class DocumentFilter extends org.
private static final String EDIT_SPECIFICATION_CONTENTS_HTML = "editSpecification_Contents.html";
private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
+
+ protected static final String ACTIVITY_FILTER = "filter";
+
+ protected static final String[] activitiesList = new String[]{ACTIVITY_FILTER};
+ /** Return a list of activities that this connector generates.
+ * The connector does NOT need to be connected before this method is called.
+ *@return the set of activities.
+ */
+ @Override
+ public String[] getActivitiesList()
+ {
+ return activitiesList;
+ }
+
/** Constructor.
*/
public DocumentFilter(){
@@ -155,13 +169,42 @@ public class DocumentFilter extends org.
{
// Hard filtering (in case connectors don't call check methods above)
SpecPacker sp = new SpecPacker(outputDescription.getSpecification());
- if (!checkURLIndexable(sp, outputDescription, documentURI, activities) ||
- !checkLengthIndexable(sp, outputDescription, document.getBinaryLength(), activities) ||
- !checkMimeTypeIndexable(sp, outputDescription, document.getMimeType(), activities) ||
- !checkDateIndexable(sp, outputDescription, document.getModifiedDate(), activities)) {
+ if (!checkURLIndexable(sp, outputDescription, documentURI, activities))
+ {
+ activities.noDocument();
+ activities.recordActivity(null, ACTIVITY_FILTER, null, documentURI, "FILTEREDURL", "Rejected due to URL ('"+documentURI+"')");
+ if (Logging.ingest.isDebugEnabled())
+ Logging.ingest.debug("Document filter: Rejected document "+documentURI+" due to URL ('"+documentURI+"')");
+ return DOCUMENTSTATUS_REJECTED;
+ }
+
+ if (!checkLengthIndexable(sp, outputDescription, document.getBinaryLength(), activities))
+ {
+ activities.noDocument();
+ activities.recordActivity(null, ACTIVITY_FILTER, null, documentURI, "FILTEREDLENGTH", "Rejected due to length ("+document.getBinaryLength()+")");
+ if (Logging.ingest.isDebugEnabled())
+ Logging.ingest.debug("Document filter: Rejected document "+documentURI+" due to length ("+document.getBinaryLength()+")");
+ return DOCUMENTSTATUS_REJECTED;
+ }
+
+ if (!checkMimeTypeIndexable(sp, outputDescription, document.getMimeType(), activities))
+ {
activities.noDocument();
+ activities.recordActivity(null, ACTIVITY_FILTER, null, documentURI, "FILTEREDMIMETYPE", "Rejected due to mime type ('"+document.getMimeType()+"')");
+ if (Logging.ingest.isDebugEnabled())
+ Logging.ingest.debug("Document filter: Rejected document "+documentURI+" due to mime type ('"+document.getMimeType()+"')");
return DOCUMENTSTATUS_REJECTED;
}
+
+ if (!checkDateIndexable(sp, outputDescription, document.getModifiedDate(), activities))
+ {
+ activities.noDocument();
+ activities.recordActivity(null, ACTIVITY_FILTER, null, documentURI, "FILTEREDDATE", "Rejected due to date ('"+document.getModifiedDate()+"')");
+ if (Logging.ingest.isDebugEnabled())
+ Logging.ingest.debug("Document filter: Rejected document "+documentURI+" due to date ('"+document.getModifiedDate()+"')");
+ return DOCUMENTSTATUS_REJECTED;
+ }
+
return activities.sendDocument(documentURI, document);
}