You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/17 14:51:38 UTC

svn commit: r1632562 - /manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java

Author: kwright
Date: Fri Oct 17 12:51:38 2014
New Revision: 1632562

URL: http://svn.apache.org/r1632562
Log:
Record an activity message for all document rejections from the Tika connector

Modified:
    manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java

Modified: manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java?rev=1632562&r1=1632561&r2=1632562&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java (original)
+++ manifoldcf/trunk/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java Fri Oct 17 12:51:38 2014
@@ -157,6 +157,8 @@ public class TikaExtractor extends org.a
     if (!activities.checkMimeTypeIndexable("text/plain;charset=utf-8"))
     {
       activities.noDocument();
+      activities.recordActivity(null, ACTIVITY_EXTRACT, null, documentURI,
+        "MIMETYPEREJECTION", "Downstream pipeline rejected mime type 'text/plain;charset=utf-8'");
       return DOCUMENTSTATUS_REJECTED;
     }
 
@@ -263,6 +265,16 @@ public class TikaExtractor extends org.a
           os.close();
           length = new Long(ds.getBinaryLength());
         }
+        
+        // Check to be sure downstream pipeline will accept document of specified length
+        if (!activities.checkLengthIndexable(ds.getBinaryLength()))
+        {
+          activities.noDocument();
+          resultCode = "LENGTHREJECTION";
+          description = "Downstream pipeline rejected document with length "+ds.getBinaryLength();
+          return DOCUMENTSTATUS_REJECTED;
+        }
+
       }
       finally
       {
@@ -271,13 +283,6 @@ public class TikaExtractor extends org.a
           resultCode, description);
       }
       
-      // Check to be sure downstream pipeline will accept document of specified length
-      if (!activities.checkLengthIndexable(ds.getBinaryLength()))
-      {
-        activities.noDocument();
-        return DOCUMENTSTATUS_REJECTED;
-      }
-        
       // Parsing complete!
       // Create a copy of Repository Document
       RepositoryDocument docCopy = document.duplicate();