You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/24 10:13:25 UTC

svn commit: r1634021 - /manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java

Author: kwright
Date: Fri Oct 24 08:13:24 2014
New Revision: 1634021

URL: http://svn.apache.org/r1634021
Log:
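Minor cleanup: remove a leftover commented-out "continue" and change processDocument() from boolean to void, since the call site does not use its return value. Part of CONNECTORS-1077.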

Modified:
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1634021&r1=1634020&r2=1634021&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Fri Oct 24 08:13:24 2014
@@ -744,8 +744,6 @@ public class WebcrawlerConnector extends
               continue;
             
             processDocument(activities,documentIdentifier,versionString,indexDocument,metaHash,acls,filter);
-              //continue;
-            
             break;
           case RESULT_RETRY_DOCUMENT:
             // Document could not be processed right now.
@@ -1258,7 +1256,7 @@ public class WebcrawlerConnector extends
     
   }
 
-  protected boolean processDocument(IProcessActivity activities, String documentIdentifier, String versionString,
+  protected void processDocument(IProcessActivity activities, String documentIdentifier, String versionString,
     boolean indexDocument, Map<String,Set<String>> metaHash, String[] acls, DocumentURLFilter filter)
     throws ManifoldCFException, ServiceInterruption
   {
@@ -1278,7 +1276,7 @@ public class WebcrawlerConnector extends
         errorCode = "CONTENTNOTINDEXABLE";
         errorDesc = "Content not indexable";
         activities.noDocument(documentIdentifier,versionString);
-        return true;
+        return;
       }
       
       int responseCode = cache.getResponseCode(documentIdentifier);
@@ -1289,7 +1287,7 @@ public class WebcrawlerConnector extends
         errorCode = "RESPONSECODENOTINDEXABLE";
         errorDesc = "HTTP response code not indexable ("+responseCode+")";
         activities.noDocument(documentIdentifier,versionString);
-        return true;
+        return;
       }
 
       long dataLength = cache.getDataLength(documentIdentifier);
@@ -1300,7 +1298,7 @@ public class WebcrawlerConnector extends
         errorCode = activities.EXCLUDED_LENGTH;
         errorDesc = "Rejected due to length ("+dataLength+")";
         activities.noDocument(documentIdentifier,versionString);
-        return true;
+        return;
       }
       
       if (activities.checkURLIndexable(documentIdentifier) == false)
@@ -1310,7 +1308,7 @@ public class WebcrawlerConnector extends
         errorCode = activities.EXCLUDED_URL;
         errorDesc = "Rejected due to URL ('"+documentIdentifier+"')";
         activities.noDocument(documentIdentifier,versionString);
-        return true;
+        return;
       }
 
       String ingestURL = filter.isDocumentIndexable(documentIdentifier);
@@ -1321,7 +1319,7 @@ public class WebcrawlerConnector extends
         errorCode = "JOBRESTRICTION";
         errorDesc = "Rejected because job excludes this URL ('"+documentIdentifier+"')";
         activities.noDocument(documentIdentifier,versionString);
-        return true;
+        return;
       }
       
       // Check if it's a recognized content type
@@ -1352,7 +1350,7 @@ public class WebcrawlerConnector extends
         errorCode = activities.EXCLUDED_MIMETYPE;
         errorDesc = "Rejected because of mime type ("+contentType+")";
         activities.noDocument(documentIdentifier,versionString);
-        return true;
+        return;
       }
       
       // Ingest the document
@@ -1442,7 +1440,6 @@ public class WebcrawlerConnector extends
       else
         Logging.connectors.error("WEB: Expected a cached document for '"+documentIdentifier+"', but none present!");
       
-      return false;
     }
     catch (ManifoldCFException e)
     {