You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/24 10:13:25 UTC
svn commit: r1634021 -
/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Author: kwright
Date: Fri Oct 24 08:13:24 2014
New Revision: 1634021
URL: http://svn.apache.org/r1634021
Log:
Minor cleanup; part of CONNECTORS-1077.
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1634021&r1=1634020&r2=1634021&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Fri Oct 24 08:13:24 2014
@@ -744,8 +744,6 @@ public class WebcrawlerConnector extends
continue;
processDocument(activities,documentIdentifier,versionString,indexDocument,metaHash,acls,filter);
- //continue;
-
break;
case RESULT_RETRY_DOCUMENT:
// Document could not be processed right now.
@@ -1258,7 +1256,7 @@ public class WebcrawlerConnector extends
}
- protected boolean processDocument(IProcessActivity activities, String documentIdentifier, String versionString,
+ protected void processDocument(IProcessActivity activities, String documentIdentifier, String versionString,
boolean indexDocument, Map<String,Set<String>> metaHash, String[] acls, DocumentURLFilter filter)
throws ManifoldCFException, ServiceInterruption
{
@@ -1278,7 +1276,7 @@ public class WebcrawlerConnector extends
errorCode = "CONTENTNOTINDEXABLE";
errorDesc = "Content not indexable";
activities.noDocument(documentIdentifier,versionString);
- return true;
+ return;
}
int responseCode = cache.getResponseCode(documentIdentifier);
@@ -1289,7 +1287,7 @@ public class WebcrawlerConnector extends
errorCode = "RESPONSECODENOTINDEXABLE";
errorDesc = "HTTP response code not indexable ("+responseCode+")";
activities.noDocument(documentIdentifier,versionString);
- return true;
+ return;
}
long dataLength = cache.getDataLength(documentIdentifier);
@@ -1300,7 +1298,7 @@ public class WebcrawlerConnector extends
errorCode = activities.EXCLUDED_LENGTH;
errorDesc = "Rejected due to length ("+dataLength+")";
activities.noDocument(documentIdentifier,versionString);
- return true;
+ return;
}
if (activities.checkURLIndexable(documentIdentifier) == false)
@@ -1310,7 +1308,7 @@ public class WebcrawlerConnector extends
errorCode = activities.EXCLUDED_URL;
errorDesc = "Rejected due to URL ('"+documentIdentifier+"')";
activities.noDocument(documentIdentifier,versionString);
- return true;
+ return;
}
String ingestURL = filter.isDocumentIndexable(documentIdentifier);
@@ -1321,7 +1319,7 @@ public class WebcrawlerConnector extends
errorCode = "JOBRESTRICTION";
errorDesc = "Rejected because job excludes this URL ('"+documentIdentifier+"')";
activities.noDocument(documentIdentifier,versionString);
- return true;
+ return;
}
// Check if it's a recognized content type
@@ -1352,7 +1350,7 @@ public class WebcrawlerConnector extends
errorCode = activities.EXCLUDED_MIMETYPE;
errorDesc = "Rejected because of mime type ("+contentType+")";
activities.noDocument(documentIdentifier,versionString);
- return true;
+ return;
}
// Ingest the document
@@ -1442,7 +1440,6 @@ public class WebcrawlerConnector extends
else
Logging.connectors.error("WEB: Expected a cached document for '"+documentIdentifier+"', but none present!");
- return false;
}
catch (ManifoldCFException e)
{