You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2015/09/08 20:53:58 UTC
svn commit: r1701852 - in
/manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3:
AmazonS3Connector.java GenericDocumentProcess.java
Author: kwright
Date: Tue Sep 8 18:53:58 2015
New Revision: 1701852
URL: http://svn.apache.org/r1701852
Log:
Add exception handling
Modified:
manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java
manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java
Modified: manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java?rev=1701852&r1=1701851&r2=1701852&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java (original)
+++ manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java Tue Sep 8 18:53:58 2015
@@ -180,8 +180,9 @@ public class AmazonS3Connector extends B
* Get the Amazons3 client, relevant access keys should have been posted
* already
* @return
+ * @throws ManifoldCFException
*/
- protected AmazonS3 getClient() {
+ protected AmazonS3 getClient() throws ManifoldCFException {
if (amazonS3 == null) {
try {
BasicAWSCredentials awsCreds = new BasicAWSCredentials(
@@ -191,7 +192,8 @@ public class AmazonS3Connector extends B
catch (Exception e) {
Logging.connectors
.error("Error while amazon s3 connectionr", e);
-
+ throw new ManifoldCFException(
+ "Amazon client can not connect at the moment",e.getCause());
}
}
lastSessionFetch = System.currentTimeMillis();
@@ -612,12 +614,12 @@ public class AmazonS3Connector extends B
if (unparsedBuckets != null && StringUtils.isNotEmpty(unparsedBuckets))
bucketsToRemove = unparsedBuckets.split(BUCKET_SPLITTER);
// get seeds
- GeedSeeds(activities, bucketsToRemove);
+ getSeeds(activities, bucketsToRemove);
return new Long(seedTime).toString();
}
- private void GeedSeeds(ISeedingActivity activities, String[] buckets)
+ private void getSeeds(ISeedingActivity activities, String[] buckets)
throws ManifoldCFException, ServiceInterruption {
GetSeedsThread t = new GetSeedsThread(getClient(), buckets);
try {
@@ -726,10 +728,7 @@ public class AmazonS3Connector extends B
IProcessActivity activities, int jobMode,
boolean usesDefaultAuthority) throws ManifoldCFException,
ServiceInterruption {
- AmazonS3 amazons3Client = getClient();
- if (amazons3Client == null)
- throw new ManifoldCFException(
- "Amazon client can not connect at the moment");
+ AmazonS3 amazons3Client = getClient();
documentProcess.doProcessDocument(documentIdentifiers, statuses, spec,
activities, jobMode, usesDefaultAuthority, amazons3Client);
Modified: manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java?rev=1701852&r1=1701851&r2=1701852&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java (original)
+++ manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java Tue Sep 8 18:53:58 2015
@@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InterruptedIOException;
import java.util.Date;
import java.util.Set;
@@ -124,7 +125,7 @@ public class GenericDocumentProcess exte
String[] users = getUsers(grants);
aclsToUse = users;
-
+
sb.append(lastModified.toString());
versionString = sb.toString();
@@ -148,16 +149,14 @@ public class GenericDocumentProcess exte
String errorDesc = null;
Long fileSize = null;
- String mimeType = TEXT_PLAIN;//default
- long fileLength = s3Obj.getObjectMetadata()
- .getContentLength();
-
+ String mimeType = TEXT_PLAIN;// default
+ long fileLength = s3Obj.getObjectMetadata().getContentLength();
+
if (!activities.checkLengthIndexable(fileLength)) {
errorCode = activities.EXCLUDED_LENGTH;
errorDesc = "Excluded because of document length ("
+ fileLength + ")";
- activities.noDocument(documentIdentifier,
- versionString);
+ activities.noDocument(documentIdentifier, versionString);
continue;
}
@@ -166,28 +165,25 @@ public class GenericDocumentProcess exte
if (!activities.checkURLIndexable(documentURI)) {
errorCode = activities.EXCLUDED_URL;
- errorDesc = "Excluded because of URL ('"
- + documentURI + "')";
- activities.noDocument(documentIdentifier,
- versionString);
+ errorDesc = "Excluded because of URL ('" + documentURI
+ + "')";
+ activities.noDocument(documentIdentifier, versionString);
continue;
}
if (!activities.checkMimeTypeIndexable(mimeType)) {
errorCode = activities.EXCLUDED_MIMETYPE;
- errorDesc = "Excluded because of mime type ('"
- + mimeType + "')";
- activities.noDocument(documentIdentifier,
- versionString);
+ errorDesc = "Excluded because of mime type ('" + mimeType
+ + "')";
+ activities.noDocument(documentIdentifier, versionString);
continue;
}
-
+
if (!activities.checkDateIndexable(lastModified)) {
errorCode = activities.EXCLUDED_DATE;
- errorDesc = "Excluded because of date ("
- + lastModified + ")";
- activities.noDocument(documentIdentifier,
- versionString);
+ errorDesc = "Excluded because of date (" + lastModified
+ + ")";
+ activities.noDocument(documentIdentifier, versionString);
continue;
}
@@ -206,8 +202,7 @@ public class GenericDocumentProcess exte
denyAclsToUse = new String[] { AmazonS3Connector.GLOBAL_DENY_TOKEN };
else
denyAclsToUse = new String[0];
- rd.setSecurity(
- RepositoryDocument.SECURITY_TYPE_DOCUMENT,
+ rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT,
aclsToUse, denyAclsToUse);
rd.setMimeType(mimeType);
@@ -215,12 +210,10 @@ public class GenericDocumentProcess exte
if (lastModified != null)
rd.setModifiedDate(lastModified);
-
- //assign the stream
+ // assign the stream
rd.setBinary(in, fileLength);
- activities.ingestDocumentWithException(
- documentIdentifier, versionString,
- documentURI, rd);
+ activities.ingestDocumentWithException(documentIdentifier,
+ versionString, documentURI, rd);
errorCode = "OK";
fileSize = new Long(fileLength);
@@ -229,7 +222,6 @@ public class GenericDocumentProcess exte
handleIOException(e1);
}
finally {
-
// close input stream
if (in != null)
IOUtils.closeQuietly(in);
@@ -246,24 +238,39 @@ public class GenericDocumentProcess exte
}
- protected static void handleIOException(final IOException e1)
- throws ManifoldCFException, ServiceInterruption {
- Logging.connectors.error("Error while copying stream", e1);
- // Gotta handle this?? MHL
- throw new ManifoldCFException("Error copying stream: "+e1.getMessage(),e1);
+ protected static void handleIOException(final IOException e)
+ throws ManifoldCFException, ServiceInterruption {
+ Logging.connectors.error("Error while copying stream", e);
+ if (!(e instanceof java.net.SocketTimeoutException)
+ && (e instanceof InterruptedIOException)) {
+ throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+ ManifoldCFException.INTERRUPTED);
+ }
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("IO exception: " + e.getMessage(), e,
+ currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false);
}
- protected static void handleServiceException(final AmazonServiceException e1)
- throws ManifoldCFException, ServiceInterruption {
- // MHL to figure out what to throw?
- Logging.connectors.error(e1);
- throw new ManifoldCFException("Amazon service exception: "+e1.getMessage(),e1);
+ protected static void handleServiceException(final AmazonServiceException e)
+ throws ManifoldCFException, ServiceInterruption {
+ Logging.connectors.error("Service exception status : " + e.getStatusCode(),e);
+
+ if (!e.isRetryable()) {
+ throw new ManifoldCFException("Amazon service exception: "
+ + e.getMessage(), e.getCause());
+ }
+
+ throw new ServiceInterruption(e.getMessage(), System.currentTimeMillis()+300000L);
}
-
- protected static void handleClientException(final AmazonClientException e1)
- throws ManifoldCFException, ServiceInterruption {
- // MHL to figure out what to throw?
- Logging.connectors.error(e1);
- throw new ManifoldCFException("Amazon client exception: "+e1.getMessage(),e1);
+
+ protected static void handleClientException(final AmazonClientException e)
+ throws ManifoldCFException, ServiceInterruption {
+ Logging.connectors.error(e);
+ if (!e.isRetryable()) {
+ throw new ManifoldCFException("Amazon client exception: "
+ + e.getMessage(), e.getCause());
+ }
+
+ throw new ServiceInterruption(e.getMessage(), System.currentTimeMillis()+300000L);
}
}