You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2015/09/08 20:53:58 UTC

svn commit: r1701852 - in /manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3: AmazonS3Connector.java GenericDocumentProcess.java

Author: kwright
Date: Tue Sep  8 18:53:58 2015
New Revision: 1701852

URL: http://svn.apache.org/r1701852
Log:
Add exception handling

Modified:
    manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java
    manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java

Modified: manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java?rev=1701852&r1=1701851&r2=1701852&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java (original)
+++ manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/AmazonS3Connector.java Tue Sep  8 18:53:58 2015
@@ -180,8 +180,9 @@ public class AmazonS3Connector extends B
    * Get the Amazons3 client, relevant access keys should have been posted
    * already
    * @return
+   * @throws ManifoldCFException if setting up the Amazon S3 client fails
    */
-  protected AmazonS3 getClient() {
+  protected AmazonS3 getClient() throws ManifoldCFException {
     if (amazonS3 == null) {
       try {
         BasicAWSCredentials awsCreds = new BasicAWSCredentials(
@@ -191,7 +192,8 @@ public class AmazonS3Connector extends B
       catch (Exception e) {
         Logging.connectors
             .error("Error while amazon s3 connectionr", e);
-
+        throw new ManifoldCFException(
+                "Amazon client can not connect at the moment",e.getCause());
       }
     }
     lastSessionFetch = System.currentTimeMillis();
@@ -612,12 +614,12 @@ public class AmazonS3Connector extends B
     if (unparsedBuckets != null && StringUtils.isNotEmpty(unparsedBuckets))
       bucketsToRemove = unparsedBuckets.split(BUCKET_SPLITTER);
     // get seeds
-    GeedSeeds(activities, bucketsToRemove);
+    getSeeds(activities, bucketsToRemove);
 
     return new Long(seedTime).toString();
   }
 
-  private void GeedSeeds(ISeedingActivity activities, String[] buckets)
+  private void getSeeds(ISeedingActivity activities, String[] buckets)
       throws ManifoldCFException, ServiceInterruption {
     GetSeedsThread t = new GetSeedsThread(getClient(), buckets);
     try {
@@ -726,10 +728,7 @@ public class AmazonS3Connector extends B
       IProcessActivity activities, int jobMode,
       boolean usesDefaultAuthority) throws ManifoldCFException,
       ServiceInterruption {
-    AmazonS3 amazons3Client = getClient();
-    if (amazons3Client == null)
-      throw new ManifoldCFException(
-          "Amazon client can not connect at the moment");
+    AmazonS3 amazons3Client = getClient();    
     documentProcess.doProcessDocument(documentIdentifiers, statuses, spec,
         activities, jobMode, usesDefaultAuthority, amazons3Client);
 

Modified: manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java?rev=1701852&r1=1701851&r2=1701852&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java (original)
+++ manifoldcf/branches/CONNECTORS-1233/connectors/amazons3/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/amazons3/GenericDocumentProcess.java Tue Sep  8 18:53:58 2015
@@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InterruptedIOException;
 import java.util.Date;
 import java.util.Set;
 
@@ -124,7 +125,7 @@ public class GenericDocumentProcess exte
         String[] users = getUsers(grants);
 
         aclsToUse = users;
-        
+
         sb.append(lastModified.toString());
         versionString = sb.toString();
 
@@ -148,16 +149,14 @@ public class GenericDocumentProcess exte
         String errorDesc = null;
         Long fileSize = null;
 
-        String mimeType = TEXT_PLAIN;//default        
-        long fileLength = s3Obj.getObjectMetadata()
-          .getContentLength();
-        
+        String mimeType = TEXT_PLAIN;// default
+        long fileLength = s3Obj.getObjectMetadata().getContentLength();
+
         if (!activities.checkLengthIndexable(fileLength)) {
           errorCode = activities.EXCLUDED_LENGTH;
           errorDesc = "Excluded because of document length ("
               + fileLength + ")";
-          activities.noDocument(documentIdentifier,
-              versionString);
+          activities.noDocument(documentIdentifier, versionString);
           continue;
         }
 
@@ -166,28 +165,25 @@ public class GenericDocumentProcess exte
 
         if (!activities.checkURLIndexable(documentURI)) {
           errorCode = activities.EXCLUDED_URL;
-          errorDesc = "Excluded because of URL ('"
-              + documentURI + "')";
-          activities.noDocument(documentIdentifier,
-              versionString);
+          errorDesc = "Excluded because of URL ('" + documentURI
+              + "')";
+          activities.noDocument(documentIdentifier, versionString);
           continue;
         }
 
         if (!activities.checkMimeTypeIndexable(mimeType)) {
           errorCode = activities.EXCLUDED_MIMETYPE;
-          errorDesc = "Excluded because of mime type ('"
-              + mimeType + "')";
-          activities.noDocument(documentIdentifier,
-              versionString);
+          errorDesc = "Excluded because of mime type ('" + mimeType
+              + "')";
+          activities.noDocument(documentIdentifier, versionString);
           continue;
         }
-        
+
         if (!activities.checkDateIndexable(lastModified)) {
           errorCode = activities.EXCLUDED_DATE;
-          errorDesc = "Excluded because of date ("
-              + lastModified + ")";
-          activities.noDocument(documentIdentifier,
-              versionString);
+          errorDesc = "Excluded because of date (" + lastModified
+              + ")";
+          activities.noDocument(documentIdentifier, versionString);
           continue;
         }
 
@@ -206,8 +202,7 @@ public class GenericDocumentProcess exte
             denyAclsToUse = new String[] { AmazonS3Connector.GLOBAL_DENY_TOKEN };
           else
             denyAclsToUse = new String[0];
-          rd.setSecurity(
-              RepositoryDocument.SECURITY_TYPE_DOCUMENT,
+          rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT,
               aclsToUse, denyAclsToUse);
 
           rd.setMimeType(mimeType);
@@ -215,12 +210,10 @@ public class GenericDocumentProcess exte
           if (lastModified != null)
             rd.setModifiedDate(lastModified);
 
-
-          //assign the stream
+          // assign the stream
           rd.setBinary(in, fileLength);
-          activities.ingestDocumentWithException(
-              documentIdentifier, versionString,
-              documentURI, rd);
+          activities.ingestDocumentWithException(documentIdentifier,
+              versionString, documentURI, rd);
 
           errorCode = "OK";
           fileSize = new Long(fileLength);
@@ -229,7 +222,6 @@ public class GenericDocumentProcess exte
           handleIOException(e1);
         }
         finally {
-
           // close input stream
           if (in != null)
             IOUtils.closeQuietly(in);
@@ -246,24 +238,39 @@ public class GenericDocumentProcess exte
 
   }
 
-  protected static void handleIOException(final IOException e1)
-    throws ManifoldCFException, ServiceInterruption {
-    Logging.connectors.error("Error while copying stream", e1);
-    // Gotta handle this?? MHL
-    throw new ManifoldCFException("Error copying stream: "+e1.getMessage(),e1);
+  protected static void handleIOException(final IOException e)
+      throws ManifoldCFException, ServiceInterruption {
+    Logging.connectors.error("Error while copying stream", e);
+    // SocketTimeoutException is an InterruptedIOException too, but it is treated as a retryable failure.
+    if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException)) {
+      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+          ManifoldCFException.INTERRUPTED);
+    }
+    final long now = System.currentTimeMillis();
+    throw new ServiceInterruption("IO exception: " + e.getMessage(), e,
+        now + 300000L, now + 3 * 60 * 60000L, -1, false);
+  }
 
-  protected static void handleServiceException(final AmazonServiceException e1)
-    throws ManifoldCFException, ServiceInterruption {
-    // MHL to figure out what to throw?
-    Logging.connectors.error(e1);
-    throw new ManifoldCFException("Amazon service exception: "+e1.getMessage(),e1);
+  protected static void handleServiceException(final AmazonServiceException e)
+      throws ManifoldCFException, ServiceInterruption {
+    // Non-retryable errors fail with ManifoldCFException; retryable ones raise a ServiceInterruption.
+    Logging.connectors.error("Service exception status : " + e.getStatusCode(),e);
+    if (!e.isRetryable()) {
+      // Chain e itself: e.getCause() may be null, which would discard the original stack trace.
+      throw new ManifoldCFException("Amazon service exception: "
+          + e.getMessage(), e);
+    }
+    throw new ServiceInterruption(e.getMessage(), System.currentTimeMillis()+300000L);
+  }
-  
-  protected static void handleClientException(final AmazonClientException e1)
-    throws ManifoldCFException, ServiceInterruption {
-    // MHL to figure out what to throw?
-    Logging.connectors.error(e1);
-    throw new ManifoldCFException("Amazon client exception: "+e1.getMessage(),e1);
+
+  protected static void handleClientException(final AmazonClientException e)
+      throws ManifoldCFException, ServiceInterruption {
+    // Non-retryable errors fail with ManifoldCFException; retryable ones raise a ServiceInterruption.
+    Logging.connectors.error("Amazon client exception", e);
+    if (!e.isRetryable()) {
+      // Chain e itself: e.getCause() may be null, which would discard the original stack trace.
+      throw new ManifoldCFException("Amazon client exception: " + e.getMessage(), e);
+    }
+    throw new ServiceInterruption(e.getMessage(), System.currentTimeMillis()+300000L);
+  }
 }