You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2020/06/19 12:32:27 UTC

[asterixdb] 02/06: [ASTERIXDB-2743][EXT] Skip not found files when querying S3 external dataset

This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 619b8c53b95bb4a67a6a527fbf41a63b0522173e
Author: Hussain Towaileb <Hu...@Gmail.com>
AuthorDate: Thu Jun 11 17:49:45 2020 +0300

    [ASTERIXDB-2743][EXT] Skip not found files when querying S3 external dataset
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - When querying an S3 external dataset, if some files are not found
      because they were possibly deleted while querying, skip those files
      and continue to the next ones instead of failing.
    
    Change-Id: I6d3e691a1714228f1844faf05095cf8aa439ea1d
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6723
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Hussain Towaileb <hu...@gmail.com>
    Reviewed-by: Ali Alsuliman <al...@gmail.com>
---
 .../external/input/record/reader/aws/AwsS3InputStream.java    | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
index bcbf540..9e10e6a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStream.java
@@ -33,13 +33,19 @@ import org.apache.asterix.external.util.ExternalDataConstants;
 import org.apache.asterix.external.util.ExternalDataUtils;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.util.CleanupUtils;
+import org.apache.hyracks.util.LogRedactionUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 
 import software.amazon.awssdk.core.exception.SdkException;
 import software.amazon.awssdk.services.s3.S3Client;
 import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.NoSuchKeyException;
 
 public class AwsS3InputStream extends AbstractMultipleInputStream {
 
+    private static final Logger LOGGER = LogManager.getLogger();
+
     // Configuration
     private final Map<String, String> configuration;
 
@@ -83,6 +89,11 @@ public class AwsS3InputStream extends AbstractMultipleInputStream {
         // the header, then the S3 stream gets closed in the close method
         try {
             in = s3Client.getObject(getObjectRequest);
+        } catch (NoSuchKeyException ex) {
+            LOGGER.debug(() -> "Key " + LogRedactionUtil.userData(getObjectRequest.key()) + " was not found in bucket "
+                    + getObjectRequest.bucket());
+            nextFileIndex++;
+            return advance();
         } catch (SdkException ex) {
             throw new RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex.getMessage());
         }