You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pc...@apache.org on 2022/08/30 03:46:42 UTC

[arrow] branch master updated: ARROW-17079: [C++] Improve error messages for AWS S3 calls (#13979)

This is an automated email from the ASF dual-hosted git repository.

pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new b43c6f6b18 ARROW-17079: [C++] Improve error messages for AWS S3 calls (#13979)
b43c6f6b18 is described below

commit b43c6f6b18acee019a500bc6a05af18c07f2ca79
Author: Philipp Moritz <pc...@gmail.com>
AuthorDate: Mon Aug 29 20:46:34 2022 -0700

    ARROW-17079: [C++] Improve error messages for AWS S3 calls (#13979)
    
    First part of improving error messages from S3 operations. We include the operation that failed in the error message now. This makes it easier to debug problems (e.g. bucket permissions or other infrastructure hickups) because it allows to re-run the operation that failed with the AWS CLI for further diagnosing what is going wrong.
    
    Example (changes in bold):
    
    > When getting information for key 'test.csv' in bucket 'pcmoritz-test-bucket-arrow-errors': AWS Error [code 15] **during HeadObject operation**: No response body.
    
    Authored-by: Philipp Moritz <pc...@gmail.com>
    Signed-off-by: Philipp Moritz <pc...@gmail.com>
---
 cpp/src/arrow/filesystem/s3_internal.h     | 46 +++++++++++-----------------
 cpp/src/arrow/filesystem/s3fs.cc           | 48 +++++++++++++++---------------
 cpp/src/arrow/filesystem/s3fs_benchmark.cc |  6 ++--
 cpp/src/arrow/filesystem/s3fs_test.cc      | 14 ++++-----
 4 files changed, 52 insertions(+), 62 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3_internal.h b/cpp/src/arrow/filesystem/s3_internal.h
index ceb92b5548..ae938c1760 100644
--- a/cpp/src/arrow/filesystem/s3_internal.h
+++ b/cpp/src/arrow/filesystem/s3_internal.h
@@ -39,19 +39,6 @@ namespace arrow {
 namespace fs {
 namespace internal {
 
-#define ARROW_AWS_ASSIGN_OR_RAISE_IMPL(outcome_name, lhs, rexpr) \
-  auto outcome_name = (rexpr);                                   \
-  if (!outcome_name.IsSuccess()) {                               \
-    return ErrorToStatus(outcome_name.GetError());               \
-  }                                                              \
-  lhs = std::move(outcome_name).GetResultWithOwnership();
-
-#define ARROW_AWS_ASSIGN_OR_RAISE_NAME(x, y) ARROW_CONCAT(x, y)
-
-#define ARROW_AWS_ASSIGN_OR_RAISE(lhs, rexpr) \
-  ARROW_AWS_ASSIGN_OR_RAISE_IMPL(             \
-      ARROW_AWS_ASSIGN_OR_RAISE_NAME(_aws_error_or_value, __COUNTER__), lhs, rexpr);
-
 // XXX Should we expose this at some point?
 enum class S3Backend { Amazon, Minio, Other };
 
@@ -104,60 +91,63 @@ inline bool IsAlreadyExists(const Aws::Client::AWSError<Aws::S3::S3Errors>& erro
 // TODO qualify error messages with a prefix indicating context
 // (e.g. "When completing multipart upload to bucket 'xxx', key 'xxx': ...")
 template <typename ErrorType>
-Status ErrorToStatus(const std::string& prefix,
+Status ErrorToStatus(const std::string& prefix, const std::string& operation,
                      const Aws::Client::AWSError<ErrorType>& error) {
   // XXX Handle fine-grained error types
   // See
   // https://sdk.amazonaws.com/cpp/api/LATEST/namespace_aws_1_1_s3.html#ae3f82f8132b619b6e91c88a9f1bde371
   return Status::IOError(prefix, "AWS Error [code ",
-                         static_cast<int>(error.GetErrorType()),
-                         "]: ", error.GetMessage());
+                         static_cast<int>(error.GetErrorType()), "] during ", operation,
+                         " operation: ", error.GetMessage());
 }
 
 template <typename ErrorType, typename... Args>
-Status ErrorToStatus(const std::tuple<Args&...>& prefix,
+Status ErrorToStatus(const std::tuple<Args&...>& prefix, const std::string& operation,
                      const Aws::Client::AWSError<ErrorType>& error) {
   std::stringstream ss;
   ::arrow::internal::PrintTuple(&ss, prefix);
-  return ErrorToStatus(ss.str(), error);
+  return ErrorToStatus(ss.str(), operation, error);
 }
 
 template <typename ErrorType>
-Status ErrorToStatus(const Aws::Client::AWSError<ErrorType>& error) {
-  return ErrorToStatus(std::string(), error);
+Status ErrorToStatus(const std::string& operation,
+                     const Aws::Client::AWSError<ErrorType>& error) {
+  return ErrorToStatus(std::string(), operation, error);
 }
 
 template <typename AwsResult, typename Error>
-Status OutcomeToStatus(const std::string& prefix,
+Status OutcomeToStatus(const std::string& prefix, const std::string& operation,
                        const Aws::Utils::Outcome<AwsResult, Error>& outcome) {
   if (outcome.IsSuccess()) {
     return Status::OK();
   } else {
-    return ErrorToStatus(prefix, outcome.GetError());
+    return ErrorToStatus(prefix, operation, outcome.GetError());
   }
 }
 
 template <typename AwsResult, typename Error, typename... Args>
-Status OutcomeToStatus(const std::tuple<Args&...>& prefix,
+Status OutcomeToStatus(const std::tuple<Args&...>& prefix, const std::string& operation,
                        const Aws::Utils::Outcome<AwsResult, Error>& outcome) {
   if (outcome.IsSuccess()) {
     return Status::OK();
   } else {
-    return ErrorToStatus(prefix, outcome.GetError());
+    return ErrorToStatus(prefix, operation, outcome.GetError());
   }
 }
 
 template <typename AwsResult, typename Error>
-Status OutcomeToStatus(const Aws::Utils::Outcome<AwsResult, Error>& outcome) {
-  return OutcomeToStatus(std::string(), outcome);
+Status OutcomeToStatus(const std::string& operation,
+                       const Aws::Utils::Outcome<AwsResult, Error>& outcome) {
+  return OutcomeToStatus(std::string(), operation, outcome);
 }
 
 template <typename AwsResult, typename Error>
-Result<AwsResult> OutcomeToResult(Aws::Utils::Outcome<AwsResult, Error> outcome) {
+Result<AwsResult> OutcomeToResult(const std::string& operation,
+                                  Aws::Utils::Outcome<AwsResult, Error> outcome) {
   if (outcome.IsSuccess()) {
     return std::move(outcome).GetResultWithOwnership();
   } else {
-    return ErrorToStatus(outcome.GetError());
+    return ErrorToStatus(operation, outcome.GetError());
   }
 }
 
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index fb933e4d4d..878f54812b 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -629,7 +629,7 @@ class S3Client : public Aws::S3::S3Client {
       } else if (!outcome.IsSuccess()) {
         return ErrorToStatus(std::forward_as_tuple("When resolving region for bucket '",
                                                    request.GetBucket(), "': "),
-                             outcome.GetError());
+                             "HeadBucket", outcome.GetError());
       } else {
         return Status::IOError("When resolving region for bucket '", request.GetBucket(),
                                "': missing 'x-amz-bucket-region' header in response");
@@ -945,7 +945,7 @@ Result<S3Model::GetObjectResult> GetObjectRange(Aws::S3::S3Client* client,
   req.SetKey(ToAwsString(path.key));
   req.SetRange(ToAwsString(FormatRange(start, length)));
   req.SetResponseStreamFactory(AwsWriteableStreamFactory(out, length));
-  return OutcomeToResult(client->GetObject(req));
+  return OutcomeToResult("GetObject", client->GetObject(req));
 }
 
 template <typename ObjectResult>
@@ -1076,7 +1076,7 @@ class ObjectInputFile final : public io::RandomAccessFile {
         return ErrorToStatus(
             std::forward_as_tuple("When reading information for key '", path_.key,
                                   "' in bucket '", path_.bucket, "': "),
-            outcome.GetError());
+            "HeadObject", outcome.GetError());
       }
     }
     content_length_ = outcome.GetResult().GetContentLength();
@@ -1250,7 +1250,7 @@ class ObjectOutputStream final : public io::OutputStream {
       return ErrorToStatus(
           std::forward_as_tuple("When initiating multiple part upload for key '",
                                 path_.key, "' in bucket '", path_.bucket, "': "),
-          outcome.GetError());
+          "CreateMultipartUpload", outcome.GetError());
     }
     upload_id_ = outcome.GetResult().GetUploadId();
     upload_state_ = std::make_shared<UploadState>();
@@ -1273,7 +1273,7 @@ class ObjectOutputStream final : public io::OutputStream {
       return ErrorToStatus(
           std::forward_as_tuple("When aborting multiple part upload for key '", path_.key,
                                 "' in bucket '", path_.bucket, "': "),
-          outcome.GetError());
+          "AbortMultipartUpload", outcome.GetError());
     }
     current_part_.reset();
     client_ = nullptr;
@@ -1321,7 +1321,7 @@ class ObjectOutputStream final : public io::OutputStream {
         return ErrorToStatus(
             std::forward_as_tuple("When completing multiple part upload for key '",
                                   path_.key, "' in bucket '", path_.bucket, "': "),
-            outcome.GetError());
+            "CompleteMultipartUpload", outcome.GetError());
       }
 
       client_ = nullptr;
@@ -1514,7 +1514,7 @@ class ObjectOutputStream final : public io::OutputStream {
     return ErrorToStatus(
         std::forward_as_tuple("When uploading part for key '", req.GetKey(),
                               "' in bucket '", req.GetBucket(), "': "),
-        outcome.GetError());
+        "UploadPart", outcome.GetError());
   }
 
  protected:
@@ -1744,7 +1744,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
       if (!IsNotFound(outcome.GetError())) {
         return ErrorToStatus(std::forward_as_tuple(
                                  "When testing for existence of bucket '", bucket, "': "),
-                             outcome.GetError());
+                             "HeadBucket", outcome.GetError());
       }
       return false;
     }
@@ -1763,7 +1763,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
         return Status::OK();
       } else if (!IsNotFound(outcome.GetError())) {
         return ErrorToStatus(
-            std::forward_as_tuple("When creating bucket '", bucket, "': "),
+            std::forward_as_tuple("When creating bucket '", bucket, "': "), "HeadBucket",
             outcome.GetError());
       }
 
@@ -1790,7 +1790,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
     auto outcome = client_->CreateBucket(req);
     if (!outcome.IsSuccess() && !IsAlreadyExists(outcome.GetError())) {
       return ErrorToStatus(std::forward_as_tuple("When creating bucket '", bucket, "': "),
-                           outcome.GetError());
+                           "CreateBucket", outcome.GetError());
     }
     return Status::OK();
   }
@@ -1803,7 +1803,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
     req.SetBody(std::make_shared<std::stringstream>(""));
     return OutcomeToStatus(
         std::forward_as_tuple("When creating key '", key, "' in bucket '", bucket, "': "),
-        client_->PutObject(req));
+        "PutObject", client_->PutObject(req));
   }
 
   Status CreateEmptyDir(const std::string& bucket, const std::string& key) {
@@ -1817,7 +1817,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
     req.SetKey(ToAwsString(key));
     return OutcomeToStatus(
         std::forward_as_tuple("When delete key '", key, "' in bucket '", bucket, "': "),
-        client_->DeleteObject(req));
+        "DeleteObject", client_->DeleteObject(req));
   }
 
   Status CopyObject(const S3Path& src_path, const S3Path& dest_path) {
@@ -1831,7 +1831,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
         std::forward_as_tuple("When copying key '", src_path.key, "' in bucket '",
                               src_path.bucket, "' to key '", dest_path.key,
                               "' in bucket '", dest_path.bucket, "': "),
-        client_->CopyObject(req));
+        "CopyObject", client_->CopyObject(req));
   }
 
   // On Minio, an empty "directory" doesn't satisfy the same API requests as
@@ -1885,7 +1885,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
     }
     return ErrorToStatus(std::forward_as_tuple("When reading information for key '", key,
                                                "' in bucket '", bucket, "': "),
-                         outcome.GetError());
+                         "HeadObject", outcome.GetError());
   }
 
   Result<bool> IsEmptyDirectory(
@@ -1911,7 +1911,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
     return ErrorToStatus(
         std::forward_as_tuple("When listing objects under key '", path.key,
                               "' in bucket '", path.bucket, "': "),
-        outcome.GetError());
+        "ListObjectsV2", outcome.GetError());
   }
 
   Status CheckNestingDepth(int32_t nesting_depth) {
@@ -1991,7 +1991,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
       }
       return ErrorToStatus(std::forward_as_tuple("When listing objects under key '", key,
                                                  "' in bucket '", bucket, "': "),
-                           error);
+                           "ListObjectsV2", error);
     };
 
     auto handle_recursion = [&](int32_t nesting_depth) -> Result<bool> {
@@ -2029,7 +2029,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
       }
       return ErrorToStatus(std::forward_as_tuple("When listing objects under key '", key,
                                                  "' in bucket '", bucket, "': "),
-                           error);
+                           "ListObjectsV2", error);
     };
 
     auto handle_recursion = [producer, select,
@@ -2091,7 +2091,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
     auto handle_error = [=](const AWSError<S3Errors>& error) -> Status {
       return ErrorToStatus(std::forward_as_tuple("When listing objects under key '", key,
                                                  "' in bucket '", bucket, "': "),
-                           error);
+                           "ListObjectsV2", error);
     };
 
     auto self = shared_from_this();
@@ -2113,7 +2113,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
 
       Status operator()(const S3Model::DeleteObjectsOutcome& outcome) {
         if (!outcome.IsSuccess()) {
-          return ErrorToStatus(outcome.GetError());
+          return ErrorToStatus("DeleteObjects", outcome.GetError());
         }
         // Also need to check per-key errors, even on successful outcome
         // See
@@ -2201,7 +2201,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this<S3FileSystem::Imp
   static Result<std::vector<std::string>> ProcessListBuckets(
       const Aws::S3::Model::ListBucketsOutcome& outcome) {
     if (!outcome.IsSuccess()) {
-      return ErrorToStatus(std::forward_as_tuple("When listing buckets: "),
+      return ErrorToStatus(std::forward_as_tuple("When listing buckets: "), "ListBuckets",
                            outcome.GetError());
     }
     std::vector<std::string> buckets;
@@ -2308,7 +2308,7 @@ Result<FileInfo> S3FileSystem::GetFileInfo(const std::string& s) {
         return ErrorToStatus(
             std::forward_as_tuple("When getting information for bucket '", path.bucket,
                                   "': "),
-            outcome.GetError());
+            "HeadBucket", outcome.GetError());
       }
       info.set_type(FileType::NotFound);
       return info;
@@ -2333,7 +2333,7 @@ Result<FileInfo> S3FileSystem::GetFileInfo(const std::string& s) {
       return ErrorToStatus(
           std::forward_as_tuple("When getting information for key '", path.key,
                                 "' in bucket '", path.bucket, "': "),
-          outcome.GetError());
+          "HeadObject", outcome.GetError());
     }
     // Not found => perhaps it's an empty "directory"
     ARROW_ASSIGN_OR_RAISE(bool is_dir, impl_->IsEmptyDirectory(path, &outcome));
@@ -2478,7 +2478,7 @@ Status S3FileSystem::DeleteDir(const std::string& s) {
     req.SetBucket(ToAwsString(path.bucket));
     return OutcomeToStatus(
         std::forward_as_tuple("When deleting bucket '", path.bucket, "': "),
-        impl_->client_->DeleteBucket(req));
+        "DeleteBucket", impl_->client_->DeleteBucket(req));
   } else if (path.key.empty()) {
     return Status::IOError("Would delete bucket '", path.bucket, "'. ",
                            "To delete buckets, enable the allow_bucket_deletion option.");
@@ -2536,7 +2536,7 @@ Status S3FileSystem::DeleteFile(const std::string& s) {
       return ErrorToStatus(
           std::forward_as_tuple("When getting information for key '", path.key,
                                 "' in bucket '", path.bucket, "': "),
-          outcome.GetError());
+          "HeadObject", outcome.GetError());
     }
   }
   // Object found, delete it
diff --git a/cpp/src/arrow/filesystem/s3fs_benchmark.cc b/cpp/src/arrow/filesystem/s3fs_benchmark.cc
index c2922b4c28..2121642963 100644
--- a/cpp/src/arrow/filesystem/s3fs_benchmark.cc
+++ b/cpp/src/arrow/filesystem/s3fs_benchmark.cc
@@ -125,14 +125,14 @@ class MinioFixture : public benchmark::Fixture {
   Status MakeBucket() {
     Aws::S3::Model::HeadBucketRequest head;
     head.SetBucket(ToAwsString(bucket_));
-    const Status st = OutcomeToStatus(client_->HeadBucket(head));
+    const Status st = OutcomeToStatus("HeadBucket", client_->HeadBucket(head));
     if (st.ok()) {
       // Bucket exists already
       return st;
     }
     Aws::S3::Model::CreateBucketRequest req;
     req.SetBucket(ToAwsString(bucket_));
-    return OutcomeToStatus(client_->CreateBucket(req));
+    return OutcomeToStatus("CreateBucket", client_->CreateBucket(req));
   }
 
   /// Make an object with dummy data.
@@ -141,7 +141,7 @@ class MinioFixture : public benchmark::Fixture {
     req.SetBucket(ToAwsString(bucket_));
     req.SetKey(ToAwsString(name));
     req.SetBody(std::make_shared<std::stringstream>(std::string(size, 'a')));
-    return OutcomeToStatus(client_->PutObject(req));
+    return OutcomeToStatus("PutObject", client_->PutObject(req));
   }
 
   /// Make an object with Parquet data.
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index 1d89e2da71..69ad5306a0 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -413,28 +413,28 @@ class TestS3FS : public S3TestMixin {
     {
       Aws::S3::Model::CreateBucketRequest req;
       req.SetBucket(ToAwsString("bucket"));
-      ASSERT_OK(OutcomeToStatus(client_->CreateBucket(req)));
+      ASSERT_OK(OutcomeToStatus("CreateBucket", client_->CreateBucket(req)));
       req.SetBucket(ToAwsString("empty-bucket"));
-      ASSERT_OK(OutcomeToStatus(client_->CreateBucket(req)));
+      ASSERT_OK(OutcomeToStatus("CreateBucket", client_->CreateBucket(req)));
     }
     {
       Aws::S3::Model::PutObjectRequest req;
       req.SetBucket(ToAwsString("bucket"));
       req.SetKey(ToAwsString("emptydir/"));
       req.SetBody(std::make_shared<std::stringstream>(""));
-      ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
+      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
       // NOTE: no need to create intermediate "directories" somedir/ and
       // somedir/subdir/
       req.SetKey(ToAwsString("somedir/subdir/subfile"));
       req.SetBody(std::make_shared<std::stringstream>("sub data"));
-      ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
+      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
       req.SetKey(ToAwsString("somefile"));
       req.SetBody(std::make_shared<std::stringstream>("some data"));
       req.SetContentType("x-arrow/test");
-      ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
+      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
       req.SetKey(ToAwsString("otherdir/1/2/3/otherfile"));
       req.SetBody(std::make_shared<std::stringstream>("other data"));
-      ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
+      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
     }
   }
 
@@ -1203,7 +1203,7 @@ class TestS3FSGeneric : public S3TestMixin, public GenericFileSystemTest {
     {
       Aws::S3::Model::CreateBucketRequest req;
       req.SetBucket(ToAwsString("s3fs-test-bucket"));
-      ASSERT_OK(OutcomeToStatus(client_->CreateBucket(req)));
+      ASSERT_OK(OutcomeToStatus("CreateBucket", client_->CreateBucket(req)));
     }
 
     options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key());