You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/05/10 08:37:23 UTC

[arrow-rs] branch master updated: Object Store (AWS): Support dynamically resolving S3 bucket region (#4188)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 016e7a07f Object Store (AWS): Support dynamically resolving S3 bucket region (#4188)
016e7a07f is described below

commit 016e7a07f88ca510efe41269500a9130262a99bb
Author: Josh Wiley <jo...@cloudbend.dev>
AuthorDate: Wed May 10 01:37:17 2023 -0700

    Object Store (AWS): Support dynamically resolving S3 bucket region (#4188)
    
    * feat(object_store): resolve aws region using bucket name
    
    * feat(object_store): resolve bucket region as floating fn
    
    * fix(object_store): clippy warnings
    
    * Cleanup error handling
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies <r....@googlemail.com>
---
 object_store/src/aws/mod.rs | 73 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs
index 34d468f39..bc852ed48 100644
--- a/object_store/src/aws/mod.rs
+++ b/object_store/src/aws/mod.rs
@@ -38,7 +38,7 @@ use futures::stream::BoxStream;
 use futures::TryStreamExt;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};
-use snafu::{OptionExt, ResultExt, Snafu};
+use snafu::{ensure, OptionExt, ResultExt, Snafu};
 use std::collections::BTreeSet;
 use std::ops::Range;
 use std::str::FromStr;
@@ -144,6 +144,18 @@ enum Error {
 
     #[snafu(display("Configuration key: '{}' is not known.", key))]
     UnknownConfigurationKey { key: String },
+
+    #[snafu(display("Bucket '{}' not found", bucket))]
+    BucketNotFound { bucket: String },
+
+    #[snafu(display("Failed to resolve region for bucket '{}'", bucket))]
+    ResolveRegion {
+        bucket: String,
+        source: reqwest::Error,
+    },
+
+    #[snafu(display("Failed to parse the region for bucket '{}'", bucket))]
+    RegionParse { bucket: String },
 }
 
 impl From<Error> for super::Error {
@@ -160,6 +172,38 @@ impl From<Error> for super::Error {
     }
 }
 
+/// Get the region of `bucket` using the [HeadBucket API]. This will fail if the bucket does not exist.
+///
+/// # Errors
+///
+/// Errors if the request fails, S3 replies `404 Not Found`, or no region header can be read.
+///
+/// [HeadBucket API]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadBucket.html
+pub async fn resolve_bucket_region(
+    bucket: &str,
+    client_options: &ClientOptions,
+) -> Result<String> {
+    let endpoint = format!("https://{}.s3.amazonaws.com", bucket);
+    let response = client_options
+        .client()?
+        .head(&endpoint)
+        .send()
+        .await
+        .context(ResolveRegionSnafu { bucket })?;
+    // Only 404 means "bucket missing"; any other status falls through to the header lookup.
+    ensure!(
+        response.status() != reqwest::StatusCode::NOT_FOUND,
+        BucketNotFoundSnafu { bucket }
+    );
+
+    let region = response
+        .headers()
+        .get("x-amz-bucket-region")
+        .and_then(|x| x.to_str().ok())
+        .context(RegionParseSnafu { bucket })?;
+    Ok(region.to_string())
+}
+
 /// Interface for [Amazon S3](https://aws.amazon.com/s3/).
 #[derive(Debug)]
 pub struct AmazonS3 {
@@ -1563,3 +1607,30 @@ mod tests {
         }
     }
 }
+
+#[cfg(test)]
+mod s3_resolve_bucket_region_tests {
+    use super::*;
+
+    /// Resolves the region of the `bloxbender` bucket and expects `us-west-2`.
+    ///
+    /// NOTE(review): issues a live HEAD request, so this presumably needs network access.
+    #[tokio::test]
+    async fn test_private_bucket() {
+        let options = ClientOptions::new();
+        let resolved = resolve_bucket_region("bloxbender", &options).await;
+
+        assert_eq!(resolved.unwrap(), "us-west-2");
+    }
+
+    /// Resolving the `please-dont-exist` bucket must produce an error.
+    ///
+    /// NOTE(review): relies on nobody having created a bucket with that name; verify.
+    #[tokio::test]
+    async fn test_bucket_does_not_exist() {
+        let options = ClientOptions::new();
+        let outcome = resolve_bucket_region("please-dont-exist", &options).await;
+
+        assert!(outcome.is_err());
+    }
+}