You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/13 20:36:58 UTC

[arrow-rs] branch master updated: object_store: fix: Incorrect parsing of https Path Style S3 url (#4082)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 485696e3e object_store: fix: Incorrect parsing of https Path Style S3 url (#4082)
485696e3e is described below

commit 485696e3e4b555d9c84300e5fa788df351b83936
Author: Robert Pack <42...@users.noreply.github.com>
AuthorDate: Thu Apr 13 22:36:52 2023 +0200

    object_store: fix: Incorrect parsing of https Path Style S3 url (#4082)
    
    * fix: parse region from path-style urls, not bucket
    
    * fix: test
    
    * fix: parse s3 bucket from first path segment
    
    * test: add test for parsing bucket from path style url
---
 object_store/src/aws/mod.rs | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs
index de62360d0..34d468f39 100644
--- a/object_store/src/aws/mod.rs
+++ b/object_store/src/aws/mod.rs
@@ -805,12 +805,16 @@ impl AmazonS3Builder {
     fn parse_url(&mut self, url: &str) -> Result<()> {
         let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?;
         let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?;
-
         match parsed.scheme() {
             "s3" | "s3a" => self.bucket_name = Some(host.to_string()),
             "https" => match host.splitn(4, '.').collect_tuple() {
-                Some(("s3", bucket, "amazonaws", "com")) => {
-                    self.bucket_name = Some(bucket.to_string());
+                Some(("s3", region, "amazonaws", "com")) => {
+                    self.region = Some(region.to_string());
+                    if let Some(bucket) =
+                        parsed.path_segments().and_then(|mut path| path.next())
+                    {
+                        self.bucket_name = Some(bucket.into());
+                    }
                 }
                 Some((bucket, "s3", region, "amazonaws.com")) => {
                     self.bucket_name = Some(bucket.to_string());
@@ -1519,10 +1523,24 @@ mod tests {
 
         let mut builder = AmazonS3Builder::new();
         builder
-            .parse_url("https://s3.bucket.amazonaws.com")
+            .parse_url("https://s3.region.amazonaws.com")
+            .unwrap();
+        assert_eq!(builder.region, Some("region".to_string()));
+
+        let mut builder = AmazonS3Builder::new();
+        builder
+            .parse_url("https://s3.region.amazonaws.com/bucket")
             .unwrap();
+        assert_eq!(builder.region, Some("region".to_string()));
         assert_eq!(builder.bucket_name, Some("bucket".to_string()));
 
+        let mut builder = AmazonS3Builder::new();
+        builder
+            .parse_url("https://s3.region.amazonaws.com/bucket.with.dot/path")
+            .unwrap();
+        assert_eq!(builder.region, Some("region".to_string()));
+        assert_eq!(builder.bucket_name, Some("bucket.with.dot".to_string()));
+
         let mut builder = AmazonS3Builder::new();
         builder
             .parse_url("https://bucket.s3.region.amazonaws.com")