You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/06/11 11:29:45 UTC

[doris] branch branch-2.0-beta updated: [bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656)

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0-beta by this push:
     new ee97899deb [bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656)
ee97899deb is described below

commit ee97899deb2dfe681f630c76df65dab76319df2b
Author: Kang <kx...@gmail.com>
AuthorDate: Sun Jun 11 14:00:04 2023 +0800

    [bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656)
---
 be/src/util/s3_uri.cpp       | 32 +++++++++++++++++++++++++-------
 be/src/util/s3_uri.h         |  3 +++
 be/test/util/s3_uri_test.cpp | 29 +++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/be/src/util/s3_uri.cpp b/be/src/util/s3_uri.cpp
index 08041fece8..c2e4a72098 100644
--- a/be/src/util/s3_uri.cpp
+++ b/be/src/util/s3_uri.cpp
@@ -25,6 +25,9 @@
 
 namespace doris {
 
+const std::string S3URI::_SCHEME_S3 = "s3";
+const std::string S3URI::_SCHEME_HTTP = "http";
+const std::string S3URI::_SCHEME_HTTPS = "https";
 const std::string S3URI::_SCHEME_DELIM = "://";
 const std::string S3URI::_PATH_DELIM = "/";
 const std::string S3URI::_QUERY_DELIM = "?";
@@ -42,15 +45,30 @@ Status S3URI::parse() {
     std::vector<std::string> scheme_split = strings::Split(_location, _SCHEME_DELIM);
     std::string rest;
     if (scheme_split.size() == 2) {
-        // has scheme, eg: s3://bucket1/path/to/file.txt
-        rest = scheme_split[1];
-        std::vector<std::string> authority_split =
-                strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
-        if (authority_split.size() != 2) {
+        if (scheme_split[0] == _SCHEME_S3) {
+            // has scheme, eg: s3://bucket1/path/to/file.txt
+            rest = scheme_split[1];
+            std::vector<std::string> authority_split =
+                    strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
+            if (authority_split.size() != 2) {
+                return Status::InvalidArgument("Invalid S3 URI: {}", _location);
+            }
+            _bucket = authority_split[0];
+            _key = authority_split[1];
+        } else if (scheme_split[0] == _SCHEME_HTTP || scheme_split[0] == _SCHEME_HTTPS) {
+            // has scheme, eg: http(s)://host/bucket1/path/to/file.txt
+            rest = scheme_split[1];
+            std::vector<std::string> authority_split =
+                    strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 2));
+            if (authority_split.size() != 3) {
+                return Status::InvalidArgument("Invalid S3 HTTP URI: {}", _location);
+            }
+            // authority_split[1] is host
+            _bucket = authority_split[1];
+            _key = authority_split[2];
+        } else {
             return Status::InvalidArgument("Invalid S3 URI: {}", _location);
         }
-        _bucket = authority_split[0];
-        _key = authority_split[1];
     } else if (scheme_split.size() == 1) {
         // no scheme, eg: path/to/file.txt
         _bucket = ""; // unknown
diff --git a/be/src/util/s3_uri.h b/be/src/util/s3_uri.h
index 76ffea5824..769053450b 100644
--- a/be/src/util/s3_uri.h
+++ b/be/src/util/s3_uri.h
@@ -41,6 +41,9 @@ public:
     std::string to_string() const;
 
 private:
+    static const std::string _SCHEME_S3;
+    static const std::string _SCHEME_HTTP;
+    static const std::string _SCHEME_HTTPS;
     static const std::string _SCHEME_DELIM;
     static const std::string _PATH_DELIM;
     static const std::string _QUERY_DELIM;
diff --git a/be/test/util/s3_uri_test.cpp b/be/test/util/s3_uri_test.cpp
index ab5192a8a0..93aa2c72eb 100644
--- a/be/test/util/s3_uri_test.cpp
+++ b/be/test/util/s3_uri_test.cpp
@@ -56,13 +56,42 @@ TEST_F(S3URITest, EncodedString) {
     EXPECT_EQ("path%20to%20file", uri1.get_key());
 }
 
+TEST_F(S3URITest, HttpURI) {
+    std::string p1 = "http://a.b.com/bucket/path/to/file";
+    S3URI uri1(p1);
+    EXPECT_TRUE(uri1.parse());
+    EXPECT_EQ("bucket", uri1.get_bucket());
+    EXPECT_EQ("path/to/file", uri1.get_key());
+
+    std::string p2 = "https://a.b.com/bucket/path/to/file";
+    S3URI uri2(p2);
+    EXPECT_TRUE(uri2.parse());
+    EXPECT_EQ("bucket", uri2.get_bucket());
+    EXPECT_EQ("path/to/file", uri2.get_key());
+}
+
+TEST_F(S3URITest, InvalidSchema) {
+    std::string p1 = "xxx://a.b.com/bucket/path/to/file";
+    S3URI uri1(p1);
+    EXPECT_FALSE(uri1.parse());
+}
+
 TEST_F(S3URITest, MissingKey) {
     std::string p1 = "https://bucket/";
     S3URI uri1(p1);
     EXPECT_FALSE(uri1.parse());
+
     std::string p2 = "s3://bucket/";
     S3URI uri2(p2);
     EXPECT_FALSE(uri2.parse());
+
+    std::string p3 = "http://a.b.com/bucket/";
+    S3URI uri3(p3);
+    EXPECT_FALSE(uri3.parse());
+
+    std::string p4 = "http://a.b.com/";
+    S3URI uri4(p4);
+    EXPECT_FALSE(uri4.parse());
 }
 
 TEST_F(S3URITest, RelativePathing) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org