You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/06/11 11:29:45 UTC
[doris] branch branch-2.0-beta updated: [bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656)
This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0-beta by this push:
new ee97899deb [bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656)
ee97899deb is described below
commit ee97899deb2dfe681f630c76df65dab76319df2b
Author: Kang <kx...@gmail.com>
AuthorDate: Sun Jun 11 14:00:04 2023 +0800
[bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656)
---
be/src/util/s3_uri.cpp | 32 +++++++++++++++++++++++++-------
be/src/util/s3_uri.h | 3 +++
be/test/util/s3_uri_test.cpp | 29 +++++++++++++++++++++++++++++
3 files changed, 57 insertions(+), 7 deletions(-)
diff --git a/be/src/util/s3_uri.cpp b/be/src/util/s3_uri.cpp
index 08041fece8..c2e4a72098 100644
--- a/be/src/util/s3_uri.cpp
+++ b/be/src/util/s3_uri.cpp
@@ -25,6 +25,9 @@
namespace doris {
+const std::string S3URI::_SCHEME_S3 = "s3";
+const std::string S3URI::_SCHEME_HTTP = "http";
+const std::string S3URI::_SCHEME_HTTPS = "https";
const std::string S3URI::_SCHEME_DELIM = "://";
const std::string S3URI::_PATH_DELIM = "/";
const std::string S3URI::_QUERY_DELIM = "?";
@@ -42,15 +45,30 @@ Status S3URI::parse() {
std::vector<std::string> scheme_split = strings::Split(_location, _SCHEME_DELIM);
std::string rest;
if (scheme_split.size() == 2) {
- // has scheme, eg: s3://bucket1/path/to/file.txt
- rest = scheme_split[1];
- std::vector<std::string> authority_split =
- strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
- if (authority_split.size() != 2) {
+ if (scheme_split[0] == _SCHEME_S3) {
+ // has scheme, eg: s3://bucket1/path/to/file.txt
+ rest = scheme_split[1];
+ std::vector<std::string> authority_split =
+ strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
+ if (authority_split.size() != 2) {
+ return Status::InvalidArgument("Invalid S3 URI: {}", _location);
+ }
+ _bucket = authority_split[0];
+ _key = authority_split[1];
+ } else if (scheme_split[0] == _SCHEME_HTTP || scheme_split[0] == _SCHEME_HTTPS) {
+ // has scheme, eg: http(s)://host/bucket1/path/to/file.txt
+ rest = scheme_split[1];
+ std::vector<std::string> authority_split =
+ strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 2));
+ if (authority_split.size() != 3) {
+ return Status::InvalidArgument("Invalid S3 HTTP URI: {}", _location);
+ }
+ // authority_split[1] is host
+ _bucket = authority_split[1];
+ _key = authority_split[2];
+ } else {
return Status::InvalidArgument("Invalid S3 URI: {}", _location);
}
- _bucket = authority_split[0];
- _key = authority_split[1];
} else if (scheme_split.size() == 1) {
// no scheme, eg: path/to/file.txt
_bucket = ""; // unknown
diff --git a/be/src/util/s3_uri.h b/be/src/util/s3_uri.h
index 76ffea5824..769053450b 100644
--- a/be/src/util/s3_uri.h
+++ b/be/src/util/s3_uri.h
@@ -41,6 +41,9 @@ public:
std::string to_string() const;
private:
+ static const std::string _SCHEME_S3;
+ static const std::string _SCHEME_HTTP;
+ static const std::string _SCHEME_HTTPS;
static const std::string _SCHEME_DELIM;
static const std::string _PATH_DELIM;
static const std::string _QUERY_DELIM;
diff --git a/be/test/util/s3_uri_test.cpp b/be/test/util/s3_uri_test.cpp
index ab5192a8a0..93aa2c72eb 100644
--- a/be/test/util/s3_uri_test.cpp
+++ b/be/test/util/s3_uri_test.cpp
@@ -56,13 +56,42 @@ TEST_F(S3URITest, EncodedString) {
EXPECT_EQ("path%20to%20file", uri1.get_key());
}
+TEST_F(S3URITest, HttpURI) {
+ std::string p1 = "http://a.b.com/bucket/path/to/file";
+ S3URI uri1(p1);
+ EXPECT_TRUE(uri1.parse());
+ EXPECT_EQ("bucket", uri1.get_bucket());
+ EXPECT_EQ("path/to/file", uri1.get_key());
+
+ std::string p2 = "https://a.b.com/bucket/path/to/file";
+ S3URI uri2(p2);
+ EXPECT_TRUE(uri2.parse());
+ EXPECT_EQ("bucket", uri2.get_bucket());
+ EXPECT_EQ("path/to/file", uri2.get_key());
+}
+
+TEST_F(S3URITest, InvalidSchema) {
+ std::string p1 = "xxx://a.b.com/bucket/path/to/file";
+ S3URI uri1(p1);
+ EXPECT_FALSE(uri1.parse());
+}
+
TEST_F(S3URITest, MissingKey) {
std::string p1 = "https://bucket/";
S3URI uri1(p1);
EXPECT_FALSE(uri1.parse());
+
std::string p2 = "s3://bucket/";
S3URI uri2(p2);
EXPECT_FALSE(uri2.parse());
+
+ std::string p3 = "http://a.b.com/bucket/";
+ S3URI uri3(p3);
+ EXPECT_FALSE(uri3.parse());
+
+ std::string p4 = "http://a.b.com/";
+ S3URI uri4(p4);
+ EXPECT_FALSE(uri4.parse());
}
TEST_F(S3URITest, RelativePathing) {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org