You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/07/20 12:49:02 UTC

[doris] branch master updated: [fix](multi-catalog)fix minio default region and throw minio error msg, support s3 bucket root path (#21994)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d488688b4 [fix](multi-catalog)fix minio default region and throw minio error msg, support s3 bucket root path (#21994)
7d488688b4 is described below

commit 7d488688b4c21ca1eb98b8afdf9144642b18bcdc
Author: slothever <18...@users.noreply.github.com>
AuthorDate: Thu Jul 20 20:48:55 2023 +0800

    [fix](multi-catalog)fix minio default region and throw minio error msg, support s3 bucket root path (#21994)
    
    1. check the minio region, set a default region if the user does not provide one, and surface the minio error msg
    2. support reading the bucket root path, e.g. s3://bucket1
    3. fix max compute public access
---
 be/src/util/s3_uri.cpp                                     |  5 +++--
 docs/en/docs/lakehouse/multi-catalog/hive.md               |  1 +
 docs/en/docs/lakehouse/multi-catalog/iceberg.md            |  1 +
 .../en/docs/sql-manual/sql-functions/table-functions/s3.md |  2 ++
 .../Backup-and-Restore/CREATE-REPOSITORY.md                |  1 +
 docs/zh-CN/docs/lakehouse/multi-catalog/hive.md            |  1 +
 docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md         |  1 +
 .../docs/sql-manual/sql-functions/table-functions/s3.md    |  2 ++
 .../org/apache/doris/maxcompute/MaxComputeTableScan.java   | 12 +++++++-----
 .../apache/doris/datasource/MaxComputeExternalCatalog.java | 14 +++++++++-----
 .../doris/datasource/property/PropertyConverter.java       |  5 +++--
 .../doris/datasource/property/S3ClientBEProperties.java    |  6 +++---
 .../datasource/property/constants/MinioProperties.java     |  1 +
 .../doris/datasource/property/constants/S3Properties.java  |  2 +-
 .../main/java/org/apache/doris/fs/remote/S3FileSystem.java | 12 ++++++++++++
 .../apache/doris/tablefunction/S3TableValuedFunction.java  |  8 ++++++--
 16 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/be/src/util/s3_uri.cpp b/be/src/util/s3_uri.cpp
index c2e4a72098..3162b63f0e 100644
--- a/be/src/util/s3_uri.cpp
+++ b/be/src/util/s3_uri.cpp
@@ -50,11 +50,12 @@ Status S3URI::parse() {
             rest = scheme_split[1];
             std::vector<std::string> authority_split =
                     strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
-            if (authority_split.size() != 2) {
+            if (authority_split.size() < 1) {
                 return Status::InvalidArgument("Invalid S3 URI: {}", _location);
             }
             _bucket = authority_split[0];
-            _key = authority_split[1];
+            // support s3://bucket1
+            _key = authority_split.size() == 1 ? "/" : authority_split[1];
         } else if (scheme_split[0] == _SCHEME_HTTP || scheme_split[0] == _SCHEME_HTTPS) {
             // has scheme, eg: http(s)://host/bucket1/path/to/file.txt
             rest = scheme_split[1];
diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md
index 9daacbcd22..26c1b98042 100644
--- a/docs/en/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/en/docs/lakehouse/multi-catalog/hive.md
@@ -119,6 +119,7 @@ CREATE CATALOG hive PROPERTIES (
     "type"="hms",
     "hive.metastore.uris" = "thrift://172.0.0.1:9083",
     "s3.endpoint" = "s3.us-east-1.amazonaws.com",
+    "s3.region" = "us-east-1",
     "s3.access_key" = "ak",
     "s3.secret_key" = "sk",
     "use_path_style" = "true"
diff --git a/docs/en/docs/lakehouse/multi-catalog/iceberg.md b/docs/en/docs/lakehouse/multi-catalog/iceberg.md
index 18d66a5350..4509fbc4fa 100644
--- a/docs/en/docs/lakehouse/multi-catalog/iceberg.md
+++ b/docs/en/docs/lakehouse/multi-catalog/iceberg.md
@@ -126,6 +126,7 @@ If the data is stored on S3, the following parameters can be used in properties:
 "s3.access_key" = "ak"
 "s3.secret_key" = "sk"
 "s3.endpoint" = "http://endpoint-uri"
+"s3.region" = "your-region"
 "s3.credentials.provider" = "provider-class-name" // Optional. The default credentials class is based on BasicAWSCredentials.
 ```
 
diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
index dcdd092bd2..e793daec79 100644
--- a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
+++ b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
@@ -40,6 +40,7 @@ s3(
   "uri" = "..",
   "s3.access_key" = "...",
   "s3.secret_key" = "...",
+  "s3.region" = "...",
   "format" = "csv",
   "keyn" = "valuen",
   ...
@@ -55,6 +56,7 @@ Related parameters for accessing S3:
 - `uri`: (required) The S3 tvf will decide whether to use the path style access method according to the `use_path_style` parameter, and the default access method is the virtual-hosted style method.
 - `s3.access_key`: (required)
 - `s3.secret_key`: (required)
+- `s3.region`: (optional) Required if the MinIO server has been configured with a different region; otherwise `us-east-1` is used by default.
 - `s3.session_token`: (optional)
 - `use_path_style`: (optional) default `false` . The S3 SDK uses the virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style access method.
 
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
index 438bac678b..2308fd6b9c 100644
--- a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
+++ b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
@@ -102,6 +102,7 @@ ON LOCATION "s3://s3-repo"
 PROPERTIES
 (
     "s3.endpoint" = "http://s3-REGION.amazonaws.com",
+    "s3.region" = "s3-REGION",
     "s3.access_key" = "AWS_ACCESS_KEY",
     "s3.secret_key"="AWS_SECRET_KEY",
     "s3.region" = "REGION"
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
index 25c5c6bd26..779442b81a 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
@@ -118,6 +118,7 @@ CREATE CATALOG hive PROPERTIES (
     "type"="hms",
     "hive.metastore.uris" = "thrift://172.0.0.1:9083",
     "s3.endpoint" = "s3.us-east-1.amazonaws.com",
+    "s3.region" = "us-east-1",
     "s3.access_key" = "ak",
     "s3.secret_key" = "sk",
     "use_path_style" = "true"
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
index bf5333388b..c93fca28e5 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
@@ -126,6 +126,7 @@ CREATE CATALOG iceberg PROPERTIES (
 "s3.access_key" = "ak"
 "s3.secret_key" = "sk"
 "s3.endpoint" = "http://endpoint-uri"
+"s3.region" = "your-region"
 "s3.credentials.provider" = "provider-class-name" // 可选,默认凭证类基于BasicAWSCredentials实现。
 ```
 
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
index fe1f246bd9..f5c65e3f41 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
@@ -45,6 +45,7 @@ s3(
   "uri" = "..",
   "s3.access_key" = "...",
   "s3.secret_key" = "...",
+  "s3.region" = "...",
   "format" = "csv",
   "keyn" = "valuen",
   ...
@@ -58,6 +59,7 @@ S3 tvf中的每一个参数都是一个 `"key"="value"` 对。
 - `uri`: (必填) 访问S3的uri,S3表函数会根据 `use_path_style` 参数来决定是否使用 path style 访问方式,默认为 virtual-hosted style 方式
 - `s3.access_key`: (必填)
 - `s3.secret_key`: (必填)
+- `s3.region`: (选填)。如果Minio服务设置了其他的region,那么必填,否则默认使用`us-east-1`。
 - `s3.session_token`: (选填)
 - `use_path_style`:(选填) 默认为`false` 。S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可能没开启或没支持virtual-hosted style 方式的访问,此时我们可以添加 use_path_style 参数来强制使用 path style 方式。比如 `minio`默认情况下只允许`path style`访问方式,所以在访问minio时要加上`use_path_style=true`。
 
diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java
index 5102330a4d..da67196a3a 100644
--- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java
+++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java
@@ -29,7 +29,7 @@ import java.io.IOException;
  * MaxComputeJ JniScanner. BE will read data from the scanner object.
  */
 public class MaxComputeTableScan {
-    private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun.com/api";
+    private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun-inc.com/api";
     private static final String tunnelUrlTemplate = "http://dt.{}.maxcompute.aliyun-inc.com";
     private final Odps odps;
     private final TableTunnel tunnel;
@@ -43,13 +43,15 @@ public class MaxComputeTableScan {
         this.project = project;
         this.table = table;
         odps = new Odps(new AliyunAccount(accessKey, secretKey));
-        odps.setEndpoint(odpsUrlTemplate.replace("{}", region));
-        odps.setDefaultProject(this.project);
-        tunnel = new TableTunnel(odps);
+        String odpsUrl = odpsUrlTemplate.replace("{}", region);
         String tunnelUrl = tunnelUrlTemplate.replace("{}", region);
         if (enablePublicAccess) {
-            tunnelUrl = tunnelUrlTemplate.replace("-inc", "");
+            odpsUrl = odpsUrl.replace("-inc", "");
+            tunnelUrl = tunnelUrl.replace("-inc", "");
         }
+        odps.setEndpoint(odpsUrl);
+        odps.setDefaultProject(this.project);
+        tunnel = new TableTunnel(odps);
         tunnel.setEndpoint(tunnelUrl);
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java
index c62abf9ada..bfe6b2c18c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java
@@ -43,11 +43,11 @@ public class MaxComputeExternalCatalog extends ExternalCatalog {
     private String secretKey;
     @SerializedName(value = "publicAccess")
     private boolean enablePublicAccess;
-    private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun.com/api";
+    private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun-inc.com/api";
     private static final String tunnelUrlTemplate = "http://dt.{}.maxcompute.aliyun-inc.com";
 
     public MaxComputeExternalCatalog(long catalogId, String name, String resource, Map<String, String> props,
-            String comment) {
+                                     String comment) {
         super(catalogId, name, InitCatalogLog.Type.MAX_COMPUTE, comment);
         catalogProperty = new CatalogProperty(resource, props);
     }
@@ -77,9 +77,13 @@ public class MaxComputeExternalCatalog extends ExternalCatalog {
         secretKey = credential.getSecretKey();
         Account account = new AliyunAccount(accessKey, secretKey);
         this.odps = new Odps(account);
-        odps.setEndpoint(odpsUrlTemplate.replace("{}", region));
-        odps.setDefaultProject(defaultProject);
         enablePublicAccess = Boolean.parseBoolean(props.getOrDefault(MCProperties.PUBLIC_ACCESS, "false"));
+        String odpsUrl = odpsUrlTemplate.replace("{}", region);
+        if (enablePublicAccess) {
+            odpsUrl = odpsUrl.replace("-inc", "");
+        }
+        odps.setEndpoint(odpsUrl);
+        odps.setDefaultProject(defaultProject);
     }
 
     public long getTotalRows(String project, String table) throws TunnelException {
@@ -87,7 +91,7 @@ public class MaxComputeExternalCatalog extends ExternalCatalog {
         TableTunnel tunnel = new TableTunnel(odps);
         String tunnelUrl = tunnelUrlTemplate.replace("{}", region);
         if (enablePublicAccess) {
-            tunnelUrl = tunnelUrlTemplate.replace("-inc", "");
+            tunnelUrl = tunnelUrl.replace("-inc", "");
         }
         tunnel.setEndpoint(tunnelUrl);
         return tunnel.createDownloadSession(project, table).getRecordCount();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
index 8787233e7d..304cc95882 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
@@ -309,8 +309,9 @@ public class PropertyConverter {
     }
 
     private static Map<String, String> convertToMinioProperties(Map<String, String> props, CloudCredential credential) {
-        // minio does not have region, use an arbitrary one.
-        props.put(MinioProperties.REGION, "us-east-1");
+        if (!props.containsKey(MinioProperties.REGION)) {
+            props.put(MinioProperties.REGION, MinioProperties.DEFAULT_REGION);
+        }
         return convertToS3Properties(S3Properties.prefixToS3(props), credential);
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
index b0639be2df..fe912c1038 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
@@ -28,15 +28,15 @@ import java.util.HashMap;
 import java.util.Map;
 
 public class S3ClientBEProperties {
-
     /**
      *  convert FE properties to BE S3 client properties
      *  On BE, should use properties like AWS_XXX.
      */
     public static Map<String, String> getBeFSProperties(Map<String, String> properties) {
         if (properties.containsKey(MinioProperties.ENDPOINT)) {
-            // minio does not have region, use an arbitrary one.
-            properties.put(MinioProperties.REGION, "us-east-1");
+            if (!properties.containsKey(MinioProperties.REGION)) {
+                properties.put(MinioProperties.REGION, MinioProperties.DEFAULT_REGION);
+            }
             return getBeAWSPropertiesFromS3(S3Properties.prefixToS3(properties));
         } else if (properties.containsKey(S3Properties.ENDPOINT)) {
             // s3,oss,cos,obs use this.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java
index c444384220..a286718ea8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java
@@ -31,6 +31,7 @@ public class MinioProperties extends BaseProperties {
     public static final String ACCESS_KEY = "minio.access_key";
     public static final String SECRET_KEY = "minio.secret_key";
     public static final String SESSION_TOKEN = "minio.session_token";
+    public static final String DEFAULT_REGION = "us-east-1";
     public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT, ACCESS_KEY, SECRET_KEY, REGION);
 
     public static CloudCredential getCredential(Map<String, String> props) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
index b5a96ddcec..13fb6ee336 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
@@ -126,7 +126,7 @@ public class S3Properties extends BaseProperties {
         if (endpointSplit.length < 2) {
             return null;
         }
-        if (endpointSplit[0].startsWith("oss-")) {
+        if (endpointSplit[0].contains("oss-")) {
             // compatible with the endpoint: oss-cn-beijing.aliyuncs.com
             return endpointSplit[0];
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java
index 0d09037c81..f04abae879 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java
@@ -23,6 +23,7 @@ import org.apache.doris.common.UserException;
 import org.apache.doris.datasource.property.PropertyConverter;
 import org.apache.doris.fs.obj.S3ObjStorage;
 
+import com.amazonaws.services.s3.model.AmazonS3Exception;
 import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -86,6 +87,17 @@ public class S3FileSystem extends ObjFileSystem {
             LOG.info("file not found: " + e.getMessage());
             return new Status(Status.ErrCode.NOT_FOUND, "file not found: " + e.getMessage());
         } catch (Exception e) {
+            if (e.getCause() instanceof AmazonS3Exception) {
+                // process minio error msg
+                AmazonS3Exception ea = (AmazonS3Exception) e.getCause();
+                Map<String, String> callbackHeaders = ea.getHttpHeaders();
+                if (callbackHeaders != null && !callbackHeaders.isEmpty()) {
+                    String minioErrMsg = callbackHeaders.get("X-Minio-Error-Desc");
+                    if (minioErrMsg != null) {
+                        return new Status(Status.ErrCode.COMMON_ERROR, "Minio request error: " + minioErrMsg);
+                    }
+                }
+            }
             LOG.error("errors while get file status ", e);
             return new Status(Status.ErrCode.COMMON_ERROR, "errors while get file status " + e.getMessage());
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
index a54b0e80d6..9f6339b0c2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
@@ -67,7 +67,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
 
     private final S3URI s3uri;
     private final boolean forceVirtualHosted;
-    private String virtualBucket;
+    private String virtualBucket = "";
     private String virtualKey;
 
     public S3TableValuedFunction(Map<String, String> params) throws AnalysisException {
@@ -77,8 +77,12 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
         final String endpoint = forceVirtualHosted
                 ? getEndpointAndSetVirtualBucket(params)
                 : s3uri.getBucketScheme();
+        if (!tvfParams.containsKey(S3Properties.REGION)) {
+            String region = S3Properties.getRegionOfEndpoint(endpoint);
+            tvfParams.put(S3Properties.REGION, region);
+        }
         CloudCredentialWithEndpoint credential = new CloudCredentialWithEndpoint(endpoint,
-                tvfParams.getOrDefault(S3Properties.REGION, S3Properties.getRegionOfEndpoint(endpoint)),
+                tvfParams.get(S3Properties.REGION),
                 tvfParams.get(S3Properties.ACCESS_KEY),
                 tvfParams.get(S3Properties.SECRET_KEY));
         if (tvfParams.containsKey(S3Properties.SESSION_TOKEN)) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org