You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/07/20 12:49:02 UTC
[doris] branch master updated: [fix](multi-catalog)fix minio default region and throw minio error msg, support s3 bucket root path (#21994)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7d488688b4 [fix](multi-catalog)fix minio default region and throw minio error msg, support s3 bucket root path (#21994)
7d488688b4 is described below
commit 7d488688b4c21ca1eb98b8afdf9144642b18bcdc
Author: slothever <18...@users.noreply.github.com>
AuthorDate: Thu Jul 20 20:48:55 2023 +0800
[fix](multi-catalog)fix minio default region and throw minio error msg, support s3 bucket root path (#21994)
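1. Check the MinIO region: use the default region (us-east-1) when the user does not provide one, and return MinIO's own error message when a request fails.
2. Support reading from a bucket root path, e.g. s3://bucket1.
3. Fix the MaxCompute endpoints used when public access is enabled.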
---
be/src/util/s3_uri.cpp | 5 +++--
docs/en/docs/lakehouse/multi-catalog/hive.md | 1 +
docs/en/docs/lakehouse/multi-catalog/iceberg.md | 1 +
.../en/docs/sql-manual/sql-functions/table-functions/s3.md | 2 ++
.../Backup-and-Restore/CREATE-REPOSITORY.md | 1 +
docs/zh-CN/docs/lakehouse/multi-catalog/hive.md | 1 +
docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md | 1 +
.../docs/sql-manual/sql-functions/table-functions/s3.md | 2 ++
.../org/apache/doris/maxcompute/MaxComputeTableScan.java | 12 +++++++-----
.../apache/doris/datasource/MaxComputeExternalCatalog.java | 14 +++++++++-----
.../doris/datasource/property/PropertyConverter.java | 5 +++--
.../doris/datasource/property/S3ClientBEProperties.java | 6 +++---
.../datasource/property/constants/MinioProperties.java | 1 +
.../doris/datasource/property/constants/S3Properties.java | 2 +-
.../main/java/org/apache/doris/fs/remote/S3FileSystem.java | 12 ++++++++++++
.../apache/doris/tablefunction/S3TableValuedFunction.java | 8 ++++++--
16 files changed, 54 insertions(+), 20 deletions(-)
diff --git a/be/src/util/s3_uri.cpp b/be/src/util/s3_uri.cpp
index c2e4a72098..3162b63f0e 100644
--- a/be/src/util/s3_uri.cpp
+++ b/be/src/util/s3_uri.cpp
@@ -50,11 +50,12 @@ Status S3URI::parse() {
rest = scheme_split[1];
std::vector<std::string> authority_split =
strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
- if (authority_split.size() != 2) {
+ if (authority_split.size() < 1) {
return Status::InvalidArgument("Invalid S3 URI: {}", _location);
}
_bucket = authority_split[0];
- _key = authority_split[1];
+ // support s3://bucket1
+ _key = authority_split.size() == 1 ? "/" : authority_split[1];
} else if (scheme_split[0] == _SCHEME_HTTP || scheme_split[0] == _SCHEME_HTTPS) {
// has scheme, eg: http(s)://host/bucket1/path/to/file.txt
rest = scheme_split[1];
diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md
index 9daacbcd22..26c1b98042 100644
--- a/docs/en/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/en/docs/lakehouse/multi-catalog/hive.md
@@ -119,6 +119,7 @@ CREATE CATALOG hive PROPERTIES (
"type"="hms",
"hive.metastore.uris" = "thrift://172.0.0.1:9083",
"s3.endpoint" = "s3.us-east-1.amazonaws.com",
+ "s3.region" = "us-east-1",
"s3.access_key" = "ak",
"s3.secret_key" = "sk"
"use_path_style" = "true"
diff --git a/docs/en/docs/lakehouse/multi-catalog/iceberg.md b/docs/en/docs/lakehouse/multi-catalog/iceberg.md
index 18d66a5350..4509fbc4fa 100644
--- a/docs/en/docs/lakehouse/multi-catalog/iceberg.md
+++ b/docs/en/docs/lakehouse/multi-catalog/iceberg.md
@@ -126,6 +126,7 @@ If the data is stored on S3, the following parameters can be used in properties:
"s3.access_key" = "ak"
"s3.secret_key" = "sk"
"s3.endpoint" = "http://endpoint-uri"
+"s3.region" = "your-region"
"s3.credentials.provider" = "provider-class-name" // 可选,默认凭证类基于BasicAWSCredentials实现。
```
diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
index dcdd092bd2..e793daec79 100644
--- a/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
+++ b/docs/en/docs/sql-manual/sql-functions/table-functions/s3.md
@@ -40,6 +40,7 @@ s3(
"uri" = "..",
"s3.access_key" = "...",
"s3.secret_key" = "...",
+ "s3.region" = "...",
"format" = "csv",
"keyn" = "valuen",
...
@@ -55,6 +56,7 @@ Related parameters for accessing S3:
- `uri`: (required) The S3 tvf will decide whether to use the path style access method according to the `use_path_style` parameter, and the default access method is the virtual-hosted style method.
- `s3.access_key`: (required)
- `s3.secret_key`: (required)
+- `s3.region`: (optional) Required if the MinIO server is configured with a different region; otherwise `us-east-1` is used by default.
- `s3.session_token`: (optional)
- `use_path_style`: (optional) default `false` . The S3 SDK uses the virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style access method.
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
index 438bac678b..2308fd6b9c 100644
--- a/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
+++ b/docs/en/docs/sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md
@@ -102,6 +102,7 @@ ON LOCATION "s3://s3-repo"
PROPERTIES
(
"s3.endpoint" = "http://s3-REGION.amazonaws.com",
+ "s3.region" = "s3-REGION",
"s3.access_key" = "AWS_ACCESS_KEY",
"s3.secret_key"="AWS_SECRET_KEY",
"s3.region" = "REGION"
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
index 25c5c6bd26..779442b81a 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md
@@ -118,6 +118,7 @@ CREATE CATALOG hive PROPERTIES (
"type"="hms",
"hive.metastore.uris" = "thrift://172.0.0.1:9083",
"s3.endpoint" = "s3.us-east-1.amazonaws.com",
+ "s3.region" = "us-east-1",
"s3.access_key" = "ak",
"s3.secret_key" = "sk"
"use_path_style" = "true"
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
index bf5333388b..c93fca28e5 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
@@ -126,6 +126,7 @@ CREATE CATALOG iceberg PROPERTIES (
"s3.access_key" = "ak"
"s3.secret_key" = "sk"
"s3.endpoint" = "http://endpoint-uri"
+"s3.region" = "your-region"
"s3.credentials.provider" = "provider-class-name" // 可选,默认凭证类基于BasicAWSCredentials实现。
```
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
index fe1f246bd9..f5c65e3f41 100644
--- a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/s3.md
@@ -45,6 +45,7 @@ s3(
"uri" = "..",
"s3.access_key" = "...",
"s3.secret_key" = "...",
+ "s3.region" = "...",
"format" = "csv",
"keyn" = "valuen",
...
@@ -58,6 +59,7 @@ S3 tvf中的每一个参数都是一个 `"key"="value"` 对。
- `uri`: (必填) 访问S3的uri,S3表函数会根据 `use_path_style` 参数来决定是否使用 path style 访问方式,默认为 virtual-hosted style 方式
- `s3.access_key`: (必填)
- `s3.secret_key`: (必填)
+- `s3.region`: (选填)。如果Minio服务设置了其他的region,那么必填,否则默认使用`us-east-1`。
- `s3.session_token`: (选填)
- `use_path_style`:(选填) 默认为`false` 。S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可能没开启或没支持virtual-hosted style 方式的访问,此时我们可以添加 use_path_style 参数来强制使用 path style 方式。比如 `minio`默认情况下只允许`path style`访问方式,所以在访问minio时要加上`use_path_style=true`。
diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java
index 5102330a4d..da67196a3a 100644
--- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java
+++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeTableScan.java
@@ -29,7 +29,7 @@ import java.io.IOException;
* MaxComputeJ JniScanner. BE will read data from the scanner object.
*/
public class MaxComputeTableScan {
- private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun.com/api";
+ private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun-inc.com/api";
private static final String tunnelUrlTemplate = "http://dt.{}.maxcompute.aliyun-inc.com";
private final Odps odps;
private final TableTunnel tunnel;
@@ -43,13 +43,15 @@ public class MaxComputeTableScan {
this.project = project;
this.table = table;
odps = new Odps(new AliyunAccount(accessKey, secretKey));
- odps.setEndpoint(odpsUrlTemplate.replace("{}", region));
- odps.setDefaultProject(this.project);
- tunnel = new TableTunnel(odps);
+ String odpsUrl = odpsUrlTemplate.replace("{}", region);
String tunnelUrl = tunnelUrlTemplate.replace("{}", region);
if (enablePublicAccess) {
- tunnelUrl = tunnelUrlTemplate.replace("-inc", "");
+ odpsUrl = odpsUrl.replace("-inc", "");
+ tunnelUrl = tunnelUrl.replace("-inc", "");
}
+ odps.setEndpoint(odpsUrl);
+ odps.setDefaultProject(this.project);
+ tunnel = new TableTunnel(odps);
tunnel.setEndpoint(tunnelUrl);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java
index c62abf9ada..bfe6b2c18c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java
@@ -43,11 +43,11 @@ public class MaxComputeExternalCatalog extends ExternalCatalog {
private String secretKey;
@SerializedName(value = "publicAccess")
private boolean enablePublicAccess;
- private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun.com/api";
+ private static final String odpsUrlTemplate = "http://service.{}.maxcompute.aliyun-inc.com/api";
private static final String tunnelUrlTemplate = "http://dt.{}.maxcompute.aliyun-inc.com";
public MaxComputeExternalCatalog(long catalogId, String name, String resource, Map<String, String> props,
- String comment) {
+ String comment) {
super(catalogId, name, InitCatalogLog.Type.MAX_COMPUTE, comment);
catalogProperty = new CatalogProperty(resource, props);
}
@@ -77,9 +77,13 @@ public class MaxComputeExternalCatalog extends ExternalCatalog {
secretKey = credential.getSecretKey();
Account account = new AliyunAccount(accessKey, secretKey);
this.odps = new Odps(account);
- odps.setEndpoint(odpsUrlTemplate.replace("{}", region));
- odps.setDefaultProject(defaultProject);
enablePublicAccess = Boolean.parseBoolean(props.getOrDefault(MCProperties.PUBLIC_ACCESS, "false"));
+ String odpsUrl = odpsUrlTemplate.replace("{}", region);
+ if (enablePublicAccess) {
+ odpsUrl = odpsUrl.replace("-inc", "");
+ }
+ odps.setEndpoint(odpsUrl);
+ odps.setDefaultProject(defaultProject);
}
public long getTotalRows(String project, String table) throws TunnelException {
@@ -87,7 +91,7 @@ public class MaxComputeExternalCatalog extends ExternalCatalog {
TableTunnel tunnel = new TableTunnel(odps);
String tunnelUrl = tunnelUrlTemplate.replace("{}", region);
if (enablePublicAccess) {
- tunnelUrl = tunnelUrlTemplate.replace("-inc", "");
+ tunnelUrl = tunnelUrl.replace("-inc", "");
}
tunnel.setEndpoint(tunnelUrl);
return tunnel.createDownloadSession(project, table).getRecordCount();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
index 8787233e7d..304cc95882 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
@@ -309,8 +309,9 @@ public class PropertyConverter {
}
private static Map<String, String> convertToMinioProperties(Map<String, String> props, CloudCredential credential) {
- // minio does not have region, use an arbitrary one.
- props.put(MinioProperties.REGION, "us-east-1");
+ if (!props.containsKey(MinioProperties.REGION)) {
+ props.put(MinioProperties.REGION, MinioProperties.DEFAULT_REGION);
+ }
return convertToS3Properties(S3Properties.prefixToS3(props), credential);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
index b0639be2df..fe912c1038 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java
@@ -28,15 +28,15 @@ import java.util.HashMap;
import java.util.Map;
public class S3ClientBEProperties {
-
/**
* convert FE properties to BE S3 client properties
* On BE, should use properties like AWS_XXX.
*/
public static Map<String, String> getBeFSProperties(Map<String, String> properties) {
if (properties.containsKey(MinioProperties.ENDPOINT)) {
- // minio does not have region, use an arbitrary one.
- properties.put(MinioProperties.REGION, "us-east-1");
+ if (!properties.containsKey(MinioProperties.REGION)) {
+ properties.put(MinioProperties.REGION, MinioProperties.DEFAULT_REGION);
+ }
return getBeAWSPropertiesFromS3(S3Properties.prefixToS3(properties));
} else if (properties.containsKey(S3Properties.ENDPOINT)) {
// s3,oss,cos,obs use this.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java
index c444384220..a286718ea8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/MinioProperties.java
@@ -31,6 +31,7 @@ public class MinioProperties extends BaseProperties {
public static final String ACCESS_KEY = "minio.access_key";
public static final String SECRET_KEY = "minio.secret_key";
public static final String SESSION_TOKEN = "minio.session_token";
+ public static final String DEFAULT_REGION = "us-east-1";
public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT, ACCESS_KEY, SECRET_KEY, REGION);
public static CloudCredential getCredential(Map<String, String> props) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
index b5a96ddcec..13fb6ee336 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java
@@ -126,7 +126,7 @@ public class S3Properties extends BaseProperties {
if (endpointSplit.length < 2) {
return null;
}
- if (endpointSplit[0].startsWith("oss-")) {
+ if (endpointSplit[0].contains("oss-")) {
// compatible with the endpoint: oss-cn-bejing.aliyuncs.com
return endpointSplit[0];
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java
index 0d09037c81..f04abae879 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/fs/remote/S3FileSystem.java
@@ -23,6 +23,7 @@ import org.apache.doris.common.UserException;
import org.apache.doris.datasource.property.PropertyConverter;
import org.apache.doris.fs.obj.S3ObjStorage;
+import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -86,6 +87,17 @@ public class S3FileSystem extends ObjFileSystem {
LOG.info("file not found: " + e.getMessage());
return new Status(Status.ErrCode.NOT_FOUND, "file not found: " + e.getMessage());
} catch (Exception e) {
+ if (e.getCause() instanceof AmazonS3Exception) {
+ // process minio error msg
+ AmazonS3Exception ea = (AmazonS3Exception) e.getCause();
+ Map<String, String> callbackHeaders = ea.getHttpHeaders();
+ if (callbackHeaders != null && !callbackHeaders.isEmpty()) {
+ String minioErrMsg = callbackHeaders.get("X-Minio-Error-Desc");
+ if (minioErrMsg != null) {
+ return new Status(Status.ErrCode.COMMON_ERROR, "Minio request error: " + minioErrMsg);
+ }
+ }
+ }
LOG.error("errors while get file status ", e);
return new Status(Status.ErrCode.COMMON_ERROR, "errors while get file status " + e.getMessage());
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
index a54b0e80d6..9f6339b0c2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/S3TableValuedFunction.java
@@ -67,7 +67,7 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
private final S3URI s3uri;
private final boolean forceVirtualHosted;
- private String virtualBucket;
+ private String virtualBucket = "";
private String virtualKey;
public S3TableValuedFunction(Map<String, String> params) throws AnalysisException {
@@ -77,8 +77,12 @@ public class S3TableValuedFunction extends ExternalFileTableValuedFunction {
final String endpoint = forceVirtualHosted
? getEndpointAndSetVirtualBucket(params)
: s3uri.getBucketScheme();
+ if (!tvfParams.containsKey(S3Properties.REGION)) {
+ String region = S3Properties.getRegionOfEndpoint(endpoint);
+ tvfParams.put(S3Properties.REGION, region);
+ }
CloudCredentialWithEndpoint credential = new CloudCredentialWithEndpoint(endpoint,
- tvfParams.getOrDefault(S3Properties.REGION, S3Properties.getRegionOfEndpoint(endpoint)),
+ tvfParams.get(S3Properties.REGION),
tvfParams.get(S3Properties.ACCESS_KEY),
tvfParams.get(S3Properties.SECRET_KEY));
if (tvfParams.containsKey(S3Properties.SESSION_TOKEN)) {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org