You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/19 04:12:37 UTC
[doris] 02/03: [feature](multi-catalog) support connecting to hive metastore with ke… (#15026)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
commit d052138985ed7127e1edeae398bb7bca7a8430c6
Author: wxy <du...@gmail.com>
AuthorDate: Tue Dec 13 16:48:46 2022 +0800
[feature](multi-catalog) support connecting to hive metastore with ke… (#15026)
Support kerberos authentication on hive external catalog
---
.../docs/ecosystem/external-table/multi-catalog.md | 28 ++++++++++++++++---
.../docs/ecosystem/external-table/multi-catalog.md | 28 ++++++++++++++++---
.../org/apache/doris/catalog/HdfsResource.java | 3 ++
.../doris/catalog/external/HMSExternalTable.java | 4 +--
.../apache/doris/datasource/CatalogProperty.java | 10 ++++---
.../doris/datasource/HMSExternalCatalog.java | 32 ++++++++++++++++++++--
.../doris/planner/external/HiveScanProvider.java | 2 +-
7 files changed, 90 insertions(+), 17 deletions(-)
diff --git a/docs/en/docs/ecosystem/external-table/multi-catalog.md b/docs/en/docs/ecosystem/external-table/multi-catalog.md
index dabfcf4fb9..3be2f3bba0 100644
--- a/docs/en/docs/ecosystem/external-table/multi-catalog.md
+++ b/docs/en/docs/ecosystem/external-table/multi-catalog.md
@@ -85,11 +85,31 @@ CREATE CATALOG hive PROPERTIES (
"type"="hms",
'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
'hadoop.username' = 'hive'
- 'dfs.nameservices'='service1',
+ 'dfs.nameservices'='your-nameservice',
+ 'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+ 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+ 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
+ 'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
+);
+```
+
+If you want to connect to a Hive MetaStore with kerberos authentication, you can do like this:
+
+```
+CREATE CATALOG hive PROPERTIES (
+ "type"="hms",
+ 'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
+ 'hive.metastore.sasl.enabled' = 'true',
+ 'dfs.nameservices'='your-nameservice',
    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
- 'dfs.namenode.rpc-address.HDFS8000871.nn1'='172.21.0.2:4007',
- 'dfs.namenode.rpc-address.HDFS8000871.nn2'='172.21.0.3:4007',
- 'dfs.client.failover.proxy.provider.HDFS8000871'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
+ 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+ 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
+ 'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider',
+ 'hadoop.security.authentication' = 'kerberos',
+ 'hadoop.kerberos.keytab' = '/your-keytab-filepath/your.keytab',
+ 'hadoop.kerberos.principal' = 'your-principal@YOUR.COM',
+ 'yarn.resourcemanager.address' = 'your-rm-address:your-rm-port',
+ 'yarn.resourcemanager.principal' = 'your-rm-principal/_HOST@YOUR.COM'
);
```
diff --git a/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md b/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md
index 841c6b6766..3627bb9221 100644
--- a/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md
+++ b/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md
@@ -85,14 +85,34 @@ CREATE CATALOG hive PROPERTIES (
"type"="hms",
'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
'hadoop.username' = 'hive'
- 'dfs.nameservices'='service1',
- 'dfs.ha.namenodes. service1'='nn1,nn2',
- 'dfs.namenode.rpc-address.HDFS8000871.nn1'='172.21.0.2:4007',
- 'dfs.namenode.rpc-address.HDFS8000871.nn2'='172.21.0.3:4007',
+ 'dfs.nameservices'='your-nameservice',
+ 'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+ 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+ 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
 'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
);
```
+如果需要连接开启了 Kerberos 认证的 Hive MetaStore,示例如下:
+
+```
+CREATE CATALOG hive PROPERTIES (
+ "type"="hms",
+ 'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
+ 'hive.metastore.sasl.enabled' = 'true',
+ 'dfs.nameservices'='your-nameservice',
+ 'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+ 'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+ 'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
+ 'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider',
+ 'hadoop.security.authentication' = 'kerberos',
+ 'hadoop.kerberos.keytab' = '/your-keytab-filepath/your.keytab',
+ 'hadoop.kerberos.principal' = 'your-principal@YOUR.COM',
+ 'yarn.resourcemanager.address' = 'your-rm-address:your-rm-port',
+ 'yarn.resourcemanager.principal' = 'your-rm-principal/_HOST@YOUR.COM'
+);
+```
+
创建后,可以通过 `SHOW CATALOGS` 命令查看 catalog:
```
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java
index 467cc1b6ec..868f032a44 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java
@@ -44,7 +44,10 @@ import java.util.Map;
* );
*/
public class HdfsResource extends Resource {
+ public static final String HADOOP_PREFIX = "hadoop.";
public static final String HADOOP_FS_PREFIX = "dfs.";
+ public static final String HIVE_PREFIX = "hive.";
+ public static final String YARN_PREFIX = "yarn.";
public static String HADOOP_FS_NAME = "fs.defaultFS";
// simple or kerberos
public static String HADOOP_USER_NAME = "hadoop.username";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index bfefe1f829..d732c52206 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -284,8 +284,8 @@ public class HMSExternalTable extends ExternalTable {
return ((HMSExternalCatalog) catalog).getHiveMetastoreUris();
}
- public Map<String, String> getDfsProperties() {
- return catalog.getCatalogProperty().getDfsProperties();
+ public Map<String, String> getHdfsProperties() {
+ return catalog.getCatalogProperty().getHdfsProperties();
}
public Map<String, String> getS3Properties() {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java
index e20afc33f5..27928e3ffe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java
@@ -44,13 +44,15 @@ public class CatalogProperty implements Writable {
return properties.getOrDefault(key, defaultVal);
}
- // todo: remove and use HdfsResource
- public Map<String, String> getDfsProperties() {
+ // get all properties with dfs.* hadoop.* yarn.* hive.*
+ // besides dfs.* and hadoop.username, we need other properties when enable kerberos
+ public Map<String, String> getHdfsProperties() {
Map<String, String> dfsProperties = Maps.newHashMap();
for (Map.Entry<String, String> entry : properties.entrySet()) {
if (entry.getKey().startsWith(HdfsResource.HADOOP_FS_PREFIX)
- || entry.getKey().equals(HdfsResource.HADOOP_USER_NAME)) {
- // todo: still missing properties like hadoop.xxx
+ || entry.getKey().startsWith(HdfsResource.HADOOP_PREFIX)
+ || entry.getKey().startsWith(HdfsResource.HIVE_PREFIX)
+ || entry.getKey().startsWith(HdfsResource.YARN_PREFIX)) {
dfsProperties.put(entry.getKey(), entry.getValue());
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java
index 15cb40fe5c..0dfffe19fb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java
@@ -17,19 +17,25 @@
package org.apache.doris.datasource;
+import org.apache.doris.catalog.AuthType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.HMSResource;
+import org.apache.doris.catalog.HdfsResource;
import org.apache.doris.catalog.HiveMetaStoreClientHelper;
import org.apache.doris.catalog.external.ExternalDatabase;
import org.apache.doris.catalog.external.HMSExternalDatabase;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.security.UserGroupInformation;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import java.io.IOException;
import java.util.List;
import java.util.Map;
@@ -54,7 +60,7 @@ public class HMSExternalCatalog extends ExternalCatalog {
}
public String getHiveMetastoreUris() {
- return catalogProperty.getOrDefault("hive.metastore.uris", "");
+ return catalogProperty.getOrDefault(HMSResource.HIVE_METASTORE_URIS, "");
}
@Override
@@ -91,8 +97,30 @@ public class HMSExternalCatalog extends ExternalCatalog {
@Override
protected void initLocalObjectsImpl() {
HiveConf hiveConf = new HiveConf();
- hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, getHiveMetastoreUris());
+ for (String key : catalogProperty.getHdfsProperties().keySet()) {
+ String val = catalogProperty.getOrDefault(key, "");
+ hiveConf.set(key, val);
+ }
+ String authentication = catalogProperty.getOrDefault(
+ HdfsResource.HADOOP_SECURITY_AUTHENTICATION, "");
+ if (AuthType.KERBEROS.getDesc().equals(authentication)) {
+ Configuration conf = new Configuration();
+ conf.set(HdfsResource.HADOOP_SECURITY_AUTHENTICATION, authentication);
+ UserGroupInformation.setConfiguration(conf);
+ try {
+ /**
+ * Because metastore client is created by using
+ * {@link org.apache.hadoop.hive.metastore.RetryingMetaStoreClient#getProxy}
+ * it will relogin when TGT is expired, so we don't need to relogin manually.
+ */
+ UserGroupInformation.loginUserFromKeytab(
+ catalogProperty.getOrDefault(HdfsResource.HADOOP_KERBEROS_PRINCIPAL, ""),
+ catalogProperty.getOrDefault(HdfsResource.HADOOP_KERBEROS_KEYTAB, ""));
+ } catch (IOException e) {
+ throw new HMSClientException("login with kerberos auth failed for catalog %s", e, this.getName());
+ }
+ }
// 1. read properties from hive-site.xml.
// and then use properties in CatalogProperty to override properties got from hive-site.xml
Map<String, String> properties = HiveMetaStoreClientHelper.getPropertiesForDLF(name, hiveConf);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
index 0ee3db195c..ace3693907 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
@@ -244,7 +244,7 @@ public class HiveScanProvider extends HMSTableScanProvider {
if (locationType == TFileType.FILE_S3) {
return hmsTable.getS3Properties();
} else if (locationType == TFileType.FILE_HDFS) {
- return hmsTable.getDfsProperties();
+ return hmsTable.getHdfsProperties();
} else {
return Maps.newHashMap();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org