You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/19 04:12:37 UTC

[doris] 02/03: [feature](multi-catalog) support connecting to hive metastore with ke… (#15026)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit d052138985ed7127e1edeae398bb7bca7a8430c6
Author: wxy <du...@gmail.com>
AuthorDate: Tue Dec 13 16:48:46 2022 +0800

    [feature](multi-catalog) support connecting to hive metastore with ke… (#15026)
    
    Support kerberos authentication on hive external catalog
---
 .../docs/ecosystem/external-table/multi-catalog.md | 28 ++++++++++++++++---
 .../docs/ecosystem/external-table/multi-catalog.md | 28 ++++++++++++++++---
 .../org/apache/doris/catalog/HdfsResource.java     |  3 ++
 .../doris/catalog/external/HMSExternalTable.java   |  4 +--
 .../apache/doris/datasource/CatalogProperty.java   | 10 ++++---
 .../doris/datasource/HMSExternalCatalog.java       | 32 ++++++++++++++++++++--
 .../doris/planner/external/HiveScanProvider.java   |  2 +-
 7 files changed, 90 insertions(+), 17 deletions(-)

diff --git a/docs/en/docs/ecosystem/external-table/multi-catalog.md b/docs/en/docs/ecosystem/external-table/multi-catalog.md
index dabfcf4fb9..3be2f3bba0 100644
--- a/docs/en/docs/ecosystem/external-table/multi-catalog.md
+++ b/docs/en/docs/ecosystem/external-table/multi-catalog.md
@@ -85,11 +85,31 @@ CREATE CATALOG hive PROPERTIES (
     "type"="hms",
     'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
     'hadoop.username' = 'hive'
-    'dfs.nameservices'='service1',
+    'dfs.nameservices'='your-nameservice',
+    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
+    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
+);
+```
+
+If you want to connect to a Hive MetaStore with kerberos authentication, you can do like this:
+
+```
+CREATE CATALOG hive PROPERTIES (
+    "type"="hms",
+    'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
+    'hive.metastore.sasl.enabled' = 'true',
+    'dfs.nameservices'='your-nameservice',
    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
-    'dfs.namenode.rpc-address.HDFS8000871.nn1'='172.21.0.2:4007',
-    'dfs.namenode.rpc-address.HDFS8000871.nn2'='172.21.0.3:4007',
-    'dfs.client.failover.proxy.provider.HDFS8000871'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
+    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider',
+    'hadoop.security.authentication' = 'kerberos',
+    'hadoop.kerberos.keytab' = '/your-keytab-filepath/your.keytab',   
+    'hadoop.kerberos.principal' = 'your-principal@YOUR.COM',
+    'yarn.resourcemanager.address' = 'your-rm-address:your-rm-port',    
+    'yarn.resourcemanager.principal' = 'your-rm-principal/_HOST@YOUR.COM'
 );
 ```
 
diff --git a/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md b/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md
index 841c6b6766..3627bb9221 100644
--- a/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md
+++ b/docs/zh-CN/docs/ecosystem/external-table/multi-catalog.md
@@ -85,14 +85,34 @@ CREATE CATALOG hive PROPERTIES (
     "type"="hms",
     'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
     'hadoop.username' = 'hive'
-    'dfs.nameservices'='service1',
-    'dfs.ha.namenodes. service1'='nn1,nn2',
-    'dfs.namenode.rpc-address.HDFS8000871.nn1'='172.21.0.2:4007',
-    'dfs.namenode.rpc-address.HDFS8000871.nn2'='172.21.0.3:4007',
+    'dfs.nameservices'='your-nameservice',
+    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
 );
 ```
 
+如果需要连接开启了 Kerberos 认证的 Hive MetaStore,示例如下:
+
+```
+CREATE CATALOG hive PROPERTIES (
+    "type"="hms",
+    'hive.metastore.uris' = 'thrift://172.21.0.1:7004',
+    'hive.metastore.sasl.enabled' = 'true',
+    'dfs.nameservices'='your-nameservice',
+    'dfs.ha.namenodes.your-nameservice'='nn1,nn2',
+    'dfs.namenode.rpc-address.your-nameservice.nn1'='172.21.0.2:4007',
+    'dfs.namenode.rpc-address.your-nameservice.nn2'='172.21.0.3:4007',
+    'dfs.client.failover.proxy.provider.your-nameservice'='org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider',
+    'hadoop.security.authentication' = 'kerberos',
+    'hadoop.kerberos.keytab' = '/your-keytab-filepath/your.keytab',   
+    'hadoop.kerberos.principal' = 'your-principal@YOUR.COM',
+    'yarn.resourcemanager.address' = 'your-rm-address:your-rm-port',    
+    'yarn.resourcemanager.principal' = 'your-rm-principal/_HOST@YOUR.COM'
+);
+```
+
 创建后,可以通过 `SHOW CATALOGS` 命令查看 catalog:
 
 ```
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java
index 467cc1b6ec..868f032a44 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java
@@ -44,7 +44,10 @@ import java.util.Map;
  * );
  */
 public class HdfsResource extends Resource {
+    public static final String HADOOP_PREFIX = "hadoop.";
     public static final String HADOOP_FS_PREFIX = "dfs.";
+    public static final String HIVE_PREFIX = "hive.";
+    public static final String YARN_PREFIX = "yarn.";
     public static String HADOOP_FS_NAME = "fs.defaultFS";
     // simple or kerberos
     public static String HADOOP_USER_NAME = "hadoop.username";
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index bfefe1f829..d732c52206 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -284,8 +284,8 @@ public class HMSExternalTable extends ExternalTable {
         return ((HMSExternalCatalog) catalog).getHiveMetastoreUris();
     }
 
-    public Map<String, String> getDfsProperties() {
-        return catalog.getCatalogProperty().getDfsProperties();
+    public Map<String, String> getHdfsProperties() {
+        return catalog.getCatalogProperty().getHdfsProperties();
     }
 
     public Map<String, String> getS3Properties() {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java
index e20afc33f5..27928e3ffe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogProperty.java
@@ -44,13 +44,15 @@ public class CatalogProperty implements Writable {
         return properties.getOrDefault(key, defaultVal);
     }
 
-    // todo: remove and use HdfsResource
-    public Map<String, String> getDfsProperties() {
+    // get all properties with dfs.*  hadoop.*  yarn.*  hive.*
+    // besides dfs.* and hadoop.username, we need other properties when enable kerberos
+    public Map<String, String> getHdfsProperties() {
         Map<String, String> dfsProperties = Maps.newHashMap();
         for (Map.Entry<String, String> entry : properties.entrySet()) {
             if (entry.getKey().startsWith(HdfsResource.HADOOP_FS_PREFIX)
-                    || entry.getKey().equals(HdfsResource.HADOOP_USER_NAME)) {
-                // todo: still missing properties like hadoop.xxx
+                    || entry.getKey().startsWith(HdfsResource.HADOOP_PREFIX)
+                    || entry.getKey().startsWith(HdfsResource.HIVE_PREFIX)
+                    || entry.getKey().startsWith(HdfsResource.YARN_PREFIX)) {
                 dfsProperties.put(entry.getKey(), entry.getValue());
             }
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java
index 15cb40fe5c..0dfffe19fb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/HMSExternalCatalog.java
@@ -17,19 +17,25 @@
 
 package org.apache.doris.datasource;
 
+import org.apache.doris.catalog.AuthType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.HMSResource;
+import org.apache.doris.catalog.HdfsResource;
 import org.apache.doris.catalog.HiveMetaStoreClientHelper;
 import org.apache.doris.catalog.external.ExternalDatabase;
 import org.apache.doris.catalog.external.HMSExternalDatabase;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 
@@ -54,7 +60,7 @@ public class HMSExternalCatalog extends ExternalCatalog {
     }
 
     public String getHiveMetastoreUris() {
-        return catalogProperty.getOrDefault("hive.metastore.uris", "");
+        return catalogProperty.getOrDefault(HMSResource.HIVE_METASTORE_URIS, "");
     }
 
     @Override
@@ -91,8 +97,30 @@ public class HMSExternalCatalog extends ExternalCatalog {
     @Override
     protected void initLocalObjectsImpl() {
         HiveConf hiveConf = new HiveConf();
-        hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, getHiveMetastoreUris());
+        for (String key : catalogProperty.getHdfsProperties().keySet()) {
+            String val = catalogProperty.getOrDefault(key, "");
+            hiveConf.set(key, val);
+        }
 
+        String authentication = catalogProperty.getOrDefault(
+                HdfsResource.HADOOP_SECURITY_AUTHENTICATION, "");
+        if (AuthType.KERBEROS.getDesc().equals(authentication)) {
+            Configuration conf = new Configuration();
+            conf.set(HdfsResource.HADOOP_SECURITY_AUTHENTICATION, authentication);
+            UserGroupInformation.setConfiguration(conf);
+            try {
+                /**
+                 * Because metastore client is created by using
+                 * {@link org.apache.hadoop.hive.metastore.RetryingMetaStoreClient#getProxy}
+                 * it will relogin when TGT is expired, so we don't need to relogin manually.
+                 */
+                UserGroupInformation.loginUserFromKeytab(
+                        catalogProperty.getOrDefault(HdfsResource.HADOOP_KERBEROS_PRINCIPAL, ""),
+                        catalogProperty.getOrDefault(HdfsResource.HADOOP_KERBEROS_KEYTAB, ""));
+            } catch (IOException e) {
+                throw new HMSClientException("login with kerberos auth failed for catalog %s", e, this.getName());
+            }
+        }
         // 1. read properties from hive-site.xml.
         // and then use properties in CatalogProperty to override properties got from hive-site.xml
         Map<String, String> properties = HiveMetaStoreClientHelper.getPropertiesForDLF(name, hiveConf);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
index 0ee3db195c..ace3693907 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java
@@ -244,7 +244,7 @@ public class HiveScanProvider extends HMSTableScanProvider {
         if (locationType == TFileType.FILE_S3) {
             return hmsTable.getS3Properties();
         } else if (locationType == TFileType.FILE_HDFS) {
-            return hmsTable.getDfsProperties();
+            return hmsTable.getHdfsProperties();
         } else {
             return Maps.newHashMap();
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org