Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/05/11 15:18:36 UTC

[GitHub] [incubator-doris] morningman commented on a diff in pull request #9512: [improvement](hive) Support reading/writing external Hive tables authenticated by Kerberos using libhdfs3

morningman commented on code in PR #9512:
URL: https://github.com/apache/incubator-doris/pull/9512#discussion_r870431183


##########
fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java:
##########
@@ -245,14 +248,24 @@ private static List<RemoteIterator<LocatedFileStatus>> getRemoteIterator(List<Pa
     private static List<RemoteIterator<LocatedFileStatus>> getRemoteIterator(Table table, Map<String, String> properties) throws DdlException {
         List<RemoteIterator<LocatedFileStatus>> iterators = new ArrayList<>();
         Configuration configuration = new Configuration(false);
+        boolean isSecurityEnabled = false;
         for (Map.Entry<String, String> entry : properties.entrySet()) {
             if (!entry.getKey().equals(HiveTable.HIVE_METASTORE_URIS)) {
                 configuration.set(entry.getKey(), entry.getValue());
             }
+            // a kerberos-related property indicates security is enabled; the actual login happens below
+            if (entry.getKey().contains("kerberos")) {
+                isSecurityEnabled = true;
+            }
         }
         String location = table.getSd().getLocation();
         org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(location);
         try {
+            if (isSecurityEnabled) {
+                UserGroupInformation.setConfiguration(configuration);
+                UserGroupInformation.loginUserFromKeytab(properties.get(BrokerUtil.HDFS_KERBEROS_PRINCIPAL),
+                    properties.get(BrokerUtil.HDFS_KERBEROS_KEYTAB));

Review Comment:
   The `HDFS_KERBEROS_KEYTAB` property may not exist if the user uses `HDFS_KERBEROS_KEYTAB_WITH_BASE64` instead?



##########
be/src/exec/hdfs_builder.cpp:
##########
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/hdfs_builder.h"
+
+#include <fstream>
+#include <sstream>
+
+#include "agent/utils.h"
+#include "common/logging.h"
+#include "util/url_coding.h"
+
+namespace doris {
+
+Status HDFSCommonBuilder::runKinit() {
+    if (!hdfs_kerberos_keytab_base64.empty()) {
+        // write keytab file
+        std::ofstream fp("./doris.keytab");
+        if (!fp) {
+            LOG(WARNING) << "create keytab file failed";
+            return Status::InternalError("Create keytab file failed");
+        }
+        fp << hdfs_kerberos_keytab_base64 << std::endl;
+        fp.close();
+        hdfs_kerberos_keytab = "./doris.keytab";
+    }
+    if (hdfs_kerberos_principal.empty() || hdfs_kerberos_keytab.empty()) {
+        return Status::InvalidArgument("Invalid hdfs_kerberos_principal or hdfs_kerberos_keytab");
+    }
+    std::stringstream ss;
+    std::string ticket_cache_path = "/tmp/krb5cc_doris";
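+    // assembled command, e.g.: kinit -c "/tmp/krb5cc_doris" -R -t "<keytab path>" -k <principal>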
+    ss << "kinit -c \""<< ticket_cache_path <<"\" -R -t \"" << 
+       hdfs_kerberos_keytab << "\" -k " << hdfs_kerberos_principal;
+    LOG(INFO) << "kinit command: " << ss.str();
+    hdfsBuilderSetKerbTicketCachePath(hdfs_builder, ticket_cache_path.c_str());
+    std::string msg;
+    AgentUtils util;
+    bool rc = util.exec_cmd(ss.str(), &msg);
+    if (!rc) {
+        return Status::InternalError("Kinit failed, errMsg: " + msg);
+    }
+    return Status::OK();
+}
+
+HDFSCommonBuilder createHDFSBuilder(THdfsParams hdfsParams) {

Review Comment:
   ```suggestion
   HDFSCommonBuilder createHDFSBuilder(const THdfsParams& hdfsParams) {
   ```
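   
   Passing `THdfsParams` by value copies the whole Thrift struct (including all of its strings) on every call; taking it by const reference avoids that copy.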



##########
be/src/exec/hdfs_builder.cpp:
##########
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/hdfs_builder.h"
+
+#include <fstream>
+#include <sstream>
+
+#include "agent/utils.h"
+#include "common/logging.h"
+#include "util/url_coding.h"
+
+namespace doris {
+
+Status HDFSCommonBuilder::runKinit() {
+    if (!hdfs_kerberos_keytab_base64.empty()) {
+        // write keytab file
+        std::ofstream fp("./doris.keytab");

Review Comment:
   We need a well-defined place to write this file.
   Is this a tmp file? If yes, there is a TmpFileMgr you can use.
   If not, maybe write it under `conf/`?
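   
   For illustration, a minimal sketch of resolving a stable keytab location under a fixed base directory (the helper and the choice of directory are hypothetical; whether it should come from TmpFileMgr or `conf/` is exactly the open question):
   ```cpp
   #include <filesystem>
   #include <string>
   
   // Hypothetical helper: place the keytab under a fixed directory instead of
   // "./doris.keytab", so the path no longer depends on the BE's working directory.
   std::string keytab_path(const std::string& base_dir) {
       namespace fs = std::filesystem;
       fs::create_directories(base_dir);  // no-op if the directory already exists
       return (fs::path(base_dir) / "doris.keytab").string();
   }
   ```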
   



##########
thirdparty/build-thirdparty.sh:
##########
@@ -900,7 +900,17 @@ build_gsasl() {
     check_if_source_exist $GSASL_SOURCE
     cd $TP_SOURCE_DIR/$GSASL_SOURCE
     mkdir -p $BUILD_DIR && cd $BUILD_DIR
-    ../configure --prefix=$TP_INSTALL_DIR --enable-shared=no --with-pic --with-libidn-prefix=$TP_INSTALL_DIR

Review Comment:
   Why was `--with-libidn-prefix` removed?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/HiveTable.java:
##########
@@ -94,17 +96,57 @@ private void validate(Map<String, String> properties) throws DdlException {
         copiedProps.remove(HIVE_TABLE);
 
         // check hive properties
-        // hive.metastore.uris
-        String hiveMetastoreUris = copiedProps.get(HIVE_METASTORE_URIS);
-        if (Strings.isNullOrEmpty(hiveMetastoreUris)) {
+        // hive.metastore.uris 
+        String hiveMetaStoreUris = copiedProps.get(HIVE_METASTORE_URIS);
+        if (Strings.isNullOrEmpty(hiveMetaStoreUris)) {
             throw new DdlException(String.format(PROPERTY_MISSING_MSG, HIVE_METASTORE_URIS, HIVE_METASTORE_URIS));
         }
         copiedProps.remove(HIVE_METASTORE_URIS);
-        hiveProperties.put(HIVE_METASTORE_URIS, hiveMetastoreUris);
+        hiveProperties.put(HIVE_METASTORE_URIS, hiveMetaStoreUris);
 
+        // check auth type
+        String authType = copiedProps.get(BrokerUtil.HDFS_SECURITY_AUTHENTICATION);
+        if (Strings.isNullOrEmpty(authType)) {
+            authType = AuthType.SIMPLE.getDesc();
+        }
+        if (!AuthType.isSupportedAuthType(authType)) {
+            throw new DdlException(String.format(PROPERTY_ERROR_MSG, BrokerUtil.HDFS_SECURITY_AUTHENTICATION, authType));
+        }
+        copiedProps.remove(BrokerUtil.HDFS_SECURITY_AUTHENTICATION);
+        hiveProperties.put(BrokerUtil.HDFS_SECURITY_AUTHENTICATION, authType);
+
+        if (AuthType.KERBEROS.getDesc().equals(authType)) {
+            // check principal
+            String principal = copiedProps.get(BrokerUtil.HDFS_KERBEROS_PRINCIPAL);
+            if (Strings.isNullOrEmpty(principal)) {
+                throw new DdlException(String.format(PROPERTY_MISSING_MSG, BrokerUtil.HDFS_KERBEROS_PRINCIPAL, BrokerUtil.HDFS_KERBEROS_PRINCIPAL));
+            }
+            hiveProperties.put(BrokerUtil.HDFS_KERBEROS_PRINCIPAL, principal);
+            copiedProps.remove(BrokerUtil.HDFS_KERBEROS_PRINCIPAL);
+            // check keytab
+            String keytabPath = copiedProps.get(BrokerUtil.HDFS_KERBEROS_KEYTAB);
+            String keytabContent = copiedProps.get(BrokerUtil.HDFS_KERBEROS_KEYTAB_WITH_BASE64);
+            if (Strings.isNullOrEmpty(keytabPath) && Strings.isNullOrEmpty(keytabContent)) {
+                throw new DdlException(String.format(PROPERTY_MISSING_MSG, BrokerUtil.HDFS_KERBEROS_KEYTAB, BrokerUtil.HDFS_KERBEROS_KEYTAB));
+            }
+            if (!Strings.isNullOrEmpty(keytabPath)) {
+                hiveProperties.put(BrokerUtil.HDFS_KERBEROS_KEYTAB, keytabPath);

Review Comment:
   Is it OK if the user sets both `keytab` and `keytab_with_base64`? Which one should take precedence?
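   
   For reference, `runKinit()` in `be/src/exec/hdfs_builder.cpp` (quoted above) effectively gives the base64 variant precedence: a non-empty `hdfs_kerberos_keytab_base64` overwrites `hdfs_kerberos_keytab`. A minimal sketch of stating that rule explicitly (names are hypothetical):
   ```cpp
   #include <string>
   
   // Hypothetical precedence rule, mirroring runKinit: when both properties are
   // set, the keytab materialized from the base64 content wins.
   std::string choose_keytab(const std::string& keytab_path,
                             const std::string& keytab_from_base64) {
       if (!keytab_from_base64.empty()) {
           return keytab_from_base64;
       }
       return keytab_path;
   }
   ```
   Alternatively, the FE could reject the combination here in `validate()` so the ambiguity never reaches the BE.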



##########
docs/zh-CN/ecosystem/external-table/hive-of-doris.md:
##########
@@ -108,6 +135,16 @@ PROPERTIES (
     - `dfs.namenode.rpc-address.[nameservice ID].[name node ID]`: the RPC address of each NameNode; the number of entries matches the number of NameNodes and must be consistent with hdfs-site.xml
     - `dfs.client.failover.proxy.provider.[nameservice ID]`: the Java class HDFS clients use to connect to the active NameNode, usually "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"

+- To access a Kerberos-enabled Hive data source, the following additional PROPERTIES must be configured for the Hive external table:
+    - `dfs.namenode.kerberos.principal`: the principal name of the HDFS namenode service
+    - `hadoop.security.authentication`: the authentication type; valid values are simple and kerberos, default is simple
+    - `hadoop.kerberos.principal`: the user's principal name
+    - `hadoop.kerberos.keytab`: the local file path of the user's keytab
+
+**Note:**
+- For Doris to access a Hadoop cluster with Kerberos authentication enabled, the Kerberos client (kinit) must be deployed on the Doris nodes, and krb5.conf must be configured with the KDC service information.
+- The value of the `hadoop.kerberos.keytab` property must be the absolute path of the local keytab file, and the Doris process must be allowed to access that file.

Review Comment:
   Should this clarify whether "local" means the FE node or the BE node?



##########
thirdparty/patches/libhdfs3-enable-kerberos.patch:
##########
@@ -0,0 +1,13 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 56c648d..1998edb 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -2,6 +2,8 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
+
+ PROJECT(libhdfs3)
+
++SET(WITH_KERBEROS 1)

Review Comment:
   I think this can be set in build-thirdparty.sh (e.g. by passing `-DWITH_KERBEROS=1` to cmake), so there is no need to add this patch?



##########
be/CMakeLists.txt:
##########
@@ -647,6 +669,7 @@ set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
     -static-libstdc++
     -static-libgcc
     -lstdc++fs
+    -lresolv

Review Comment:
   What is this for?



##########
be/src/exec/hdfs_builder.cpp:
##########
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/hdfs_builder.h"
+
+#include <fstream>
+#include <sstream>
+
+#include "agent/utils.h"
+#include "common/logging.h"
+#include "util/url_coding.h"
+
+namespace doris {
+
+Status HDFSCommonBuilder::runKinit() {
+    if (!hdfs_kerberos_keytab_base64.empty()) {
+        // write keytab file
+        std::ofstream fp("./doris.keytab");

Review Comment:
   Also, I think we should check whether the file already exists.
   If it does, there is no need to write it again.
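   
   A minimal sketch of such an idempotent write (the helper is hypothetical). Note that an existence check alone is not enough if the keytab content can change, so comparing the content is safer:
   ```cpp
   #include <filesystem>
   #include <fstream>
   #include <iterator>
   #include <string>
   
   // Hypothetical helper: write the keytab only when it is missing or stale.
   bool write_keytab_if_needed(const std::string& path, const std::string& content) {
       namespace fs = std::filesystem;
       if (fs::exists(path)) {
           std::ifstream in(path, std::ios::binary);
           std::string existing((std::istreambuf_iterator<char>(in)),
                                std::istreambuf_iterator<char>());
           if (existing == content) {
               return true;  // already up to date, skip the write
           }
       }
       std::ofstream out(path, std::ios::binary | std::ios::trunc);
       if (!out) {
           return false;  // caller maps this to "Create keytab file failed"
       }
       out << content;
       return static_cast<bool>(out);
   }
   ```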



##########
docs/zh-CN/ecosystem/external-table/hive-of-doris.md:
##########
@@ -108,6 +135,16 @@ PROPERTIES (
     - `dfs.namenode.rpc-address.[nameservice ID].[name node ID]`: the RPC address of each NameNode; the number of entries matches the number of NameNodes and must be consistent with hdfs-site.xml
     - `dfs.client.failover.proxy.provider.[nameservice ID]`: the Java class HDFS clients use to connect to the active NameNode, usually "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"

+- To access a Kerberos-enabled Hive data source, the following additional PROPERTIES must be configured for the Hive external table:
+    - `dfs.namenode.kerberos.principal`: the principal name of the HDFS namenode service
+    - `hadoop.security.authentication`: the authentication type; valid values are simple and kerberos, default is simple
+    - `hadoop.kerberos.principal`: the user's principal name
+    - `hadoop.kerberos.keytab`: the local file path of the user's keytab
+
+**Note:**
+- For Doris to access a Hadoop cluster with Kerberos authentication enabled, the Kerberos client (kinit) must be deployed on the Doris nodes, and krb5.conf must be configured with the KDC service information.
+- The value of the `hadoop.kerberos.keytab` property must be the absolute path of the local keytab file, and the Doris process must be allowed to access that file.

Review Comment:
   Also, `keytab_with_base64` needs to be documented.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

