Posted to commits@uniffle.apache.org by ro...@apache.org on 2022/08/26 09:34:48 UTC

[incubator-uniffle] branch master updated: [FOLLOWUP] Add the conf of rss.security.hadoop.krb5-conf.file (#184)

This is an automated email from the ASF dual-hosted git repository.

roryqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new e1b9b80e [FOLLOWUP] Add the conf of rss.security.hadoop.krb5-conf.file (#184)
e1b9b80e is described below

commit e1b9b80e506eb5079c16cbb71096896f2f647b84
Author: Junfan Zhang <ju...@outlook.com>
AuthorDate: Fri Aug 26 17:34:43 2022 +0800

    [FOLLOWUP] Add the conf of rss.security.hadoop.krb5-conf.file (#184)
    
    ### What changes were proposed in this pull request?
    Add the conf of `rss.security.hadoop.krb5-conf.file`
    
    ### Why are the changes needed?
    Follow-up to #53: the krb5 conf file can be configured in `SecurityConfig`, and it should also be configurable in the shuffle-server/coordinator conf for users.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes
    
    ### How was this patch tested?
    Not necessary.
---
 README.md                                          | 20 +++++++++++++
 .../apache/uniffle/common/config/RssBaseConf.java  |  7 +++++
 .../common/security/HadoopSecurityContext.java     |  1 +
 .../org/apache/uniffle/common/KerberizedHdfs.java  | 35 ++++++++++++++++++++--
 .../common/security/HadoopSecurityContextTest.java | 28 +++++++++++++++--
 .../uniffle/coordinator/CoordinatorServer.java     |  2 ++
 .../org/apache/uniffle/server/ShuffleServer.java   |  2 ++
 7 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 334c0574..214cfbc4 100644
--- a/README.md
+++ b/README.md
@@ -252,6 +252,26 @@ For more details of advanced configuration, please see [Uniffle Coordinator Guid
 
 For more details of advanced configuration, please see [Uniffle Shuffle Client Guide](https://github.com/apache/incubator-uniffle/blob/master/docs/client_guide.md).
 
+## Security: Hadoop Kerberos authentication
+The primary goals of Uniffle's Kerberos security are:
+1. to enable secure data access for the coordinator/shuffle-servers, such as dynamic conf/exclude-node files stored in a secured DFS cluster
+2. to let shuffle-servers write shuffle data to a Kerberos-secured DFS cluster.
+
+The following security configurations are introduced.
+
+|Property Name|Default|Description|
+|---|---|---|
+|rss.security.hadoop.kerberos.enable|false|Whether to enable access to a secured Hadoop cluster|
+|rss.security.hadoop.kerberos.krb5-conf.file|-|The file path of krb5.conf. This option only takes effect when rss.security.hadoop.kerberos.enable is true|
+|rss.security.hadoop.kerberos.keytab.file|-|The Kerberos keytab file path. This option only takes effect when rss.security.hadoop.kerberos.enable is true|
+|rss.security.hadoop.kerberos.principal|-|The Kerberos principal of the keytab. This option only takes effect when rss.security.hadoop.kerberos.enable is true|
+|rss.security.hadoop.kerberos.relogin.interval.sec|60|The Kerberos relogin interval, in seconds|
+
+* The proxy user mechanism is used to preserve data isolation in Uniffle, which means the shuffle data written by
+  shuffle-servers is owned by the Spark app's user. To achieve this, the login user specified by the above config
+  must be configured as an HDFS superuser (a sketch of this idiom follows after the diff). For more details,
+  please see [Proxy user - Superusers Acting On Behalf Of Other Users](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/Superusers.html)
+
 ## LICENSE
 
 Uniffle is under the Apache License Version 2.0. See the [LICENSE](https://github.com/apache/incubator-uniffle/blob/master/LICENSE) file for details.
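
The proxy-user requirement in the bullet above maps onto a standard Hadoop UGI idiom. A minimal sketch, assuming a hypothetical principal, keytab path, and app user (none of these names come from this commit):

import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

public class ProxyUserSketch {
  public static void main(String[] args) throws Exception {
    // Log in with the service principal; for impersonation to work it must be
    // declared a proxy user/superuser in the HDFS cluster's core-site.xml.
    UserGroupInformation loginUgi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(
        "rss/host@EXAMPLE.COM", "/etc/security/keytabs/rss.keytab");

    // Impersonate the Spark app's user so shuffle files end up owned by that user.
    UserGroupInformation proxyUgi = UserGroupInformation.createProxyUser("spark_user", loginUgi);

    proxyUgi.doAs((PrivilegedExceptionAction<Void>) () -> {
      FileSystem fs = FileSystem.get(new Configuration());
      fs.mkdirs(new Path("/rss/data/spark_user"));  // created as spark_user, not the login user
      return null;
    });
  }
}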
diff --git a/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java b/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
index 7901c5d3..94a26268 100644
--- a/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
+++ b/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
@@ -163,6 +163,13 @@ public class RssBaseConf extends RssConf {
       .defaultValue(false)
       .withDescription("Whether enable visiting secured hadoop cluster.");
 
+  public static final ConfigOption<String> RSS_SECURITY_HADOOP_KRB5_CONF_FILE = ConfigOptions
+      .key("rss.security.hadoop.kerberos.krb5-conf.file")
+      .stringType()
+      .noDefaultValue()
+      .withDescription("The file path of krb5.conf. And only when "
+          + RSS_SECURITY_HADOOP_KERBEROS_ENABLE.key() + " enabled, the option will be valid.");
+
   public static final ConfigOption<String> RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE = ConfigOptions
       .key("rss.security.hadoop.kerberos.keytab.file")
       .stringType()
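
Because the new option is declared with noDefaultValue(), consumers have to tolerate an unset path. A sketch of the consuming pattern (assuming getString returns null for an unset option, and StringUtils from commons-lang3 as used elsewhere in this codebase):

// conf is any RssBaseConf-derived configuration object.
String krb5ConfPath = conf.getString(RssBaseConf.RSS_SECURITY_HADOOP_KRB5_CONF_FILE);
if (StringUtils.isNotEmpty(krb5ConfPath)) {
  // Only touch global JVM state when a path was actually configured.
  System.setProperty("java.security.krb5.conf", krb5ConfPath);
}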
diff --git a/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java b/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
index 41f5d413..35c98f39 100644
--- a/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
+++ b/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
@@ -56,6 +56,7 @@ public class HadoopSecurityContext implements SecurityContext {
 
     if (StringUtils.isNotEmpty(krb5ConfPath)) {
       System.setProperty(KRB5_CONF_KEY, krb5ConfPath);
+      sun.security.krb5.Config.refresh();
     }
 
     Configuration conf = new Configuration(false);
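
The one-line addition above matters because the JDK parses and caches the krb5 configuration on first use, so setting java.security.krb5.conf afterwards would be silently ignored. A minimal sketch of the idiom (reloadKrb5Conf is a hypothetical helper; sun.security.krb5.Config is an internal JDK API, and Config.refresh() throws a checked KrbException):

// Retarget the JVM's Kerberos configuration at runtime.
static void reloadKrb5Conf(String krb5ConfPath) throws sun.security.krb5.KrbException {
  System.setProperty("java.security.krb5.conf", krb5ConfPath);
  // Force the JDK to re-parse the file; otherwise the previously cached
  // config (or the system default) stays in effect.
  sun.security.krb5.Config.refresh();
}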
diff --git a/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java b/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java
index bc1fe1e2..729be6c6 100644
--- a/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java
+++ b/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java
@@ -22,9 +22,12 @@ import java.io.File;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Serializable;
+import java.net.ServerSocket;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
@@ -45,6 +48,8 @@ import org.apache.hadoop.security.ssl.KeyStoreTestUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.uniffle.common.util.RssUtils;
+
 import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_KEY;
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY;
@@ -92,7 +97,7 @@ public class KerberizedHdfs implements Serializable {
   }
 
   private void setupDFSData() throws Exception {
-    String principal = "alex/localhost";
+    String principal = "alex/" + RssUtils.getHostIp();
     File keytab = new File(workDir, "alex.keytab");
     kdc.createPrincipal(keytab, principal);
     alexKeytab = keytab.getAbsolutePath();
@@ -157,7 +162,7 @@ public class KerberizedHdfs implements Serializable {
     String krb5Conf = kdc.getKrb5conf().getAbsolutePath();
     System.setProperty("java.security.krb5.conf", krb5Conf);
 
-    String principal = "hdfs" + "/localhost";
+    String principal = "hdfs/" + RssUtils.getHostIp();
     File keytab = new File(workDir, "hdfs.keytab");
     kdc.createPrincipal(keytab, principal);
     hdfsKeytab = keytab.getPath();
@@ -176,11 +181,20 @@ public class KerberizedHdfs implements Serializable {
     hdfsConf.set("hadoop.proxyuser.hdfs.groups", "*");
     hdfsConf.set("hadoop.proxyuser.hdfs.users", "*");
 
+    List<Integer> ports = findAvailablePorts(5);
+    LOGGER.info("Find available ports: {}", ports);
+
+    hdfsConf.set("dfs.datanode.ipc.address", "0.0.0.0:" + ports.get(0));
+    hdfsConf.set("dfs.datanode.address", "0.0.0.0:" + ports.get(1));
+    hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" + ports.get(2));
+    hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" + ports.get(3));
+
     kerberizedDfsCluster = ugi.doAs(new PrivilegedExceptionAction<MiniDFSCluster>() {
       @Override
       public MiniDFSCluster run() throws Exception {
         return new MiniDFSCluster
             .Builder(hdfsConf)
+            .nameNodePort(ports.get(4))
             .numDataNodes(1)
             .clusterId("kerberized-cluster-1")
             .checkDataNodeAddrConfig(true)
@@ -216,6 +230,23 @@ public class KerberizedHdfs implements Serializable {
     UserGroupInformation.reset();
   }
 
+  private List<Integer> findAvailablePorts(int num) throws IOException {
+    List<ServerSocket> sockets = new ArrayList<>();
+    List<Integer> ports = new ArrayList<>();
+
+    for (int i = 0; i < num; i++) {
+      ServerSocket socket = new ServerSocket(0);
+      ports.add(socket.getLocalPort());
+      sockets.add(socket);
+    }
+
+    for (ServerSocket socket : sockets) {
+      socket.close();
+    }
+
+    return ports;
+  }
+
   public String getSchemeAndAuthorityPrefix() {
     return String.format("hdfs://localhost:%s/", kerberizedDfsCluster.getNameNodePort());
   }
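
A caveat on findAvailablePorts: the sockets are closed before MiniDFSCluster binds the ports, so another process can grab one in the gap. A race-hardened variant (a sketch, not part of this commit) binds explicitly so SO_REUSEADDR actually takes effect, and releases the sockets in a finally block:

import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.util.ArrayList;
import java.util.List;

final class PortFinder {
  static List<Integer> findAvailablePorts(int num) throws IOException {
    List<ServerSocket> sockets = new ArrayList<>();
    List<Integer> ports = new ArrayList<>();
    try {
      for (int i = 0; i < num; i++) {
        ServerSocket socket = new ServerSocket();
        socket.setReuseAddress(true);            // must precede bind() to take effect
        socket.bind(new InetSocketAddress(0));   // port 0: the OS picks a free port
        sockets.add(socket);
        ports.add(socket.getLocalPort());
      }
    } finally {
      for (ServerSocket socket : sockets) {
        socket.close();                          // release even if bind() failed midway
      }
    }
    return ports;
  }
}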
diff --git a/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java b/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
index 9a737558..ecbffb98 100644
--- a/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
+++ b/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
@@ -41,8 +41,6 @@ public class HadoopSecurityContextTest extends KerberizedHdfsBase {
 
   @Test
   public void testSecuredCallable() throws Exception {
-    String val = System.getProperty("java.security.krb5.conf");
-
     HadoopSecurityContext context = new HadoopSecurityContext(
         null,
         kerberizedHdfs.getHdfsKeytab(),
@@ -119,5 +117,31 @@ public class HadoopSecurityContextTest extends KerberizedHdfsBase {
     } catch (Exception e) {
       assertTrue(e.getMessage().contains("refreshIntervalSec must be not negative"));
     }
+
+    // case4: lack krb5 conf, should throw exception
+    String krbConfFilePath = System.getProperty("java.security.krb5.conf");
+    System.clearProperty("java.security.krb5.conf");
+    try {
+      HadoopSecurityContext context = new HadoopSecurityContext(
+          null,
+          kerberizedHdfs.getHdfsKeytab(),
+          kerberizedHdfs.getHdfsPrincipal(),
+          100
+      );
+      fail();
+    } catch (Exception e) {
+      assertTrue(e.getMessage().contains("Cannot locate KDC"));
+    }
+
+    // case5: After setting the krb5 conf, it should pass
+    HadoopSecurityContext context = new HadoopSecurityContext(
+        krbConfFilePath,
+        kerberizedHdfs.getHdfsKeytab(),
+        kerberizedHdfs.getHdfsPrincipal(),
+        100
+    );
+
+    // restore the krb5 conf system property so later tests are unaffected
+    System.setProperty("java.security.krb5.conf", krbConfFilePath);
   }
 }
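
Since case4 and case5 mutate the process-global java.security.krb5.conf property, any failure between clearProperty and the final setProperty would leak state into later tests. A defensive variant (a sketch, not the committed code):

// Save, clear, and unconditionally restore the global property.
String saved = System.getProperty("java.security.krb5.conf");
System.clearProperty("java.security.krb5.conf");
try {
  // ... construct HadoopSecurityContext and assert on failure/success ...
} finally {
  if (saved != null) {
    System.setProperty("java.security.krb5.conf", saved);
  }
}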
diff --git a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
index 8ce10eb1..cc8c9e84 100644
--- a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
+++ b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
@@ -36,6 +36,7 @@ import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_K
 import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE;
 import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL;
 import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC;
+import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KRB5_CONF_FILE;
 
 /**
  * The main entrance of coordinator service
@@ -125,6 +126,7 @@ public class CoordinatorServer {
     SecurityConfig securityConfig = null;
     if (coordinatorConf.getBoolean(RSS_SECURITY_HADOOP_KERBEROS_ENABLE)) {
       securityConfig = SecurityConfig.newBuilder()
+          .krb5ConfPath(coordinatorConf.getString(RSS_SECURITY_HADOOP_KRB5_CONF_FILE))
           .keytabFilePath(coordinatorConf.getString(RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE))
           .principal(coordinatorConf.getString(RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL))
           .reloginIntervalSec(coordinatorConf.getLong(RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC))
diff --git a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
index 262673d0..c0e92aa0 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
@@ -49,6 +49,7 @@ import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_K
 import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE;
 import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL;
 import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC;
+import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KRB5_CONF_FILE;
 
 /**
  * Server that manages startup/shutdown of a {@code Greeter} server.
@@ -155,6 +156,7 @@ public class ShuffleServer {
     SecurityConfig securityConfig = null;
     if (shuffleServerConf.getBoolean(RSS_SECURITY_HADOOP_KERBEROS_ENABLE)) {
       securityConfig = SecurityConfig.newBuilder()
+          .krb5ConfPath(shuffleServerConf.getString(RSS_SECURITY_HADOOP_KRB5_CONF_FILE))
           .keytabFilePath(shuffleServerConf.getString(RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE))
           .principal(shuffleServerConf.getString(RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL))
           .reloginIntervalSec(shuffleServerConf.getLong(RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC))
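
The wiring is identical in both daemons: read the new option and pass it into the SecurityConfig builder. A condensed sketch of the flow (the trailing build() call is an assumption based on the builder pattern; it lies outside this hunk's context):

SecurityConfig securityConfig = null;
if (conf.getBoolean(RSS_SECURITY_HADOOP_KERBEROS_ENABLE)) {
  securityConfig = SecurityConfig.newBuilder()
      .krb5ConfPath(conf.getString(RSS_SECURITY_HADOOP_KRB5_CONF_FILE))  // new in this PR
      .keytabFilePath(conf.getString(RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE))
      .principal(conf.getString(RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL))
      .reloginIntervalSec(conf.getLong(RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC))
      .build();  // assumed terminal call
}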