You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uniffle.apache.org by ro...@apache.org on 2022/08/26 09:34:48 UTC
[incubator-uniffle] branch master updated: [FOLLOWUP] Add the conf of rss.security.hadoop.krb5-conf.file (#184)
This is an automated email from the ASF dual-hosted git repository.
roryqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new e1b9b80e [FOLLOWUP] Add the conf of rss.security.hadoop.krb5-conf.file (#184)
e1b9b80e is described below
commit e1b9b80e506eb5079c16cbb71096896f2f647b84
Author: Junfan Zhang <ju...@outlook.com>
AuthorDate: Fri Aug 26 17:34:43 2022 +0800
[FOLLOWUP] Add the conf of rss.security.hadoop.krb5-conf.file (#184)
### What changes were proposed in this pull request?
Add the conf of `rss.security.hadoop.kerberos.krb5-conf.file`
### Why are the changes needed?
Following up on #53: the krb5 conf file can already be set in `SecurityConfig`, and it should also be configurable by users through the shuffle-server/coordinator conf.
### Does this PR introduce _any_ user-facing change?
Yes
### How was this patch tested?
Not necessary.
---
README.md | 20 +++++++++++++
.../apache/uniffle/common/config/RssBaseConf.java | 7 +++++
.../common/security/HadoopSecurityContext.java | 1 +
.../org/apache/uniffle/common/KerberizedHdfs.java | 35 ++++++++++++++++++++--
.../common/security/HadoopSecurityContextTest.java | 28 +++++++++++++++--
.../uniffle/coordinator/CoordinatorServer.java | 2 ++
.../org/apache/uniffle/server/ShuffleServer.java | 2 ++
7 files changed, 91 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index 334c0574..214cfbc4 100644
--- a/README.md
+++ b/README.md
@@ -252,6 +252,26 @@ For more details of advanced configuration, please see [Uniffle Coordinator Guid
For more details of advanced configuration, please see [Uniffle Shuffle Client Guide](https://github.com/apache/incubator-uniffle/blob/master/docs/client_guide.md).
+## Security: Hadoop Kerberos authentication
+The primary goals of the Uniffle Kerberos security are:
+1. to enable secure data access for the coordinator and shuffle-servers, such as dynamic conf/exclude-node files stored in a secured DFS cluster.
+2. to let shuffle-servers write shuffle data to a Kerberos-secured DFS cluster.
+
+The following security configurations are introduced.
+
+|Property Name|Default|Description|
+|---|---|---|
+|rss.security.hadoop.kerberos.enable|false|Whether to enable access to a secured Hadoop cluster|
+|rss.security.hadoop.kerberos.krb5-conf.file|-|The file path of krb5.conf. Only takes effect when rss.security.hadoop.kerberos.enable is true|
+|rss.security.hadoop.kerberos.keytab.file|-|The Kerberos keytab file path. Only takes effect when rss.security.hadoop.kerberos.enable is true|
+|rss.security.hadoop.kerberos.principal|-|The Kerberos principal for the keytab. Only takes effect when rss.security.hadoop.kerberos.enable is true|
+|rss.security.hadoop.kerberos.relogin.interval.sec|60|The Kerberos authentication relogin interval, in seconds|
+
+* The proxy user mechanism is used to keep data isolated in Uniffle, which means the shuffle data written by
+ shuffle-servers is owned by the Spark app's user. To achieve this, the login user specified by the above config
+ should be configured as a superuser for HDFS. For more details,
+ please see [Proxy user - Superusers Acting On Behalf Of Other Users](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/Superusers.html)
+
## LICENSE
Uniffle is under the Apache License Version 2.0. See the [LICENSE](https://github.com/apache/incubator-uniffle/blob/master/LICENSE) file for details.
diff --git a/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java b/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
index 7901c5d3..94a26268 100644
--- a/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
+++ b/common/src/main/java/org/apache/uniffle/common/config/RssBaseConf.java
@@ -163,6 +163,13 @@ public class RssBaseConf extends RssConf {
.defaultValue(false)
.withDescription("Whether enable visiting secured hadoop cluster.");
+ public static final ConfigOption<String> RSS_SECURITY_HADOOP_KRB5_CONF_FILE = ConfigOptions
+ .key("rss.security.hadoop.kerberos.krb5-conf.file")
+ .stringType()
+ .noDefaultValue()
+ .withDescription("The file path of krb5.conf. And only when "
+ + RSS_SECURITY_HADOOP_KERBEROS_ENABLE.key() + " enabled, the option will be valid.");
+
public static final ConfigOption<String> RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE = ConfigOptions
.key("rss.security.hadoop.kerberos.keytab.file")
.stringType()
diff --git a/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java b/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
index 41f5d413..35c98f39 100644
--- a/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
+++ b/common/src/main/java/org/apache/uniffle/common/security/HadoopSecurityContext.java
@@ -56,6 +56,7 @@ public class HadoopSecurityContext implements SecurityContext {
if (StringUtils.isNotEmpty(krb5ConfPath)) {
System.setProperty(KRB5_CONF_KEY, krb5ConfPath);
+ sun.security.krb5.Config.refresh();
}
Configuration conf = new Configuration(false);
diff --git a/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java b/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java
index bc1fe1e2..729be6c6 100644
--- a/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java
+++ b/common/src/test/java/org/apache/uniffle/common/KerberizedHdfs.java
@@ -22,9 +22,12 @@ import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
+import java.net.ServerSocket;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
@@ -45,6 +48,8 @@ import org.apache.hadoop.security.ssl.KeyStoreTestUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.uniffle.common.util.RssUtils;
+
import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SASL_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY;
@@ -92,7 +97,7 @@ public class KerberizedHdfs implements Serializable {
}
private void setupDFSData() throws Exception {
- String principal = "alex/localhost";
+ String principal = "alex/" + RssUtils.getHostIp();
File keytab = new File(workDir, "alex.keytab");
kdc.createPrincipal(keytab, principal);
alexKeytab = keytab.getAbsolutePath();
@@ -157,7 +162,7 @@ public class KerberizedHdfs implements Serializable {
String krb5Conf = kdc.getKrb5conf().getAbsolutePath();
System.setProperty("java.security.krb5.conf", krb5Conf);
- String principal = "hdfs" + "/localhost";
+ String principal = "hdfs/" + RssUtils.getHostIp();
File keytab = new File(workDir, "hdfs.keytab");
kdc.createPrincipal(keytab, principal);
hdfsKeytab = keytab.getPath();
@@ -176,11 +181,20 @@ public class KerberizedHdfs implements Serializable {
hdfsConf.set("hadoop.proxyuser.hdfs.groups", "*");
hdfsConf.set("hadoop.proxyuser.hdfs.users", "*");
+ List<Integer> ports = findAvailablePorts(5);
+ LOGGER.info("Find available ports: {}", ports);
+
+ hdfsConf.set("dfs.datanode.ipc.address", "0.0.0.0:" + ports.get(0));
+ hdfsConf.set("dfs.datanode.address", "0.0.0.0:" + ports.get(1));
+ hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" + ports.get(2));
+ hdfsConf.set("dfs.datanode.http.address", "0.0.0.0:" + ports.get(3));
+
kerberizedDfsCluster = ugi.doAs(new PrivilegedExceptionAction<MiniDFSCluster>() {
@Override
public MiniDFSCluster run() throws Exception {
return new MiniDFSCluster
.Builder(hdfsConf)
+ .nameNodePort(ports.get(4))
.numDataNodes(1)
.clusterId("kerberized-cluster-1")
.checkDataNodeAddrConfig(true)
@@ -216,6 +230,23 @@ public class KerberizedHdfs implements Serializable {
UserGroupInformation.reset();
}
+ private List<Integer> findAvailablePorts(int num) throws IOException {
+ List<ServerSocket> sockets = new ArrayList<>();
+ List<Integer> ports = new ArrayList<>();
+
+ for (int i = 0; i < num; i++) {
+ ServerSocket socket = new ServerSocket(0);
+ ports.add(socket.getLocalPort());
+ sockets.add(socket);
+ }
+
+ for (ServerSocket socket : sockets) {
+ socket.close();
+ }
+
+ return ports;
+ }
+
public String getSchemeAndAuthorityPrefix() {
return String.format("hdfs://localhost:%s/", kerberizedDfsCluster.getNameNodePort());
}
diff --git a/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java b/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
index 9a737558..ecbffb98 100644
--- a/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
+++ b/common/src/test/java/org/apache/uniffle/common/security/HadoopSecurityContextTest.java
@@ -41,8 +41,6 @@ public class HadoopSecurityContextTest extends KerberizedHdfsBase {
@Test
public void testSecuredCallable() throws Exception {
- String val = System.getProperty("java.security.krb5.conf");
-
HadoopSecurityContext context = new HadoopSecurityContext(
null,
kerberizedHdfs.getHdfsKeytab(),
@@ -119,5 +117,31 @@ public class HadoopSecurityContextTest extends KerberizedHdfsBase {
} catch (Exception e) {
assertTrue(e.getMessage().contains("refreshIntervalSec must be not negative"));
}
+
+ // case4: lack krb5 conf, should throw exception
+ String krbConfFilePath = System.getProperty("java.security.krb5.conf");
+ System.clearProperty("java.security.krb5.conf");
+ try {
+ HadoopSecurityContext context = new HadoopSecurityContext(
+ null,
+ kerberizedHdfs.getHdfsKeytab(),
+ kerberizedHdfs.getHdfsPrincipal(),
+ 100
+ );
+ fail();
+ } catch (Exception e) {
+ assertTrue(e.getMessage().contains("Cannot locate KDC"));
+ }
+
+ // case5: After setting the krb5 conf, it should pass
+ HadoopSecurityContext context = new HadoopSecurityContext(
+ krbConfFilePath,
+ kerberizedHdfs.getHdfsKeytab(),
+ kerberizedHdfs.getHdfsPrincipal(),
+ 100
+ );
+
+ // recover System property of krb5 conf
+ System.setProperty("java.security.krb5.conf", krbConfFilePath);
}
}
diff --git a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
index 8ce10eb1..cc8c9e84 100644
--- a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
+++ b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
@@ -36,6 +36,7 @@ import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_K
import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE;
import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL;
import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC;
+import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KRB5_CONF_FILE;
/**
* The main entrance of coordinator service
@@ -125,6 +126,7 @@ public class CoordinatorServer {
SecurityConfig securityConfig = null;
if (coordinatorConf.getBoolean(RSS_SECURITY_HADOOP_KERBEROS_ENABLE)) {
securityConfig = SecurityConfig.newBuilder()
+ .krb5ConfPath(coordinatorConf.getString(RSS_SECURITY_HADOOP_KRB5_CONF_FILE))
.keytabFilePath(coordinatorConf.getString(RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE))
.principal(coordinatorConf.getString(RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL))
.reloginIntervalSec(coordinatorConf.getLong(RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC))
diff --git a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
index 262673d0..c0e92aa0 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
@@ -49,6 +49,7 @@ import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_K
import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE;
import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL;
import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC;
+import static org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KRB5_CONF_FILE;
/**
* Server that manages startup/shutdown of a {@code Greeter} server.
@@ -155,6 +156,7 @@ public class ShuffleServer {
SecurityConfig securityConfig = null;
if (shuffleServerConf.getBoolean(RSS_SECURITY_HADOOP_KERBEROS_ENABLE)) {
securityConfig = securityConfig.newBuilder()
+ .krb5ConfPath(shuffleServerConf.getString(RSS_SECURITY_HADOOP_KRB5_CONF_FILE))
.keytabFilePath(shuffleServerConf.getString(RSS_SECURITY_HADOOP_KERBEROS_KEYTAB_FILE))
.principal(shuffleServerConf.getString(RSS_SECURITY_HADOOP_KERBEROS_PRINCIPAL))
.reloginIntervalSec(shuffleServerConf.getLong(RSS_SECURITY_HADOOP_KERBEROS_RELOGIN_INTERVAL_SEC))