You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2022/03/23 22:08:42 UTC
[hadoop] branch branch-2.10 updated: HDFS-13248: Namenode needs to use the actual client IP when going through the RBF proxy. There is a new configuration knob dfs.namenode.ip-proxy-users that configures the list of users that can set their client IP address using the client context.
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch branch-2.10
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-2.10 by this push:
new ce208b1 HDFS-13248: Namenode needs to use the actual client IP when going through the RBF proxy. There is a new configuration knob dfs.namenode.ip-proxy-users that configures the list of users that can set their client IP address using the client context.
ce208b1 is described below
commit ce208b1a1e220dfc78c20c85ccd6beea6f7f5e6e
Author: Owen O'Malley <oo...@linkedin.com>
AuthorDate: Thu Mar 17 14:06:55 2022 -0700
HDFS-13248: Namenode needs to use the actual client IP when going through the
RBF proxy. There is a new configuration knob dfs.namenode.ip-proxy-users that configures
the list of users that can set their client IP address using the client context.
Fixes #4081
---
.../java/org/apache/hadoop/ipc/CallerContext.java | 6 ++
.../hadoop/security/UserGroupInformation.java | 14 +++-
.../src/site/markdown/HDFSRouterFederation.md | 14 ++++
.../java/org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 +
.../hdfs/server/namenode/NameNodeRpcServer.java | 31 ++++++-
.../src/main/resources/hdfs-default.xml | 11 +++
.../server/namenode/TestNameNodeRpcServer.java | 97 +++++++++++++++++++++-
7 files changed, 172 insertions(+), 3 deletions(-)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java
index e984821..322cc89 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java
@@ -37,6 +37,11 @@ import java.util.Arrays;
@InterfaceStability.Evolving
public class CallerContext {
public static final Charset SIGNATURE_ENCODING = StandardCharsets.UTF_8;
+
+ // field names
+ public static final String CLIENT_IP_STR = "clientIp";
+ public static final String CLIENT_PORT_STR = "clientPort";
+
/** The caller context.
*
* It will be truncated if it exceeds the maximum allowed length in
@@ -110,6 +115,7 @@ public class CallerContext {
/** The caller context builder. */
public static final class Builder {
+ public static final String KEY_VALUE_SEPARATOR = ":";
private final String context;
private byte[] signature;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
index bc8b47a..99a3332 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
@@ -1559,7 +1559,19 @@ public class UserGroupInformation {
return null;
}
-
+ /**
+ * If this is a proxy user, get the real user. Otherwise, return
+ * this user.
+ * @param user the user to check
+ * @return the real user or self
+ */
+ public static UserGroupInformation getRealUserOrSelf(UserGroupInformation user) {
+ if (user == null) {
+ return null;
+ }
+ UserGroupInformation real = user.getRealUser();
+ return real != null ? real : user;
+ }
/**
* This class is used for storing the groups for testing. It stores a local
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md
index c5bf5e1..4ff383a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md
@@ -286,6 +286,20 @@ With this setting a user can interact with `ns-fed` as a regular namespace:
This federated namespace can also be set as the default one at **core-site.xml** using `fs.defaultFS`.
+NameNode configuration
+--------------------
+
+In order for the system to support data locality, you must configure your NameNodes so that they will trust the routers to supply the user's client IP address. `dfs.namenode.ip-proxy-users` defines a comma-separated list of users that are allowed to provide the client IP address via the caller context.
+
+```xml
+<configuration>
+ <property>
+ <name>dfs.namenode.ip-proxy-users</name>
+ <value>hdfs</value>
+ </property>
+</configuration>
+```
+
Router configuration
--------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 980ce9b..77e9f98 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -645,6 +645,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final boolean DFS_SUPPORT_APPEND_DEFAULT = true;
public static final String DFS_HTTPS_ENABLE_KEY = "dfs.https.enable";
public static final boolean DFS_HTTPS_ENABLE_DEFAULT = false;
+ // List of users that can override their client ip
+ public static final String DFS_NAMENODE_IP_PROXY_USERS = "dfs.namenode.ip-proxy-users";
public static final String DFS_HTTP_POLICY_KEY = "dfs.http.policy";
public static final String DFS_HTTP_POLICY_DEFAULT = HttpConfig.Policy.HTTP_ONLY.name();
public static final String DFS_DEFAULT_CHUNK_VIEW_SIZE_KEY = "dfs.default.chunk.view.size";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
index a14ff23..8f25df4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
@@ -21,6 +21,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENG
import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HANDLER_COUNT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HANDLER_COUNT_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_IP_PROXY_USERS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIFELINE_HANDLER_COUNT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIFELINE_HANDLER_RATIO_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIFELINE_HANDLER_RATIO_KEY;
@@ -47,6 +48,8 @@ import java.util.Set;
import java.util.concurrent.Callable;
import com.google.common.collect.Lists;
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.hadoop.ipc.CallerContext;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -253,6 +256,9 @@ public class NameNodeRpcServer implements NamenodeProtocols {
private final String minimumDataNodeVersion;
+ // Users who can override the client ip
+ private final String[] ipProxyUsers;
+
public NameNodeRpcServer(Configuration conf, NameNode nn)
throws IOException {
this.nn = nn;
@@ -263,6 +269,7 @@ public class NameNodeRpcServer implements NamenodeProtocols {
int handlerCount =
conf.getInt(DFS_NAMENODE_HANDLER_COUNT_KEY,
DFS_NAMENODE_HANDLER_COUNT_DEFAULT);
+ ipProxyUsers = conf.getStrings(DFS_NAMENODE_IP_PROXY_USERS);
RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class,
ProtobufRpcEngine.class);
@@ -1770,7 +1777,29 @@ public class NameNodeRpcServer implements NamenodeProtocols {
}
}
- private static String getClientMachine() {
+ private String getClientMachine() {
+ if (ipProxyUsers != null) {
+ // Get the real user (or effective if it isn't a proxy user)
+ UserGroupInformation user =
+ UserGroupInformation.getRealUserOrSelf(Server.getRemoteUser());
+ if (user != null &&
+ ArrayUtils.contains(ipProxyUsers, user.getShortUserName())) {
+ CallerContext context = CallerContext.getCurrent();
+ if (context != null && context.isContextValid()) {
+ String cc = context.getContext();
+ // if the rpc has a caller context of "clientIp:1.2.3.4,CLI",
+ // return "1.2.3.4" as the client machine.
+ String key = CallerContext.CLIENT_IP_STR +
+ CallerContext.Builder.KEY_VALUE_SEPARATOR;
+ int posn = cc.indexOf(key);
+ if (posn != -1) {
+ posn += key.length();
+ int end = cc.indexOf(",", posn);
+ return end == -1 ? cc.substring(posn) : cc.substring(posn, end);
+ }
+ }
+ }
+ }
String clientMachine = Server.getRemoteAddress();
if (clientMachine == null) { //not a RPC client
clientMachine = "";
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 2cbd486..87275059 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -485,6 +485,17 @@
</property>
<property>
+ <name>dfs.namenode.ip-proxy-users</name>
+ <value></value>
+ <description>A comma-separated list of user names that are allowed by the
+ NameNode to specify a different client IP address in the caller context.
+ This is used by Router-Based Federation (RBF) to provide the actual client's
+ IP address to the NameNode, which is critical to preserve data locality when
+ using RBF. If you are using RBF, add the user that runs the routers.
+ </description>
+</property>
+
+<property>
<name>dfs.namenode.acls.enabled</name>
<value>false</value>
<description>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRpcServer.java
index ada93e8..7fc9c8b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRpcServer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRpcServer.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -24,14 +24,25 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_IP_PROXY_USERS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_BIND_HOST_KEY;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.ipc.CallerContext;
+import org.apache.hadoop.security.UserGroupInformation;
import org.junit.Test;
public class TestNameNodeRpcServer {
@@ -59,5 +70,89 @@ public class TestNameNodeRpcServer {
conf.unset(DFS_NAMENODE_RPC_BIND_HOST_KEY);
}
}
+
+ /**
+ * Get the preferred DataNode location for the first block of the
+ * given file.
+ * @param fs The file system to use
+ * @param p The path to use
+ * @return the preferred host to get the data
+ */
+ private static String getPreferredLocation(DistributedFileSystem fs,
+ Path p) throws IOException{
+ // Use getLocatedBlocks because it is the basis for HDFS open,
+ // but provides visibility into which host will be used.
+ LocatedBlocks blocks = fs.getClient()
+ .getLocatedBlocks(p.toUri().getPath(), 0);
+ return blocks.get(0).getLocations()[0].getHostName();
+ }
+
+ // Because of the randomness of the NN assigning DN, we run multiple
+ // trials. 1/3^20=3e-10, so that should be good enough.
+ static final int ITERATIONS_TO_USE = 20;
+
+ /**
+ * A test to make sure that if an authorized user adds "clientIp:" to their
+ * caller context, it will be used to make locality decisions on the NN.
+ */
+ @Test
+ public void testNamenodeRpcClientIpProxy() throws IOException {
+ Configuration conf = new HdfsConfiguration();
+
+ conf.set(DFS_NAMENODE_IP_PROXY_USERS, "fake_joe");
+ // Make 3 nodes & racks so that we have a decent shot of detecting when
+ // our change overrides the random choice of datanode.
+ final String[] racks = new String[]{"/rack1", "/rack2", "/rack3"};
+ final String[] hosts = new String[]{"node1", "node2", "node3"};
+ MiniDFSCluster cluster = null;
+ final CallerContext original = CallerContext.getCurrent();
+
+ try {
+ cluster = new MiniDFSCluster.Builder(conf)
+ .racks(racks).hosts(hosts).numDataNodes(hosts.length)
+ .build();
+ cluster.waitActive();
+ DistributedFileSystem fs = cluster.getFileSystem();
+ // Write a sample file
+ final Path fooName = fs.makeQualified(new Path("/foo"));
+ FSDataOutputStream stream = fs.create(fooName);
+ stream.write("Hello world!\n".getBytes(StandardCharsets.UTF_8));
+ stream.close();
+ // Set the caller context to set the ip address
+ StringBuilder contextStr = new StringBuilder("test,")
+ .append(CallerContext.CLIENT_IP_STR)
+ .append(CallerContext.Builder.KEY_VALUE_SEPARATOR)
+ .append(hosts[0]);
+ CallerContext.setCurrent(new CallerContext.Builder(contextStr.toString()).build());
+ // Should get a random mix of DataNodes since we aren't joe.
+ for (int trial = 0; trial < ITERATIONS_TO_USE; ++trial) {
+ String host = getPreferredLocation(fs, fooName);
+ if (!hosts[0].equals(host)) {
+ // found some other host, so things are good
+ break;
+ } else if (trial == ITERATIONS_TO_USE - 1) {
+ assertNotEquals("Failed to get non-node1", hosts[0], host);
+ }
+ }
+ // Run as fake joe to authorize the test
+ UserGroupInformation joe =
+ UserGroupInformation.createUserForTesting("fake_joe",
+ new String[]{"fake_group"});
+ DistributedFileSystem joeFs =
+ (DistributedFileSystem) DFSTestUtil.getFileSystemAs(joe, conf);
+ // As joe, we should get all node1.
+ for (int trial = 0; trial < ITERATIONS_TO_USE; ++trial) {
+ String host = getPreferredLocation(joeFs, fooName);
+ assertEquals("Trial " + trial + " failed", hosts[0], host);
+ }
+ } finally {
+ CallerContext.setCurrent(original);
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ // Reset the config
+ conf.unset(DFS_NAMENODE_IP_PROXY_USERS);
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org