You are viewing a plain text version of this content. The canonical link was a hyperlink in the original HTML message; its URL is not preserved in this plain text rendering.
Posted to common-commits@hadoop.apache.org by cn...@apache.org on 2021/09/03 18:46:15 UTC
[hadoop] branch trunk updated: HADOOP-15129. Datanode caches
namenode DNS lookup failure and cannot startup (#3348) Co-authored-by:
Karthik Palaniappan
This is an automated email from the ASF dual-hosted git repository.
cnauroth pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 1d808f5 HADOOP-15129. Datanode caches namenode DNS lookup failure and cannot startup (#3348) Co-authored-by: Karthik Palaniappan
1d808f5 is described below
commit 1d808f59d79194f0491938c4421dc518fd3e56b8
Author: Chris Nauroth <cn...@apache.org>
AuthorDate: Fri Sep 3 18:41:56 2021 +0000
HADOOP-15129. Datanode caches namenode DNS lookup failure and cannot startup (#3348)
Co-authored-by: Karthik Palaniappan
Change-Id: Id079a5319e5e83939d5dcce5fb9ebe3715ee864f
---
.../main/java/org/apache/hadoop/ipc/Client.java | 19 ++++----
.../test/java/org/apache/hadoop/ipc/TestIPC.java | 52 ++++++++++++++++++++++
2 files changed, 62 insertions(+), 9 deletions(-)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
index 3c737ba..712db04 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
@@ -655,6 +655,16 @@ public class Client implements AutoCloseable {
short timeoutFailures = 0;
while (true) {
try {
+ if (server.isUnresolved()) {
+ // Jump into the catch block. updateAddress() will re-resolve
+ // the address if this is just a temporary DNS failure. If not,
+ // it will timeout after max ipc client retries
+ throw NetUtils.wrapException(server.getHostName(),
+ server.getPort(),
+ NetUtils.getHostname(),
+ 0,
+ new UnknownHostException());
+ }
this.socket = socketFactory.createSocket();
this.socket.setTcpNoDelay(tcpNoDelay);
this.socket.setKeepAlive(true);
@@ -1604,15 +1614,6 @@ public class Client implements AutoCloseable {
private Connection getConnection(ConnectionId remoteId,
Call call, int serviceClass, AtomicBoolean fallbackToSimpleAuth)
throws IOException {
- final InetSocketAddress address = remoteId.getAddress();
- if (address.isUnresolved()) {
- throw NetUtils.wrapException(address.getHostName(),
- address.getPort(),
- null,
- 0,
- new UnknownHostException());
- }
-
final Consumer<Connection> removeMethod = c -> {
final boolean removed = connections.remove(remoteId, c);
if (removed && connections.isEmpty()) {
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
index d486c7e..99047ff 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
@@ -47,6 +47,7 @@ import java.net.Socket;
import java.net.SocketAddress;
import java.net.SocketException;
import java.net.SocketTimeoutException;
+import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
@@ -54,6 +55,7 @@ import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutorService;
@@ -88,6 +90,7 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
import org.apache.hadoop.test.Whitebox;
import org.apache.hadoop.util.StringUtils;
import org.junit.Assert;
@@ -789,6 +792,55 @@ public class TestIPC {
}
}
+ @Test(timeout=60000)
+ public void testIpcHostResolutionTimeout() throws Exception {
+ final InetSocketAddress addr = new InetSocketAddress("host.invalid", 80);
+
+ // start client
+ Client.setConnectTimeout(conf, 100);
+ final Client client = new Client(LongWritable.class, conf);
+ // set the rpc timeout to twice the MIN_SLEEP_TIME
+ try {
+ LambdaTestUtils.intercept(UnknownHostException.class,
+ new Callable<Void>() {
+ @Override
+ public Void call() throws IOException {
+ TestIPC.this.call(client, new LongWritable(RANDOM.nextLong()),
+ addr, MIN_SLEEP_TIME * 2, conf);
+ return null;
+ }
+ });
+ } finally {
+ client.stop();
+ }
+ }
+
+ @Test(timeout=60000)
+ public void testIpcFlakyHostResolution() throws IOException {
+ // start server
+ Server server = new TestServer(5, false);
+ server.start();
+
+ // Leave host unresolved to start. Use "localhost" as opposed
+ // to local IP from NetUtils.getConnectAddress(server) to force
+ // resolution later
+ InetSocketAddress unresolvedAddr = InetSocketAddress.createUnresolved(
+ "localhost", NetUtils.getConnectAddress(server).getPort());
+
+ // start client
+ Client.setConnectTimeout(conf, 100);
+ Client client = new Client(LongWritable.class, conf);
+
+ try {
+ // Should re-resolve host and succeed
+ call(client, new LongWritable(RANDOM.nextLong()), unresolvedAddr,
+ MIN_SLEEP_TIME * 2, conf);
+ } finally {
+ client.stop();
+ server.stop();
+ }
+ }
+
/**
* Check that reader queueing works
* @throws BrokenBarrierException
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org