You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cn...@apache.org on 2021/09/03 18:46:15 UTC

[hadoop] branch trunk updated: HADOOP-15129. Datanode caches namenode DNS lookup failure and cannot startup (#3348) Co-authored-by: Karthik Palaniappan

This is an automated email from the ASF dual-hosted git repository.

cnauroth pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 1d808f5  HADOOP-15129. Datanode caches namenode DNS lookup failure and cannot startup (#3348) Co-authored-by:  Karthik Palaniappan
1d808f5 is described below

commit 1d808f59d79194f0491938c4421dc518fd3e56b8
Author: Chris Nauroth <cn...@apache.org>
AuthorDate: Fri Sep 3 18:41:56 2021 +0000

    HADOOP-15129. Datanode caches namenode DNS lookup failure and cannot startup (#3348)
    Co-authored-by:  Karthik Palaniappan
    
    Change-Id: Id079a5319e5e83939d5dcce5fb9ebe3715ee864f
---
 .../main/java/org/apache/hadoop/ipc/Client.java    | 19 ++++----
 .../test/java/org/apache/hadoop/ipc/TestIPC.java   | 52 ++++++++++++++++++++++
 2 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
index 3c737ba..712db04 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
@@ -655,6 +655,16 @@ public class Client implements AutoCloseable {
       short timeoutFailures = 0;
       while (true) {
         try {
+          if (server.isUnresolved()) {
+            // Jump into the catch block. updateAddress() will re-resolve
+            // the address if this is just a temporary DNS failure. If not,
+            // it will timeout after max ipc client retries
+            throw NetUtils.wrapException(server.getHostName(),
+                server.getPort(),
+                NetUtils.getHostname(),
+                0,
+                new UnknownHostException());
+          }
           this.socket = socketFactory.createSocket();
           this.socket.setTcpNoDelay(tcpNoDelay);
           this.socket.setKeepAlive(true);
@@ -1604,15 +1614,6 @@ public class Client implements AutoCloseable {
   private Connection getConnection(ConnectionId remoteId,
       Call call, int serviceClass, AtomicBoolean fallbackToSimpleAuth)
       throws IOException {
-    final InetSocketAddress address = remoteId.getAddress();
-    if (address.isUnresolved()) {
-      throw NetUtils.wrapException(address.getHostName(),
-          address.getPort(),
-          null,
-          0,
-          new UnknownHostException());
-    }
-
     final Consumer<Connection> removeMethod = c -> {
       final boolean removed = connections.remove(remoteId, c);
       if (removed && connections.isEmpty()) {
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
index d486c7e..99047ff 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
@@ -47,6 +47,7 @@ import java.net.Socket;
 import java.net.SocketAddress;
 import java.net.SocketException;
 import java.net.SocketTimeoutException;
+import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
@@ -54,6 +55,7 @@ import java.util.List;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.Callable;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.CyclicBarrier;
 import java.util.concurrent.ExecutorService;
@@ -88,6 +90,7 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
 import org.apache.hadoop.test.Whitebox;
 import org.apache.hadoop.util.StringUtils;
 import org.junit.Assert;
@@ -789,6 +792,55 @@ public class TestIPC {
     }
   }
 
+  @Test(timeout=60000)
+  public void testIpcHostResolutionTimeout() throws Exception {
+    final InetSocketAddress addr = new InetSocketAddress("host.invalid", 80);
+
+    // start client
+    Client.setConnectTimeout(conf, 100);
+    final Client client = new Client(LongWritable.class, conf);
+    // set the rpc timeout to twice the MIN_SLEEP_TIME
+    try {
+      LambdaTestUtils.intercept(UnknownHostException.class,
+          new Callable<Void>() {
+            @Override
+            public Void call() throws IOException {
+              TestIPC.this.call(client, new LongWritable(RANDOM.nextLong()),
+                  addr, MIN_SLEEP_TIME * 2, conf);
+              return null;
+            }
+          });
+    } finally {
+      client.stop();
+    }
+  }
+
+  @Test(timeout=60000)
+  public void testIpcFlakyHostResolution() throws IOException {
+    // start server
+    Server server = new TestServer(5, false);
+    server.start();
+
+    // Leave host unresolved to start. Use "localhost" as opposed
+    // to local IP from NetUtils.getConnectAddress(server) to force
+    // resolution later
+    InetSocketAddress unresolvedAddr = InetSocketAddress.createUnresolved(
+        "localhost", NetUtils.getConnectAddress(server).getPort());
+
+    // start client
+    Client.setConnectTimeout(conf, 100);
+    Client client = new Client(LongWritable.class, conf);
+
+    try {
+      // Should re-resolve host and succeed
+      call(client, new LongWritable(RANDOM.nextLong()), unresolvedAddr,
+          MIN_SLEEP_TIME * 2, conf);
+    } finally {
+      client.stop();
+      server.stop();
+    }
+  }
+
   /**
    * Check that reader queueing works
    * @throws BrokenBarrierException 

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org