You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by in...@apache.org on 2019/03/03 18:37:00 UTC

[hadoop] 21/45: HDFS-14161. RBF: Throw StandbyException instead of IOException so that client can retry when can not get connection. Contributed by Fei Hui.

This is an automated email from the ASF dual-hosted git repository.

inigoiri pushed a commit to branch HDFS-13891
in repository https://gitbox.apache.org/repos/asf/hadoop.git

commit 8693e6a12391afea06eeec43298f982fbab69dd2
Author: Inigo Goiri <in...@apache.org>
AuthorDate: Wed Jan 2 10:49:00 2019 -0800

    HDFS-14161. RBF: Throw StandbyException instead of IOException so that client can retry when can not get connection. Contributed by Fei Hui.
---
 .../federation/router/ConnectionNullException.java | 33 ++++++++++++++++++
 .../server/federation/router/RouterRpcClient.java  | 20 ++++++++---
 .../server/federation/FederationTestUtils.java     | 31 +++++++++++++++++
 .../router/TestRouterClientRejectOverload.java     | 40 ++++++++++++++++++++++
 4 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionNullException.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionNullException.java
new file mode 100644
index 0000000..53de602
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionNullException.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.router;
+
+import java.io.IOException;
+
+
+/**
+ * Exception thrown when a non-null connection cannot be obtained.
+ */
+public class ConnectionNullException extends IOException {
+
+  private static final long serialVersionUID = 1L;
+
+  public ConnectionNullException(String msg) {
+    super(msg);
+  }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java
index a21e980..c4d3a20 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java
@@ -270,7 +270,8 @@ public class RouterRpcClient {
     }
 
     if (connection == null) {
-      throw new IOException("Cannot get a connection to " + rpcAddress);
+      throw new ConnectionNullException("Cannot get a connection to "
+          + rpcAddress);
     }
     return connection;
   }
@@ -363,9 +364,9 @@ public class RouterRpcClient {
     Map<FederationNamenodeContext, IOException> ioes = new LinkedHashMap<>();
     for (FederationNamenodeContext namenode : namenodes) {
       ConnectionContext connection = null;
+      String nsId = namenode.getNameserviceId();
+      String rpcAddress = namenode.getRpcAddress();
       try {
-        String nsId = namenode.getNameserviceId();
-        String rpcAddress = namenode.getRpcAddress();
         connection = this.getConnection(ugi, nsId, rpcAddress, protocol);
         ProxyAndInfo<?> client = connection.getClient();
         final Object proxy = client.getProxy();
@@ -394,6 +395,16 @@ public class RouterRpcClient {
           }
           // RemoteException returned by NN
           throw (RemoteException) ioe;
+        } else if (ioe instanceof ConnectionNullException) {
+          if (this.rpcMonitor != null) {
+            this.rpcMonitor.proxyOpFailureCommunicate();
+          }
+          LOG.error("Get connection for {} {} error: {}", nsId, rpcAddress,
+              ioe.getMessage());
+          // Throw StandbyException so that client can retry
+          StandbyException se = new StandbyException(ioe.getMessage());
+          se.initCause(ioe);
+          throw se;
         } else {
           // Other communication error, this is a failure
           // Communication retries are handled by the retry policy
@@ -425,7 +436,8 @@ public class RouterRpcClient {
       String addr = namenode.getRpcAddress();
       IOException ioe = entry.getValue();
       if (ioe instanceof StandbyException) {
-        LOG.error("{} {} at {} is in Standby", nsId, nnId, addr);
+        LOG.error("{} {} at {} is in Standby: {}", nsId, nnId, addr,
+            ioe.getMessage());
       } else {
         LOG.error("{} {} at {} error: \"{}\"",
             nsId, nnId, addr, ioe.getMessage());
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/FederationTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/FederationTestUtils.java
index 5095c6b..d92edac 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/FederationTestUtils.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/FederationTestUtils.java
@@ -52,6 +52,9 @@ import org.apache.hadoop.hdfs.server.federation.resolver.ActiveNamenodeResolver;
 import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeContext;
 import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeServiceState;
 import org.apache.hadoop.hdfs.server.federation.resolver.NamenodeStatusReport;
+import org.apache.hadoop.hdfs.server.federation.router.ConnectionManager;
+import org.apache.hadoop.hdfs.server.federation.router.RouterRpcClient;
+import org.apache.hadoop.hdfs.server.federation.router.RouterRpcServer;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
@@ -60,6 +63,7 @@ import org.apache.hadoop.hdfs.server.federation.store.RouterStore;
 import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.Whitebox;
 import org.mockito.invocation.InvocationOnMock;
@@ -343,4 +347,31 @@ public final class FederationTestUtils {
       }
     }, 100, timeout);
   }
+
+  /**
+   * Replace the ConnectionManager of the server's RouterRpcClient with a
+   * spy whose getConnection always throws IOException, so that the
+   * RouterRpcClient will get a null Connection.
+   * @param server RouterRpcServer whose RPC client receives the failing spy.
+   * @throws IOException if preparing the spied ConnectionManager fails.
+   */
+  public static void simulateThrowExceptionRouterRpcServer(
+      final RouterRpcServer server) throws IOException {
+    RouterRpcClient rpcClient = server.getRPCClient();
+    ConnectionManager connectionManager =
+        new ConnectionManager(server.getConfig());
+    ConnectionManager spyConnectionManager = spy(connectionManager);
+    doAnswer(new Answer() {
+      @Override
+      public Object answer(InvocationOnMock invocation) throws Throwable {
+        LOG.info("Simulating connectionManager throw IOException {}",
+            invocation.getMock());
+        throw new IOException("Simulate connectionManager throw IOException");
+      }
+    }).when(spyConnectionManager).getConnection(
+        any(UserGroupInformation.class), any(String.class), any(Class.class));
+
+    Whitebox.setInternalState(rpcClient, "connectionManager",
+        spyConnectionManager);
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterClientRejectOverload.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterClientRejectOverload.java
index 3c51e13..0664159 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterClientRejectOverload.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterClientRejectOverload.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.federation.router;
 
 import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.simulateSlowNamenode;
+import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.simulateThrowExceptionRouterRpcServer;
 import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
@@ -240,4 +241,43 @@ public class TestRouterClientRejectOverload {
           num <= expOverloadMax);
     }
   }
+
+  @Test
+  public void testConnectionNullException() throws Exception {
+    setupCluster(false);
+
+    // Pick the first router of the cluster
+    RouterContext routerContext = cluster.getRouters().get(0);
+    Router router = routerContext.getRouter();
+    // Make this router fail to obtain connections (ConnectionNullException)
+    simulateThrowExceptionRouterRpcServer(router.getRpcServer());
+
+    // Disable random failover order so the client tries the 1st router first
+    Configuration conf = cluster.getRouterClientConf();
+    conf.setBoolean("dfs.client.failover.random.order", false);
+    // Client that accesses the router cluster
+    DFSClient routerClient =
+        new DFSClient(new URI("hdfs://fed"), conf);
+
+    // Metrics of router 0 (the one with the failing ConnectionManager)
+    FederationRPCMetrics rpcMetrics0 = cluster.getRouters().get(0)
+        .getRouter().getRpcServer().getRPCMetrics();
+    // Metrics of router 1 (left untouched)
+    FederationRPCMetrics rpcMetrics1 = cluster.getRouters().get(1)
+        .getRouter().getRpcServer().getRPCMetrics();
+
+    // Communicate-failure counts before issuing the call
+    long originalRouter0Failures = rpcMetrics0.getProxyOpFailureCommunicate();
+    long originalRouter1Failures = rpcMetrics1.getProxyOpFailureCommunicate();
+
+    // RPC call must succeed; client should retry after the StandbyException
+    routerClient.getFileInfo("/");
+
+    // Router 0 must record exactly one more communicate failure
+    assertEquals(originalRouter0Failures + 1,
+        rpcMetrics0.getProxyOpFailureCommunicate());
+    // Router 1 failure count must stay the same
+    assertEquals(originalRouter1Failures,
+        rpcMetrics1.getProxyOpFailureCommunicate());
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org