You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by bo...@apache.org on 2018/09/19 21:00:19 UTC

[09/50] [abbrv] hadoop git commit: HDDS-464. Fix TestCloseContainerHandlingByClient. Contributed by Lokesh Jain.

HDDS-464. Fix TestCloseContainerHandlingByClient.
Contributed by Lokesh Jain.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f938925b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f938925b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f938925b

Branch: refs/heads/YARN-7402
Commit: f938925bde1d481bacb2546096a1e49fe796b411
Parents: 6ff509c
Author: Anu Engineer <ae...@apache.org>
Authored: Tue Sep 18 10:44:56 2018 -0700
Committer: Anu Engineer <ae...@apache.org>
Committed: Tue Sep 18 10:45:51 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hdds/scm/XceiverClientRatis.java     |  4 ++
 .../apache/hadoop/hdds/scm/ScmConfigKeys.java   | 13 +++++++
 .../apache/hadoop/ozone/OzoneConfigKeys.java    | 14 +++++++
 .../main/java/org/apache/ratis/RatisHelper.java | 40 +++++++++++++++++---
 .../common/src/main/resources/ozone-default.xml | 19 ++++++++++
 .../server/ratis/XceiverServerRatis.java        | 13 +++++++
 .../rpc/TestCloseContainerHandlingByClient.java |  5 +--
 7 files changed, 100 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/f938925b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java
----------------------------------------------------------------------
diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java
index f0db7b5..946abfb 100644
--- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java
+++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java
@@ -208,6 +208,10 @@ public final class XceiverClientRatis extends XceiverClientSpi {
   public ContainerCommandResponseProto sendCommand(
       ContainerCommandRequestProto request) throws IOException {
     final RaftClientReply reply = sendRequest(request);
+    if (reply == null) {
+      throw new IOException(
+          String.format("Could not execute the request %s", request));
+    }
     Preconditions.checkState(reply.isSuccess());
     return ContainerCommandResponseProto.parseFrom(
         reply.getMessage().getContent());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f938925b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
----------------------------------------------------------------------
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index 5b25779..63f5916 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -75,6 +75,19 @@ public final class ScmConfigKeys {
   public static final TimeDuration
       DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_DEFAULT =
       TimeDuration.valueOf(3000, TimeUnit.MILLISECONDS);
+  public static final String DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY =
+      "dfs.ratis.client.request.max.retries";
+  public static final int DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_DEFAULT = 180;
+  public static final String DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY =
+      "dfs.ratis.client.request.retry.interval";
+  public static final TimeDuration
+      DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT =
+      TimeDuration.valueOf(100, TimeUnit.MILLISECONDS);
+  public static final String DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY =
+      "dfs.ratis.server.retry-cache.timeout.duration";
+  public static final TimeDuration
+      DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT =
+      TimeDuration.valueOf(600000, TimeUnit.MILLISECONDS);
   public static final String DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY =
       "dfs.ratis.server.request.timeout.duration";
   public static final TimeDuration

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f938925b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
----------------------------------------------------------------------
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
index 54ec139..599b4e8 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
@@ -237,6 +237,20 @@ public final class OzoneConfigKeys {
   public static final TimeDuration
       DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_DEFAULT =
       ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_DEFAULT;
+  public static final String DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY =
+      ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY;
+  public static final int DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_DEFAULT =
+      ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_DEFAULT;
+  public static final String DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY =
+      ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY;
+  public static final TimeDuration
+      DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT =
+      ScmConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT;
+  public static final String DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY =
+      ScmConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY;
+  public static final TimeDuration
+      DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT =
+      ScmConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT;
   public static final String DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY =
       ScmConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_KEY;
   public static final TimeDuration

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f938925b/hadoop-hdds/common/src/main/java/org/apache/ratis/RatisHelper.java
----------------------------------------------------------------------
diff --git a/hadoop-hdds/common/src/main/java/org/apache/ratis/RatisHelper.java b/hadoop-hdds/common/src/main/java/org/apache/ratis/RatisHelper.java
index d851992..04bfeb2 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/ratis/RatisHelper.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/ratis/RatisHelper.java
@@ -34,6 +34,7 @@ import org.apache.ratis.retry.RetryPolicy;
 import org.apache.ratis.rpc.RpcType;
 import org.apache.ratis.shaded.com.google.protobuf.ByteString;
 import org.apache.ratis.shaded.proto.RaftProtos;
+import org.apache.ratis.util.Preconditions;
 import org.apache.ratis.util.SizeInBytes;
 import org.apache.ratis.util.TimeDuration;
 import org.slf4j.Logger;
@@ -48,6 +49,9 @@ import java.util.UUID;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
+import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT;
+import static org.apache.hadoop.ozone.OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY;
+
 /**
  * Ratis helper methods.
  */
@@ -162,12 +166,38 @@ public interface RatisHelper {
 
   static RetryPolicy createRetryPolicy(Configuration conf) {
     int maxRetryCount =
-        conf.getInt(OzoneConfigKeys.OZONE_CLIENT_MAX_RETRIES, OzoneConfigKeys.
-            OZONE_CLIENT_MAX_RETRIES_DEFAULT);
+        conf.getInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY,
+            OzoneConfigKeys.
+                DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_DEFAULT);
     long retryInterval = conf.getTimeDuration(OzoneConfigKeys.
-            OZONE_CLIENT_RETRY_INTERVAL, OzoneConfigKeys.
-            OZONE_CLIENT_RETRY_INTERVAL_DEFAULT,
-        TimeUnit.MILLISECONDS.MILLISECONDS);
+        DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY, OzoneConfigKeys.
+        DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_DEFAULT
+        .toInt(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
+    long leaderElectionTimeout = conf.getTimeDuration(
+        DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
+        DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT
+            .toInt(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
+    long clientRequestTimeout = conf.getTimeDuration(
+        OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_KEY,
+        OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_TIMEOUT_DURATION_DEFAULT
+            .toInt(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
+    long retryCacheTimeout = conf.getTimeDuration(
+        OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY,
+        OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT
+            .toInt(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
+    Preconditions
+        .assertTrue(maxRetryCount * retryInterval > 5 * leaderElectionTimeout,
+            "Please make sure dfs.ratis.client.request.max.retries * "
+                + "dfs.ratis.client.request.retry.interval > "
+                + "5 * dfs.ratis.leader.election.minimum.timeout.duration");
+    Preconditions.assertTrue(
+        maxRetryCount * (retryInterval + clientRequestTimeout)
+            < retryCacheTimeout,
+        "Please make sure "
+            + "(dfs.ratis.client.request.max.retries * "
+            + "(dfs.ratis.client.request.retry.interval + "
+            + "dfs.ratis.client.request.timeout.duration)) "
+            + "< dfs.ratis.server.retry-cache.timeout.duration");
     TimeDuration sleepDuration =
         TimeDuration.valueOf(retryInterval, TimeUnit.MILLISECONDS);
     RetryPolicy retryPolicy = RetryPolicies

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f938925b/hadoop-hdds/common/src/main/resources/ozone-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index e160f25..a74124e 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -158,6 +158,25 @@
     <description>The timeout duration for ratis client request.</description>
   </property>
   <property>
+    <name>dfs.ratis.client.request.max.retries</name>
+    <value>180</value>
+    <tag>OZONE, RATIS, MANAGEMENT</tag>
+    <description>Number of retries for ratis client request.</description>
+  </property>
+  <property>
+    <name>dfs.ratis.client.request.retry.interval</name>
+    <value>100ms</value>
+    <tag>OZONE, RATIS, MANAGEMENT</tag>
+    <description>Interval between successive retries for a ratis client request.
+    </description>
+  </property>
+  <property>
+    <name>dfs.ratis.server.retry-cache.timeout.duration</name>
+    <value>600000ms</value>
+    <tag>OZONE, RATIS, MANAGEMENT</tag>
+    <description>Retry Cache entry timeout for ratis server.</description>
+  </property>
+  <property>
     <name>dfs.ratis.server.request.timeout.duration</name>
     <value>3s</value>
     <tag>OZONE, RATIS, MANAGEMENT</tag>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f938925b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
----------------------------------------------------------------------
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
index a57997d..24ea0b9 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
@@ -182,6 +182,19 @@ public final class XceiverServerRatis implements XceiverServerSpi {
     RaftServerConfigKeys.Rpc
         .setRequestTimeout(properties, serverRequestTimeout);
 
+    // set timeout for a retry cache entry
+    timeUnit =
+        OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT
+            .getUnit();
+    duration = conf.getTimeDuration(
+        OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_KEY,
+        OzoneConfigKeys.DFS_RATIS_SERVER_RETRY_CACHE_TIMEOUT_DURATION_DEFAULT
+            .getDuration(), timeUnit);
+    final TimeDuration retryCacheTimeout =
+        TimeDuration.valueOf(duration, timeUnit);
+    RaftServerConfigKeys.RetryCache
+        .setExpiryTime(properties, retryCacheTimeout);
+
     // Set the ratis leader election timeout
     TimeUnit leaderElectionMinTimeoutUnit =
         OzoneConfigKeys.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f938925b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCloseContainerHandlingByClient.java
----------------------------------------------------------------------
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCloseContainerHandlingByClient.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCloseContainerHandlingByClient.java
index cf38982..83421b2 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCloseContainerHandlingByClient.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestCloseContainerHandlingByClient.java
@@ -55,7 +55,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.UUID;
-import java.util.Random;
 
 /**
  * Tests Close Container Exception handling by Ozone Client.
@@ -83,9 +82,9 @@ public class TestCloseContainerHandlingByClient {
   @BeforeClass
   public static void init() throws Exception {
     conf = new OzoneConfiguration();
-    // generate a no between 1 to 10
-    maxRetries = new Random().nextInt(10);
+    maxRetries = 100;
     conf.setInt(OzoneConfigKeys.OZONE_CLIENT_MAX_RETRIES, maxRetries);
+    conf.set(OzoneConfigKeys.OZONE_CLIENT_RETRY_INTERVAL, "200ms");
     chunkSize = (int) OzoneConsts.MB;
     blockSize = 4 * chunkSize;
     conf.setInt(ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY, chunkSize);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org