You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ha...@apache.org on 2019/04/26 17:39:16 UTC

[hadoop] branch trunk updated: HDDS-1403. KeyOutputStream writes fails after max retries while writing to a closed container (#753)

This is an automated email from the ASF dual-hosted git repository.

hanishakoneru pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 3758270  HDDS-1403. KeyOutputStream writes fails after max retries while writing to a closed container (#753)
3758270 is described below

commit 37582705fa6697b744b301d999c9952194e9fc40
Author: Hanisha Koneru <ko...@gmail.com>
AuthorDate: Fri Apr 26 10:39:03 2019 -0700

    HDDS-1403. KeyOutputStream writes fails after max retries while writing to a closed container (#753)
---
 .../org/apache/hadoop/ozone/OzoneConfigKeys.java   | 11 ++++++++---
 .../common/src/main/resources/ozone-default.xml    | 11 ++++++++++-
 .../hadoop/ozone/client/OzoneClientUtils.java      |  5 +++--
 .../hadoop/ozone/client/io/KeyOutputStream.java    | 14 +++++++++++---
 .../apache/hadoop/ozone/client/rpc/RpcClient.java  | 10 +++++++++-
 .../apache/hadoop/ozone/web/utils/OzoneUtils.java  | 22 ++++++++++++++++++++++
 .../web/storage/DistributedStorageHandler.java     |  6 ++++++
 7 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
index a28ae3a..adcd090 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
@@ -27,7 +27,9 @@ import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.ratis.proto.RaftProtos.ReplicationLevel;
 import org.apache.ratis.util.TimeDuration;
 
-/**
+import java.util.concurrent.TimeUnit;
+
+ /**
  * This class contains constants for configuration keys used in Ozone.
  */
 @InterfaceAudience.Public
@@ -140,8 +142,11 @@ public final class OzoneConfigKeys {
 
   public static final String OZONE_CLIENT_MAX_RETRIES =
       "ozone.client.max.retries";
-  public static final int OZONE_CLIENT_MAX_RETRIES_DEFAULT = 5;
-
+  public static final int OZONE_CLIENT_MAX_RETRIES_DEFAULT = 100;
+  public static final String OZONE_CLIENT_RETRY_INTERVAL =
+      "ozone.client.retry.interval";
+  public static final TimeDuration OZONE_CLIENT_RETRY_INTERVAL_DEFAULT =
+      TimeDuration.valueOf(0, TimeUnit.MILLISECONDS);
 
   // This defines the overall connection limit for the connection pool used in
   // RestClient.
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 162c93f..3eb0157 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -429,13 +429,22 @@
   </property>
   <property>
     <name>ozone.client.max.retries</name>
-    <value>5</value>
+    <value>100</value>
     <tag>OZONE, CLIENT</tag>
     <description>Maximum number of retries by Ozone Client on encountering
       exception while writing a key.
     </description>
   </property>
   <property>
+    <name>ozone.client.retry.interval</name>
+    <value>0ms</value>
+    <tag>OZONE, CLIENT</tag>
+    <description>Indicates the time duration a client will wait before
+      retrying a write key request on encountering an exception. By default
+      there is no wait.
+    </description>
+  </property>
+  <property>
     <name>ozone.client.protocol</name>
     <value>org.apache.hadoop.ozone.client.rpc.RpcClient</value>
     <tag>OZONE, CLIENT, MANAGEMENT</tag>
diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneClientUtils.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneClientUtils.java
index 9e9bb39..b52f393 100644
--- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneClientUtils.java
+++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/OzoneClientUtils.java
@@ -127,10 +127,11 @@ public final class OzoneClientUtils {
     return keyInfo;
   }
 
-  public static RetryPolicy createRetryPolicy(int maxRetryCount) {
+  public static RetryPolicy createRetryPolicy(int maxRetryCount,
+      long retryInterval) {
     // just retry without sleep
     RetryPolicy retryPolicy = RetryPolicies
-        .retryUpToMaximumCountWithFixedSleep(maxRetryCount, 0,
+        .retryUpToMaximumCountWithFixedSleep(maxRetryCount, retryInterval,
             TimeUnit.MILLISECONDS);
     return retryPolicy;
   }
diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java
index b12ef51..8ab7738 100644
--- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java
+++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java
@@ -164,7 +164,8 @@ public class KeyOutputStream extends OutputStream {
       String requestId, ReplicationFactor factor, ReplicationType type,
       long bufferFlushSize, long bufferMaxSize, long size, long watchTimeout,
       ChecksumType checksumType, int bytesPerChecksum,
-      String uploadID, int partNumber, boolean isMultipart, int maxRetryCount) {
+      String uploadID, int partNumber, boolean isMultipart,
+      int maxRetryCount, long retryInterval) {
     this.streamEntries = new ArrayList<>();
     this.currentStreamIndex = 0;
     this.omClient = omClient;
@@ -199,7 +200,8 @@ public class KeyOutputStream extends OutputStream {
     this.bufferPool =
         new BufferPool(chunkSize, (int)streamBufferMaxSize / chunkSize);
     this.excludeList = new ExcludeList();
-    this.retryPolicy = OzoneClientUtils.createRetryPolicy(maxRetryCount);
+    this.retryPolicy = OzoneClientUtils.createRetryPolicy(maxRetryCount,
+        retryInterval);
     this.retryCount = 0;
   }
 
@@ -726,6 +728,7 @@ public class KeyOutputStream extends OutputStream {
     private int multipartNumber;
     private boolean isMultipartKey;
     private int maxRetryCount;
+    private long retryInterval;
 
 
     public Builder setMultipartUploadID(String uploadID) {
@@ -814,12 +817,17 @@ public class KeyOutputStream extends OutputStream {
       return this;
     }
 
+    public Builder setRetryInterval(long retryIntervalInMS) {
+      this.retryInterval = retryIntervalInMS;
+      return this;
+    }
+
     public KeyOutputStream build() throws IOException {
       return new KeyOutputStream(openHandler, xceiverManager,
           omClient, chunkSize, requestID, factor, type, streamBufferFlushSize,
           streamBufferMaxSize, blockSize, watchTimeout, checksumType,
           bytesPerChecksum, multipartUploadID, multipartNumber, isMultipartKey,
-          maxRetryCount);
+          maxRetryCount, retryInterval);
     }
   }
 
diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java
index 445c000..66d9eac 100644
--- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java
+++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java
@@ -85,6 +85,7 @@ import org.apache.hadoop.hdds.scm.protocolPB
 import org.apache.hadoop.hdds.scm.protocolPB
     .StorageContainerLocationProtocolPB;
 import org.apache.hadoop.ozone.security.OzoneTokenIdentifier;
+import org.apache.hadoop.ozone.web.utils.OzoneUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.io.Text;
@@ -128,6 +129,7 @@ public class RpcClient implements ClientProtocol, KeyProviderTokenIssuer {
   private final long watchTimeout;
   private final ClientId clientId = ClientId.randomId();
   private final int maxRetryCount;
+  private final long retryInterval;
   private Text dtService;
 
    /**
@@ -214,6 +216,9 @@ public class RpcClient implements ClientProtocol, KeyProviderTokenIssuer {
     maxRetryCount =
         conf.getInt(OzoneConfigKeys.OZONE_CLIENT_MAX_RETRIES, OzoneConfigKeys.
             OZONE_CLIENT_MAX_RETRIES_DEFAULT);
+    retryInterval = OzoneUtils.getTimeDurationInMS(conf,
+        OzoneConfigKeys.OZONE_CLIENT_RETRY_INTERVAL,
+        OzoneConfigKeys.OZONE_CLIENT_RETRY_INTERVAL_DEFAULT);
     dtService =
         getOMProxyProvider().getProxy().getDelegationTokenService();
     boolean isUnsafeByteOperationsEnabled = conf.getBoolean(
@@ -861,6 +866,7 @@ public class RpcClient implements ClientProtocol, KeyProviderTokenIssuer {
             .setMultipartUploadID(uploadID)
             .setIsMultipartKey(true)
             .setMaxRetryCount(maxRetryCount)
+            .setRetryInterval(retryInterval)
             .build();
     keyOutputStream.addPreallocateBlocks(
         openKey.getKeyInfo().getLatestVersionLocations(),
@@ -1022,7 +1028,9 @@ public class RpcClient implements ClientProtocol, KeyProviderTokenIssuer {
             .setBlockSize(blockSize)
             .setChecksumType(checksumType)
             .setBytesPerChecksum(bytesPerChecksum)
-            .setMaxRetryCount(maxRetryCount).build();
+            .setMaxRetryCount(maxRetryCount)
+            .setRetryInterval(retryInterval)
+            .build();
     keyOutputStream
         .addPreallocateBlocks(openKey.getKeyInfo().getLatestVersionLocations(),
             openKey.getOpenVersion());
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/utils/OzoneUtils.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/utils/OzoneUtils.java
index 31188ee..2484091 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/utils/OzoneUtils.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/web/utils/OzoneUtils.java
@@ -26,6 +26,7 @@ import java.text.SimpleDateFormat;
 import java.util.Locale;
 import java.util.TimeZone;
 import java.util.UUID;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
@@ -33,6 +34,7 @@ import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.ozone.OzoneConsts;
 
 import com.google.common.base.Preconditions;
+import org.apache.ratis.util.TimeDuration;
 
 /**
  * Set of Utility functions used in ozone.
@@ -214,4 +216,24 @@ public final class OzoneUtils {
     }
   }
 
+  /**
+   * Return the TimeDuration configured for the given key. If not configured,
+   * return the default value.
+   */
+  public static TimeDuration getTimeDuration(Configuration conf, String key,
+      TimeDuration defaultValue) {
+    TimeUnit defaultTimeUnit = defaultValue.getUnit();
+    long timeDurationInDefaultUnit = conf.getTimeDuration(key,
+        defaultValue.getDuration(), defaultTimeUnit);
+    return TimeDuration.valueOf(timeDurationInDefaultUnit, defaultTimeUnit);
+  }
+
+  /**
+   * Return the time configured for the given key in milliseconds.
+   */
+  public static long getTimeDurationInMS(Configuration conf, String key,
+      TimeDuration defaultValue) {
+    return getTimeDuration(conf, key, defaultValue)
+        .toLong(TimeUnit.MILLISECONDS);
+  }
 }
diff --git a/hadoop-ozone/objectstore-service/src/main/java/org/apache/hadoop/ozone/web/storage/DistributedStorageHandler.java b/hadoop-ozone/objectstore-service/src/main/java/org/apache/hadoop/ozone/web/storage/DistributedStorageHandler.java
index 20b1971..d0ab5e2 100644
--- a/hadoop-ozone/objectstore-service/src/main/java/org/apache/hadoop/ozone/web/storage/DistributedStorageHandler.java
+++ b/hadoop-ozone/objectstore-service/src/main/java/org/apache/hadoop/ozone/web/storage/DistributedStorageHandler.java
@@ -57,6 +57,7 @@ import org.apache.hadoop.ozone.web.handlers.VolumeArgs;
 import org.apache.hadoop.ozone.web.handlers.UserArgs;
 import org.apache.hadoop.ozone.web.interfaces.StorageHandler;
 import org.apache.hadoop.ozone.web.response.*;
+import org.apache.hadoop.ozone.web.utils.OzoneUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -90,6 +91,7 @@ public final class DistributedStorageHandler implements StorageHandler {
   private final int bytesPerChecksum;
   private final boolean verifyChecksum;
   private final int maxRetryCount;
+  private final long retryInterval;
 
   /**
    * Creates a new DistributedStorageHandler.
@@ -159,6 +161,9 @@ public final class DistributedStorageHandler implements StorageHandler {
     this.maxRetryCount =
         conf.getInt(OzoneConfigKeys.OZONE_CLIENT_MAX_RETRIES, OzoneConfigKeys.
             OZONE_CLIENT_MAX_RETRIES_DEFAULT);
+    this.retryInterval = OzoneUtils.getTimeDurationInMS(conf,
+        OzoneConfigKeys.OZONE_CLIENT_RETRY_INTERVAL,
+        OzoneConfigKeys.OZONE_CLIENT_RETRY_INTERVAL_DEFAULT);
     boolean isUnsafeByteOperationsEnabled = conf.getBoolean(
         OzoneConfigKeys.OZONE_UNSAFEBYTEOPERATIONS_ENABLED,
         OzoneConfigKeys.OZONE_UNSAFEBYTEOPERATIONS_ENABLED_DEFAULT);
@@ -464,6 +469,7 @@ public final class DistributedStorageHandler implements StorageHandler {
             .setChecksumType(checksumType)
             .setBytesPerChecksum(bytesPerChecksum)
             .setMaxRetryCount(maxRetryCount)
+            .setRetryInterval(retryInterval)
             .build();
     keyOutputStream.addPreallocateBlocks(
         openKey.getKeyInfo().getLatestVersionLocations(),


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org