You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by bh...@apache.org on 2021/04/02 02:00:59 UTC

[ozone] branch master updated: HDDS-5058. Make getScmInfo retry for a duration.

This is an automated email from the ASF dual-hosted git repository.

bharat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new d100652  HDDS-5058. Make getScmInfo retry for a duration.
d100652 is described below

commit d100652ff1a42f97e0046e492ad46def636a9579
Author: Bharat Viswanadham <bv...@cloudera.com>
AuthorDate: Thu Apr 1 15:58:53 2021 +0530

    HDDS-5058. Make getScmInfo retry for a duration.
---
 .../org/apache/hadoop/hdds/scm/ScmConfigKeys.java   |  5 +++++
 .../common/src/main/resources/ozone-default.xml     |  9 +++++++++
 .../hadoop/hdds/scm/proxy/SCMClientConfig.java      |  2 +-
 .../java/org/apache/hadoop/hdds/utils/HAUtils.java  | 21 ++++++++++++++++++++-
 .../org/apache/hadoop/ozone/om/OzoneManager.java    |  9 ++-------
 5 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index d96eb50..8f47756 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -510,6 +510,11 @@ public final class ScmConfigKeys {
       "hdds.scm.ha.security.enable";
   public static final boolean OZONE_SCM_HA_SECURITY_SUPPORTED_DEFAULT = false;
 
+  public static final String OZONE_SCM_INFO_WAIT_DURATION =
+      "ozone.scm.info.wait.duration";
+  public static final long OZONE_SCM_INFO_WAIT_DURATION_DEFAULT =
+      10 * 60;
+
   /**
    * Never constructed.
    */
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 98684b8..8a5ebb5 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -2751,4 +2751,13 @@
       filesystem semantics.
     </description>
   </property>
+
+  <property>
+    <name>ozone.scm.info.wait.duration</name>
+    <tag>OZONE, SCM, OM</tag>
+    <value>10m</value>
+    <description> Maximum duration that OM/SCM waits to get SCM info
+      during OzoneManager init/SCM bootstrap.
+    </description>
+  </property>
 </configuration>
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java
index 99dc446..65acfae 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java
@@ -68,7 +68,7 @@ public class SCMClientConfig {
       tags = {OZONE, SCM, CLIENT},
       timeUnit = TimeUnit.MILLISECONDS,
       description = "SCM Client timeout on waiting for the next connection " +
-          "retry to other SCM IP. The default value is set to 2 minutes. "
+          "retry to other SCM IP. The default value is set to 2 seconds. "
   )
   private long retryInterval = 2 * 1000;
 
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
index db129f4..f9f88ef 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
 import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdds.scm.proxy.SCMBlockLocationFailoverProxyProvider;
+import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig;
 import org.apache.hadoop.hdds.scm.proxy.SCMContainerLocationFailoverProxyProvider;
 import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
 import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
@@ -63,7 +64,10 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION_DEFAULT;
 import static org.apache.hadoop.hdds.server.ServerUtils.getOzoneMetaDirPath;
 import static org.apache.hadoop.ozone.OzoneConsts.DB_TRANSIENT_MARKER;
 import static org.apache.hadoop.ozone.OzoneConsts.TRANSACTION_INFO_KEY;
@@ -79,8 +83,23 @@ public final class HAUtils {
 
   public static ScmInfo getScmInfo(OzoneConfiguration conf)
       throws IOException {
+    OzoneConfiguration configuration = new OzoneConfiguration(conf);
     try {
-      return getScmBlockClient(conf).getScmInfo();
+      long duration = conf.getTimeDuration(OZONE_SCM_INFO_WAIT_DURATION,
+          OZONE_SCM_INFO_WAIT_DURATION_DEFAULT, TimeUnit.SECONDS);
+      SCMClientConfig scmClientConfig =
+          configuration.getObject(SCMClientConfig.class);
+      int retryCount =
+          (int) (duration / (scmClientConfig.getRetryInterval()/1000));
+
+      // Only raise the retry count: if the wait duration yields fewer retries
+      // than the client's configured retry count, keep the configured count.
+      if (retryCount > scmClientConfig.getRetryCount()) {
+        scmClientConfig.setRetryCount(retryCount);
+        configuration.setFromObject(scmClientConfig);
+      }
+
+      return getScmBlockClient(configuration).getScmInfo();
     } catch (IOException e) {
       throw e;
     } catch (Exception e) {
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index f0fbd22..d56499b 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -403,7 +403,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
     // For testing purpose only, not hit scm from om as Hadoop UGI can't login
     // two principals in the same JVM.
     if (!testSecureOmFlag) {
-      ScmInfo scmInfo = getScmInfo(configuration);
+      ScmInfo scmInfo = HAUtils.getScmInfo(configuration);
       if (!(scmInfo.getClusterId().equals(omStorage.getClusterID()) && scmInfo
           .getScmId().equals(omStorage.getScmId()))) {
         logVersionMismatch(conf, scmInfo);
@@ -930,7 +930,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
     StorageState state = omStorage.getState();
     if (state != StorageState.INITIALIZED) {
       try {
-        ScmInfo scmInfo = getScmInfo(conf);
+        ScmInfo scmInfo = HAUtils.getScmInfo(conf);
         String clusterId = scmInfo.getClusterId();
         String scmId = scmInfo.getScmId();
         if (clusterId == null || clusterId.isEmpty()) {
@@ -1008,11 +1008,6 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
     }
   }
 
-  private static ScmInfo getScmInfo(OzoneConfiguration conf)
-      throws IOException {
-    return HAUtils.getScmInfo(conf);
-  }
-
   /**
    * Builds a message for logging startup information about an RPC server.
    *

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org