You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by bh...@apache.org on 2021/04/02 02:00:59 UTC
[ozone] branch master updated: HDDS-5058. Make getScmInfo retry for a duration.
This is an automated email from the ASF dual-hosted git repository.
bharat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new d100652 HDDS-5058. Make getScmInfo retry for a duration.
d100652 is described below
commit d100652ff1a42f97e0046e492ad46def636a9579
Author: Bharat Viswanadham <bv...@cloudera.com>
AuthorDate: Thu Apr 1 15:58:53 2021 +0530
HDDS-5058. Make getScmInfo retry for a duration.
---
.../org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 5 +++++
.../common/src/main/resources/ozone-default.xml | 9 +++++++++
.../hadoop/hdds/scm/proxy/SCMClientConfig.java | 2 +-
.../java/org/apache/hadoop/hdds/utils/HAUtils.java | 21 ++++++++++++++++++++-
.../org/apache/hadoop/ozone/om/OzoneManager.java | 9 ++-------
5 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index d96eb50..8f47756 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -510,6 +510,11 @@ public final class ScmConfigKeys {
"hdds.scm.ha.security.enable";
public static final boolean OZONE_SCM_HA_SECURITY_SUPPORTED_DEFAULT = false;
+ public static final String OZONE_SCM_INFO_WAIT_DURATION =
+ "ozone.scm.info.wait.duration";
+ public static final long OZONE_SCM_INFO_WAIT_DURATION_DEFAULT =
+ 10 * 60;
+
/**
* Never constructed.
*/
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 98684b8..8a5ebb5 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -2751,4 +2751,13 @@
filesystem semantics.
</description>
</property>
+
+ <property>
+ <name>ozone.scm.info.wait.duration</name>
+ <tag>OZONE, SCM, OM</tag>
+ <value>10m</value>
+ <description> Maximum duration that OM/SCM waits to get SCM info
+ during OzoneManager init/SCM bootstrap.
+ </description>
+ </property>
</configuration>
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java
index 99dc446..65acfae 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java
@@ -68,7 +68,7 @@ public class SCMClientConfig {
tags = {OZONE, SCM, CLIENT},
timeUnit = TimeUnit.MILLISECONDS,
description = "SCM Client timeout on waiting for the next connection " +
- "retry to other SCM IP. The default value is set to 2 minutes. "
+ "retry to other SCM IP. The default value is set to 2 seconds. "
)
private long retryInterval = 2 * 1000;
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
index db129f4..f9f88ef 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdds.scm.proxy.SCMBlockLocationFailoverProxyProvider;
+import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig;
import org.apache.hadoop.hdds.scm.proxy.SCMContainerLocationFailoverProxyProvider;
import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
@@ -63,7 +64,10 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
+import java.util.concurrent.TimeUnit;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION_DEFAULT;
import static org.apache.hadoop.hdds.server.ServerUtils.getOzoneMetaDirPath;
import static org.apache.hadoop.ozone.OzoneConsts.DB_TRANSIENT_MARKER;
import static org.apache.hadoop.ozone.OzoneConsts.TRANSACTION_INFO_KEY;
@@ -79,8 +83,23 @@ public final class HAUtils {
public static ScmInfo getScmInfo(OzoneConfiguration conf)
throws IOException {
+ OzoneConfiguration configuration = new OzoneConfiguration(conf);
try {
- return getScmBlockClient(conf).getScmInfo();
+ long duration = conf.getTimeDuration(OZONE_SCM_INFO_WAIT_DURATION,
+ OZONE_SCM_INFO_WAIT_DURATION_DEFAULT, TimeUnit.SECONDS);
+ SCMClientConfig scmClientConfig =
+ configuration.getObject(SCMClientConfig.class);
+ int retryCount =
+ (int) (duration / (scmClientConfig.getRetryInterval()/1000));
+
+ // Only raise the retry count: if the wait duration yields fewer retries
+ // than the client's configured retry count, keep the configured count.
+ if (retryCount > scmClientConfig.getRetryCount()) {
+ scmClientConfig.setRetryCount(retryCount);
+ configuration.setFromObject(scmClientConfig);
+ }
+
+ return getScmBlockClient(configuration).getScmInfo();
} catch (IOException e) {
throw e;
} catch (Exception e) {
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index f0fbd22..d56499b 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -403,7 +403,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
// For testing purpose only, not hit scm from om as Hadoop UGI can't login
// two principals in the same JVM.
if (!testSecureOmFlag) {
- ScmInfo scmInfo = getScmInfo(configuration);
+ ScmInfo scmInfo = HAUtils.getScmInfo(configuration);
if (!(scmInfo.getClusterId().equals(omStorage.getClusterID()) && scmInfo
.getScmId().equals(omStorage.getScmId()))) {
logVersionMismatch(conf, scmInfo);
@@ -930,7 +930,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
StorageState state = omStorage.getState();
if (state != StorageState.INITIALIZED) {
try {
- ScmInfo scmInfo = getScmInfo(conf);
+ ScmInfo scmInfo = HAUtils.getScmInfo(conf);
String clusterId = scmInfo.getClusterId();
String scmId = scmInfo.getScmId();
if (clusterId == null || clusterId.isEmpty()) {
@@ -1008,11 +1008,6 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
}
}
- private static ScmInfo getScmInfo(OzoneConfiguration conf)
- throws IOException {
- return HAUtils.getScmInfo(conf);
- }
-
/**
* Builds a message for logging startup information about an RPC server.
*
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org