You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by ar...@apache.org on 2021/01/25 21:10:49 UTC
[ozone] branch master updated: HDDS-4430. OM failover timeout is too short (#1807)
This is an automated email from the ASF dual-hosted git repository.
arp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 0694ea1 HDDS-4430. OM failover timeout is too short (#1807)
0694ea1 is described below
commit 0694ea12aa829a056f5b058cbaa1d7b815cc0048
Author: Hanisha Koneru <ha...@apache.org>
AuthorDate: Mon Jan 25 13:10:29 2021 -0800
HDDS-4430. OM failover timeout is too short (#1807)
---
.../common/src/main/resources/ozone-default.xml | 10 +++++-----
.../java/org/apache/hadoop/ozone/om/OMConfigKeys.java | 9 +--------
.../org/apache/hadoop/ozone/MiniOzoneCluster.java | 1 +
.../org/apache/hadoop/ozone/MiniOzoneClusterImpl.java | 2 ++
.../apache/hadoop/ozone/MiniOzoneHAClusterImpl.java | 19 +++++++++----------
.../hadoop/ozone/TestOzoneConfigurationFields.java | 2 ++
.../ozone/om/TestOzoneManagerConfiguration.java | 7 +++----
.../ozone/om/ratis/OzoneManagerRatisServer.java | 19 +------------------
.../ozone/om/ratis/TestOzoneManagerRatisServer.java | 12 +++++-------
9 files changed, 29 insertions(+), 52 deletions(-)
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index f644eec..47dbf39 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1620,7 +1620,7 @@
<property>
<name>ozone.om.ratis.minimum.timeout</name>
- <value>1s</value>
+ <value>5s</value>
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
<description>The minimum timeout duration for OM's Ratis server rpc.
</description>
@@ -1628,10 +1628,10 @@
<property>
<name>ozone.om.leader.election.minimum.timeout.duration</name>
- <value>1s</value>
- <tag>OZONE, OM, RATIS, MANAGEMENT</tag>
- <description>The minimum timeout duration for OM ratis leader election.
- Default is 1s.
+ <value>5s</value>
+ <tag>OZONE, OM, RATIS, MANAGEMENT, DEPRECATED</tag>
+ <description>DEPRECATED. Leader election timeout uses ratis
+ rpc timeout which can be set via ozone.om.ratis.minimum.timeout.
</description>
</property>
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
index c5b584e..3ad4ab9 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java
@@ -153,15 +153,8 @@ public final class OMConfigKeys {
public static final String OZONE_OM_RATIS_MINIMUM_TIMEOUT_KEY
= "ozone.om.ratis.minimum.timeout";
public static final TimeDuration OZONE_OM_RATIS_MINIMUM_TIMEOUT_DEFAULT
- = TimeDuration.valueOf(1, TimeUnit.SECONDS);
+ = TimeDuration.valueOf(5, TimeUnit.SECONDS);
- // OM Ratis Leader Election configurations
- public static final String
- OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY =
- "ozone.om.leader.election.minimum.timeout.duration";
- public static final TimeDuration
- OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT =
- TimeDuration.valueOf(1, TimeUnit.SECONDS);
public static final String OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_KEY
= "ozone.om.ratis.server.failure.timeout.duration";
public static final TimeDuration
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java
index d1d5162..87b2679 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneCluster.java
@@ -275,6 +275,7 @@ public interface MiniOzoneCluster {
protected static final int DEFAULT_HB_PROCESSOR_INTERVAL_MS = 100;
protected static final int ACTIVE_OMS_NOT_SET = -1;
protected static final int DEFAULT_PIPELIME_LIMIT = 3;
+ protected static final int DEFAULT_RATIS_RPC_TIMEOUT_SEC = 1;
protected final OzoneConfiguration conf;
protected String path;
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
index c955948..e9876eb 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
@@ -618,6 +618,8 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster {
conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PIPELINE_LIMIT,
pipelineNumLimit >= DEFAULT_PIPELIME_LIMIT ?
pipelineNumLimit : DEFAULT_PIPELIME_LIMIT);
+ conf.setTimeDuration(OMConfigKeys.OZONE_OM_RATIS_MINIMUM_TIMEOUT_KEY,
+ DEFAULT_RATIS_RPC_TIMEOUT_SEC, TimeUnit.SECONDS);
configureTrace();
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
index d16618a..c1a7f53 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
@@ -69,7 +69,7 @@ public class MiniOzoneHAClusterImpl extends MiniOzoneClusterImpl {
private int waitForOMToBeReadyTimeout = 120000; // 2 min
private static final Random RANDOM = new Random();
- private static final int RATIS_LEADER_ELECTION_TIMEOUT = 1000; // 1 second
+ private static final int RATIS_RPC_TIMEOUT = 1000; // 1 second
public static final int NODE_FAILURE_TIMEOUT = 2000; // 2 seconds
/**
@@ -308,19 +308,18 @@ public class MiniOzoneHAClusterImpl extends MiniOzoneClusterImpl {
protected void initOMRatisConf() {
conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true);
conf.setInt(OMConfigKeys.OZONE_OM_HANDLER_COUNT_KEY, numOfOmHandlers);
+
// If test change the following config values we will respect,
// otherwise we will set lower timeout values.
- long defaultDuration =
- OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT
+ long defaultDuration = OMConfigKeys.OZONE_OM_RATIS_MINIMUM_TIMEOUT_DEFAULT
.getDuration();
- long curLeaderElectionTimeout = conf.getTimeDuration(
- OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
+ long curRatisRpcTimeout = conf.getTimeDuration(
+ OMConfigKeys.OZONE_OM_RATIS_MINIMUM_TIMEOUT_KEY,
defaultDuration, TimeUnit.MILLISECONDS);
- conf.setTimeDuration(
- OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
- defaultDuration == curLeaderElectionTimeout ?
- RATIS_LEADER_ELECTION_TIMEOUT : curLeaderElectionTimeout,
- TimeUnit.MILLISECONDS);
+ conf.setTimeDuration(OMConfigKeys.OZONE_OM_RATIS_MINIMUM_TIMEOUT_KEY,
+ defaultDuration == curRatisRpcTimeout ?
+ RATIS_RPC_TIMEOUT : curRatisRpcTimeout, TimeUnit.MILLISECONDS);
+
long defaultNodeFailureTimeout =
OMConfigKeys.OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT.
getDuration();
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java
index da2a63c..edb6f2b 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java
@@ -54,6 +54,8 @@ public class TestOzoneConfigurationFields extends TestConfigurationFieldsBase {
errorIfMissingXmlProps = true;
xmlPropsToSkipCompare.add("hadoop.tags.custom");
xmlPropsToSkipCompare.add("ozone.om.nodes.EXAMPLEOMSERVICEID");
+ xmlPropsToSkipCompare.add("ozone.om.leader.election.minimum.timeout" +
+ ".duration"); // Deprecated config
addPropertiesNotInXml();
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerConfiguration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerConfiguration.java
index ef032ca..2798128 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerConfiguration.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerConfiguration.java
@@ -66,7 +66,7 @@ public class TestOzoneManagerConfiguration {
private OzoneManager om;
private OzoneManagerRatisServer omRatisServer;
- private static final long LEADER_ELECTION_TIMEOUT = 500L;
+ private static final long RATIS_RPC_TIMEOUT = 500L;
@Before
public void init() throws IOException {
@@ -79,9 +79,8 @@ public class TestOzoneManagerConfiguration {
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metaDirPath.toString());
conf.set(ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY, "127.0.0.1:0");
conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true);
- conf.setTimeDuration(
- OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
- LEADER_ELECTION_TIMEOUT, TimeUnit.MILLISECONDS);
+ conf.setTimeDuration(OMConfigKeys.OZONE_OM_RATIS_MINIMUM_TIMEOUT_KEY,
+ RATIS_RPC_TIMEOUT, TimeUnit.MILLISECONDS);
OMStorage omStore = new OMStorage(conf);
omStore.setClusterId("testClusterId");
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
index 78a19cb..f16076e 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
@@ -487,7 +487,7 @@ public final class OzoneManagerRatisServer {
long serverMaxTimeoutDuration =
serverMinTimeout.toLong(TimeUnit.MILLISECONDS) + 200;
final TimeDuration serverMaxTimeout = TimeDuration.valueOf(
- serverMaxTimeoutDuration, serverMinTimeoutUnit);
+ serverMaxTimeoutDuration, TimeUnit.MILLISECONDS);
RaftServerConfigKeys.Rpc.setTimeoutMin(properties,
serverMinTimeout);
RaftServerConfigKeys.Rpc.setTimeoutMax(properties,
@@ -498,23 +498,6 @@ public final class OzoneManagerRatisServer {
// TODO: set max write buffer size
- // Set the ratis leader election timeout
- TimeUnit leaderElectionMinTimeoutUnit =
- OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT
- .getUnit();
- long leaderElectionMinTimeoutduration = conf.getTimeDuration(
- OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
- OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_DEFAULT
- .getDuration(), leaderElectionMinTimeoutUnit);
- final TimeDuration leaderElectionMinTimeout = TimeDuration.valueOf(
- leaderElectionMinTimeoutduration, leaderElectionMinTimeoutUnit);
- RaftServerConfigKeys.Rpc.setTimeoutMin(properties,
- leaderElectionMinTimeout);
- long leaderElectionMaxTimeout = leaderElectionMinTimeout.toLong(
- TimeUnit.MILLISECONDS) + 200;
- RaftServerConfigKeys.Rpc.setTimeoutMax(properties,
- TimeDuration.valueOf(leaderElectionMaxTimeout, TimeUnit.MILLISECONDS));
-
TimeUnit nodeFailureTimeoutUnit =
OMConfigKeys.OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT
.getUnit();
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerRatisServer.java
index 535bbce..c875c9c 100644
--- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerRatisServer.java
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerRatisServer.java
@@ -69,7 +69,7 @@ public class TestOzoneManagerRatisServer {
private OzoneManagerRatisServer omRatisServer;
private String omID;
private String clientId = UUID.randomUUID().toString();
- private static final long LEADER_ELECTION_TIMEOUT = 500L;
+ private static final long RATIS_RPC_TIMEOUT = 500L;
private OMMetadataManager omMetadataManager;
private OzoneManager ozoneManager;
private OMNodeDetails omNodeDetails;
@@ -82,9 +82,8 @@ public class TestOzoneManagerRatisServer {
final String path = GenericTestUtils.getTempPath(omID);
Path metaDirPath = Paths.get(path, "om-meta");
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metaDirPath.toString());
- conf.setTimeDuration(
- OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
- LEADER_ELECTION_TIMEOUT, TimeUnit.MILLISECONDS);
+ conf.setTimeDuration(OMConfigKeys.OZONE_OM_RATIS_MINIMUM_TIMEOUT_KEY,
+ RATIS_RPC_TIMEOUT, TimeUnit.MILLISECONDS);
int ratisPort = conf.getInt(
OMConfigKeys.OZONE_OM_RATIS_PORT_KEY,
OMConfigKeys.OZONE_OM_RATIS_PORT_DEFAULT);
@@ -202,9 +201,8 @@ public class TestOzoneManagerRatisServer {
String path = GenericTestUtils.getTempPath(newOmId);
Path metaDirPath = Paths.get(path, "om-meta");
newConf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metaDirPath.toString());
- newConf.setTimeDuration(
- OMConfigKeys.OZONE_OM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
- LEADER_ELECTION_TIMEOUT, TimeUnit.MILLISECONDS);
+ newConf.setTimeDuration(OMConfigKeys.OZONE_OM_RATIS_MINIMUM_TIMEOUT_KEY,
+ RATIS_RPC_TIMEOUT, TimeUnit.MILLISECONDS);
int ratisPort = 9873;
InetSocketAddress rpcAddress = new InetSocketAddress(
InetAddress.getLocalHost(), 0);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org