You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ta...@apache.org on 2021/09/07 01:55:28 UTC
[iotdb] branch master updated: [IOTDB-1564] Make heartbeat and
election timeout parameters be configurable (#3797)
This is an automated email from the ASF dual-hosted git repository.
tanxinyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new 1ad4b3d [IOTDB-1564] Make heartbeat and election timeout parameters be configurable (#3797)
1ad4b3d is described below
commit 1ad4b3dcdc3a99a71254b18561e64fdbfa6b79db
Author: BaiJian <er...@hotmail.com>
AuthorDate: Tue Sep 7 09:54:50 2021 +0800
[IOTDB-1564] Make heartbeat and election timeout parameters be configurable (#3797)
* Add new cluster parameters
* Add docs for new parameters
* Fix doc
* Update codes by code review
---
.../resources/conf/iotdb-cluster.properties | 13 +++++-
.../client/sync/SyncDataHeartbeatClient.java | 2 +-
.../client/sync/SyncMetaHeartbeatClient.java | 3 +-
.../apache/iotdb/cluster/config/ClusterConfig.java | 20 ++++++++++
.../iotdb/cluster/config/ClusterConstant.java | 30 +++++---------
.../iotdb/cluster/config/ClusterDescriptor.java | 10 +++++
.../apache/iotdb/cluster/server/RaftServer.java | 32 ++++++++++++---
.../cluster/server/heartbeat/HeartbeatThread.java | 46 +++++++++++++++-------
.../cluster/server/member/MetaGroupMember.java | 2 +-
.../iotdb/cluster/server/member/RaftMember.java | 2 +-
.../server/heartbeat/HeartbeatThreadTest.java | 5 ++-
.../iotdb/cluster/server/member/BaseMember.java | 10 +++--
.../cluster/server/member/MetaGroupMemberTest.java | 6 +--
docs/UserGuide/Cluster/Cluster-Setup.md | 21 +++++++++-
docs/zh/UserGuide/Cluster/Cluster-Setup.md | 22 ++++++++++-
15 files changed, 164 insertions(+), 60 deletions(-)
diff --git a/cluster/src/assembly/resources/conf/iotdb-cluster.properties b/cluster/src/assembly/resources/conf/iotdb-cluster.properties
index 2bdac88..2b01d5b 100644
--- a/cluster/src/assembly/resources/conf/iotdb-cluster.properties
+++ b/cluster/src/assembly/resources/conf/iotdb-cluster.properties
@@ -79,7 +79,9 @@ multi_raft_factor=1
# all node's cluster_name in one cluster are the same
# cluster_name=default
-# connection time out (ms) among raft nodes
+# Thrift socket and connection timeout between raft nodes, in milliseconds.
+# NOTE: the timeout of connection used for sending heartbeats and requesting votes
+# will be adjusted to min(heartbeat_interval_ms, connection_timeout_ms).
# connection_timeout_ms=20000
# write operation timeout threshold (ms), this is only for internal communications,
@@ -90,6 +92,13 @@ multi_raft_factor=1
# not for the whole operation.
# read_operation_timeout_ms=30000
+# the time interval (ms) between two rounds of heartbeat broadcast of one raft group leader.
+# It is recommended to set it to 1/10 of election_timeout_ms, but larger than one RTT between any two nodes.
+# heartbeat_interval_ms=1000
+
+# The election timeout of a follower, or the time an elector waits for vote responses, in milliseconds.
+# election_timeout_ms=20000
+
# catch up timeout threshold (ms), this is used for a follower behind the leader too much,
# so the leader will send logs(snapshot) to the follower,
# NOTICE, it may cost minutes of time to send a snapshot,
@@ -182,4 +191,4 @@ multi_raft_factor=1
# If the number of connections created for a node exceeds `max_client_pernode_permember_number`,
# we need to wait so much time for other connections to be released until timeout,
# or a new connection will be created.
-# wait_client_timeout_ms=5000
\ No newline at end of file
+# wait_client_timeout_ms=5000
diff --git a/cluster/src/main/java/org/apache/iotdb/cluster/client/sync/SyncDataHeartbeatClient.java b/cluster/src/main/java/org/apache/iotdb/cluster/client/sync/SyncDataHeartbeatClient.java
index 83603d4..38820fc 100644
--- a/cluster/src/main/java/org/apache/iotdb/cluster/client/sync/SyncDataHeartbeatClient.java
+++ b/cluster/src/main/java/org/apache/iotdb/cluster/client/sync/SyncDataHeartbeatClient.java
@@ -45,7 +45,7 @@ public class SyncDataHeartbeatClient extends SyncDataClient {
TConfigurationConst.defaultTConfiguration,
node.getInternalIp(),
node.getDataPort() + ClusterUtils.DATA_HEARTBEAT_PORT_OFFSET,
- RaftServer.getConnectionTimeoutInMS()))));
+ RaftServer.getHeartbeatClientConnTimeoutMs()))));
this.node = node;
this.pool = pool;
getInputProtocol().getTransport().open();
diff --git a/cluster/src/main/java/org/apache/iotdb/cluster/client/sync/SyncMetaHeartbeatClient.java b/cluster/src/main/java/org/apache/iotdb/cluster/client/sync/SyncMetaHeartbeatClient.java
index 7a06668..5972259 100644
--- a/cluster/src/main/java/org/apache/iotdb/cluster/client/sync/SyncMetaHeartbeatClient.java
+++ b/cluster/src/main/java/org/apache/iotdb/cluster/client/sync/SyncMetaHeartbeatClient.java
@@ -37,7 +37,6 @@ public class SyncMetaHeartbeatClient extends SyncMetaClient {
private SyncMetaHeartbeatClient(TProtocolFactory protocolFactory, Node node, SyncClientPool pool)
throws TTransportException {
- // the difference of the two clients lies in the port
super(
protocolFactory.getProtocol(
RpcTransportFactory.INSTANCE.getTransport(
@@ -45,7 +44,7 @@ public class SyncMetaHeartbeatClient extends SyncMetaClient {
TConfigurationConst.defaultTConfiguration,
node.getInternalIp(),
node.getMetaPort() + ClusterUtils.META_HEARTBEAT_PORT_OFFSET,
- RaftServer.getConnectionTimeoutInMS()))));
+ RaftServer.getHeartbeatClientConnTimeoutMs()))));
this.node = node;
this.pool = pool;
getInputProtocol().getTransport().open();
diff --git a/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterConfig.java b/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterConfig.java
index c7d5088..afa202d 100644
--- a/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterConfig.java
+++ b/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterConfig.java
@@ -58,6 +58,10 @@ public class ClusterConfig {
private int connectionTimeoutInMS = (int) TimeUnit.SECONDS.toMillis(20);
+ private long heartbeatIntervalMs = TimeUnit.SECONDS.toMillis(1);
+
+ private long electionTimeoutMs = TimeUnit.SECONDS.toMillis(20);
+
private int readOperationTimeoutMS = (int) TimeUnit.SECONDS.toMillis(30);
private int writeOperationTimeoutMS = (int) TimeUnit.SECONDS.toMillis(30);
@@ -512,6 +516,22 @@ public class ClusterConfig {
this.waitClientTimeoutMS = waitClientTimeoutMS;
}
+ public long getHeartbeatIntervalMs() {
+ return heartbeatIntervalMs;
+ }
+
+ public void setHeartbeatIntervalMs(long heartbeatIntervalMs) {
+ this.heartbeatIntervalMs = heartbeatIntervalMs;
+ }
+
+ public long getElectionTimeoutMs() {
+ return electionTimeoutMs;
+ }
+
+ public void setElectionTimeoutMs(long electionTimeoutMs) {
+ this.electionTimeoutMs = electionTimeoutMs;
+ }
+
public int getClusterInfoRpcPort() {
return clusterInfoRpcPort;
}
diff --git a/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterConstant.java b/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterConstant.java
index b377b1a..8f049c5 100644
--- a/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterConstant.java
+++ b/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterConstant.java
@@ -24,11 +24,12 @@ import org.apache.iotdb.db.utils.TestOnly;
public class ClusterConstant {
/**
- * We only change the two values in tests to reduce test time, so they are essentially constant.
+ * We only change the value in tests to reduce test time, so it is essentially constant. A
+ * failed election will restart in [0, max(heartbeatInterval, 50ms)). If this range is too small,
+ * a stale node may frequently issue elections and thus make the leader step down.
*/
- private static long electionLeastTimeOutMs = 2 * 1000L;
-
- private static long electionRandomTimeOutMs = 3 * 1000L;
+ private static long electionMaxWaitMs =
+ Math.max(ClusterDescriptor.getInstance().getConfig().getHeartbeatIntervalMs(), 50L);
public static final int SLOT_NUM = 10000;
public static final int HASH_SALT = 2333;
@@ -46,25 +47,12 @@ public class ClusterConstant {
// constant class
}
- /**
- * a failed election will restart in 2s~5s, this should be at least as long as a heartbeat
- * interval, or a stale node may frequently issue elections and thus makes the leader step down
- */
- public static long getElectionLeastTimeOutMs() {
- return electionLeastTimeOutMs;
- }
-
- public static long getElectionRandomTimeOutMs() {
- return electionRandomTimeOutMs;
- }
-
- @TestOnly
- public static void setElectionLeastTimeOutMs(long electionLeastTimeOutMs) {
- ClusterConstant.electionLeastTimeOutMs = electionLeastTimeOutMs;
+ public static long getElectionMaxWaitMs() {
+ return electionMaxWaitMs;
}
@TestOnly
- public static void setElectionRandomTimeOutMs(long electionRandomTimeOutMs) {
- ClusterConstant.electionRandomTimeOutMs = electionRandomTimeOutMs;
+ public static void setElectionMaxWaitMs(long electionMaxWaitMs) {
+ ClusterConstant.electionMaxWaitMs = electionMaxWaitMs;
}
}
diff --git a/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterDescriptor.java b/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterDescriptor.java
index f734b7e..f70f08f 100644
--- a/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterDescriptor.java
+++ b/cluster/src/main/java/org/apache/iotdb/cluster/config/ClusterDescriptor.java
@@ -179,6 +179,16 @@ public class ClusterDescriptor {
properties.getProperty(
"connection_timeout_ms", String.valueOf(config.getConnectionTimeoutInMS()))));
+ config.setHeartbeatIntervalMs(
+ Long.parseLong(
+ properties.getProperty(
+ "heartbeat_interval_ms", String.valueOf(config.getHeartbeatIntervalMs()))));
+
+ config.setElectionTimeoutMs(
+ Long.parseLong(
+ properties.getProperty(
+ "election_timeout_ms", String.valueOf(config.getElectionTimeoutMs()))));
+
config.setReadOperationTimeoutMS(
Integer.parseInt(
properties.getProperty(
diff --git a/cluster/src/main/java/org/apache/iotdb/cluster/server/RaftServer.java b/cluster/src/main/java/org/apache/iotdb/cluster/server/RaftServer.java
index 09956d2..91358d4 100644
--- a/cluster/src/main/java/org/apache/iotdb/cluster/server/RaftServer.java
+++ b/cluster/src/main/java/org/apache/iotdb/cluster/server/RaftServer.java
@@ -57,6 +57,13 @@ import java.util.concurrent.atomic.AtomicLong;
public abstract class RaftServer implements RaftService.AsyncIface, RaftService.Iface {
private static final Logger logger = LoggerFactory.getLogger(RaftServer.class);
+
+ // Heartbeat client connection timeout should not be larger than heartbeat interval, otherwise
+ // the thread pool of sending heartbeats or requesting votes may be used up by waiting for
+ // establishing connection with some slow or dead nodes.
+ private static final int heartbeatClientConnTimeoutMs =
+ Math.min((int) RaftServer.getHeartbeatIntervalMs(), RaftServer.getConnectionTimeoutInMS());
+
private static int connectionTimeoutInMS =
ClusterDescriptor.getInstance().getConfig().getConnectionTimeoutInMS();
private static int readOperationTimeoutMS =
@@ -64,7 +71,10 @@ public abstract class RaftServer implements RaftService.AsyncIface, RaftService.
private static int writeOperationTimeoutMS =
ClusterDescriptor.getInstance().getConfig().getWriteOperationTimeoutMS();
private static int syncLeaderMaxWaitMs = 20 * 1000;
- private static long heartBeatIntervalMs = 1000L;
+ private static long heartbeatIntervalMs =
+ ClusterDescriptor.getInstance().getConfig().getHeartbeatIntervalMs();
+ private static long electionTimeoutMs =
+ ClusterDescriptor.getInstance().getConfig().getElectionTimeoutMs();
ClusterConfig config = ClusterDescriptor.getInstance().getConfig();
// the socket poolServer will listen to
@@ -120,12 +130,24 @@ public abstract class RaftServer implements RaftService.AsyncIface, RaftService.
RaftServer.syncLeaderMaxWaitMs = syncLeaderMaxWaitMs;
}
- public static long getHeartBeatIntervalMs() {
- return heartBeatIntervalMs;
+ public static long getHeartbeatIntervalMs() {
+ return heartbeatIntervalMs;
+ }
+
+ public static void setHeartbeatIntervalMs(long heartbeatIntervalMs) {
+ RaftServer.heartbeatIntervalMs = heartbeatIntervalMs;
+ }
+
+ public static long getElectionTimeoutMs() {
+ return electionTimeoutMs;
+ }
+
+ public static void setElectionTimeoutMs(long electionTimeoutMs) {
+ RaftServer.electionTimeoutMs = electionTimeoutMs;
}
- public static void setHeartBeatIntervalMs(long heartBeatIntervalMs) {
- RaftServer.heartBeatIntervalMs = heartBeatIntervalMs;
+ public static int getHeartbeatClientConnTimeoutMs() {
+ return heartbeatClientConnTimeoutMs;
}
/**
diff --git a/cluster/src/main/java/org/apache/iotdb/cluster/server/heartbeat/HeartbeatThread.java b/cluster/src/main/java/org/apache/iotdb/cluster/server/heartbeat/HeartbeatThread.java
index 20ac505..bc5e8d0 100644
--- a/cluster/src/main/java/org/apache/iotdb/cluster/server/heartbeat/HeartbeatThread.java
+++ b/cluster/src/main/java/org/apache/iotdb/cluster/server/heartbeat/HeartbeatThread.java
@@ -70,9 +70,7 @@ public class HeartbeatThread implements Runnable {
public void run() {
logger.info("{}: Heartbeat thread starts...", memberName);
// sleep random time to reduce first election conflicts
- long electionWait =
- ClusterConstant.getElectionLeastTimeOutMs()
- + Math.abs(random.nextLong() % ClusterConstant.getElectionRandomTimeOutMs());
+ long electionWait = getElectionRandomWaitMs();
try {
logger.info("{}: Sleep {}ms before first election", memberName, electionWait);
Thread.sleep(electionWait);
@@ -86,15 +84,17 @@ public class HeartbeatThread implements Runnable {
// send heartbeats to the followers
sendHeartbeats();
synchronized (localMember.getHeartBeatWaitObject()) {
- localMember.getHeartBeatWaitObject().wait(RaftServer.getHeartBeatIntervalMs());
+ localMember.getHeartBeatWaitObject().wait(RaftServer.getHeartbeatIntervalMs());
}
hasHadLeader = true;
break;
case FOLLOWER:
// check if heartbeat times out
- long heartBeatInterval =
+ long heartbeatInterval =
System.currentTimeMillis() - localMember.getLastHeartbeatReceivedTime();
- if (heartBeatInterval >= RaftServer.getConnectionTimeoutInMS()) {
+ long randomElectionTimeout =
+ RaftServer.getElectionTimeoutMs() + getElectionRandomWaitMs();
+ if (heartbeatInterval >= randomElectionTimeout) {
// the leader is considered dead, an election will be started in the next loop
logger.info("{}: The leader {} timed out", memberName, localMember.getLeader());
localMember.setCharacter(NodeCharacter.ELECTOR);
@@ -105,7 +105,12 @@ public class HeartbeatThread implements Runnable {
memberName,
localMember.getLeader());
synchronized (localMember.getHeartBeatWaitObject()) {
- localMember.getHeartBeatWaitObject().wait(RaftServer.getConnectionTimeoutInMS());
+ // we sleep to next possible heartbeat timeout point
+ long leastWaitTime =
+ localMember.getLastHeartbeatReceivedTime()
+ + randomElectionTimeout
+ - System.currentTimeMillis();
+ localMember.getHeartBeatWaitObject().wait(leastWaitTime);
}
}
hasHadLeader = true;
@@ -230,10 +235,15 @@ public class HeartbeatThread implements Runnable {
heartbeatHandler.onComplete(heartBeatResponse);
} catch (TTransportException e) {
logger.warn(
- "{}: Cannot send heart beat to node {} due to network", memberName, node, e);
+ memberName
+ + ": Cannot send heartbeat to node "
+ + node.toString()
+ + " due to network",
+ e);
client.getInputProtocol().getTransport().close();
} catch (Exception e) {
- logger.warn("{}: Cannot send heart beat to node {}", memberName, node, e);
+ logger.warn(
+ memberName + ": Cannot send heart beat to node " + node.toString(), e);
} finally {
ClientUtils.putBackSyncHeartbeatClient(client);
}
@@ -259,9 +269,7 @@ public class HeartbeatThread implements Runnable {
startElection();
if (localMember.getCharacter() == NodeCharacter.ELECTOR) {
// sleep random time to reduce election conflicts
- long electionWait =
- ClusterConstant.getElectionLeastTimeOutMs()
- + Math.abs(random.nextLong() % ClusterConstant.getElectionRandomTimeOutMs());
+ long electionWait = getElectionRandomWaitMs();
logger.info("{}: Sleep {}ms until next election", memberName, electionWait);
Thread.sleep(electionWait);
}
@@ -321,8 +329,8 @@ public class HeartbeatThread implements Runnable {
logger.info(
"{}: Wait for {}ms until election time out",
memberName,
- RaftServer.getConnectionTimeoutInMS());
- localMember.getTerm().wait(RaftServer.getConnectionTimeoutInMS());
+ RaftServer.getElectionTimeoutMs());
+ localMember.getTerm().wait(RaftServer.getElectionTimeoutMs());
} catch (InterruptedException e) {
logger.info(
"{}: Unexpected interruption when waiting the result of election {}",
@@ -413,7 +421,11 @@ public class HeartbeatThread implements Runnable {
} catch (TException e) {
client.getInputProtocol().getTransport().close();
logger.warn(
- "{}: Cannot request a vote from {} due to network", memberName, node, e);
+ memberName
+ + ": Cannot request a vote from "
+ + node.toString()
+ + " due to network",
+ e);
handler.onError(e);
} catch (Exception e) {
handler.onError(e);
@@ -423,4 +435,8 @@ public class HeartbeatThread implements Runnable {
}
});
}
+
+ private long getElectionRandomWaitMs() {
+ return Math.abs(random.nextLong() % ClusterConstant.getElectionMaxWaitMs());
+ }
}
diff --git a/cluster/src/main/java/org/apache/iotdb/cluster/server/member/MetaGroupMember.java b/cluster/src/main/java/org/apache/iotdb/cluster/server/member/MetaGroupMember.java
index f917916..3da9dd8 100644
--- a/cluster/src/main/java/org/apache/iotdb/cluster/server/member/MetaGroupMember.java
+++ b/cluster/src/main/java/org/apache/iotdb/cluster/server/member/MetaGroupMember.java
@@ -1807,7 +1807,7 @@ public class MetaGroupMember extends RaftMember {
new Thread(
() -> {
try {
- Thread.sleep(RaftServer.getHeartBeatIntervalMs());
+ Thread.sleep(RaftServer.getHeartbeatIntervalMs());
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
// ignore
diff --git a/cluster/src/main/java/org/apache/iotdb/cluster/server/member/RaftMember.java b/cluster/src/main/java/org/apache/iotdb/cluster/server/member/RaftMember.java
index c2c8672..774e269 100644
--- a/cluster/src/main/java/org/apache/iotdb/cluster/server/member/RaftMember.java
+++ b/cluster/src/main/java/org/apache/iotdb/cluster/server/member/RaftMember.java
@@ -981,7 +981,7 @@ public abstract class RaftMember {
// the node may have some inconsistent logs with the leader
waitedTime = System.currentTimeMillis() - startTime;
synchronized (syncLock) {
- syncLock.wait(RaftServer.getHeartBeatIntervalMs());
+ syncLock.wait(RaftServer.getHeartbeatIntervalMs());
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
diff --git a/cluster/src/test/java/org/apache/iotdb/cluster/server/heartbeat/HeartbeatThreadTest.java b/cluster/src/test/java/org/apache/iotdb/cluster/server/heartbeat/HeartbeatThreadTest.java
index dc3b14d..23c78b6 100644
--- a/cluster/src/test/java/org/apache/iotdb/cluster/server/heartbeat/HeartbeatThreadTest.java
+++ b/cluster/src/test/java/org/apache/iotdb/cluster/server/heartbeat/HeartbeatThreadTest.java
@@ -148,8 +148,9 @@ public class HeartbeatThreadTest {
@Before
public void setUp() throws Exception {
- ClusterConstant.setElectionLeastTimeOutMs(20);
- ClusterConstant.setElectionRandomTimeOutMs(30);
+ ClusterConstant.setElectionMaxWaitMs(50L);
+ RaftServer.setHeartbeatIntervalMs(100L);
+ RaftServer.setElectionTimeoutMs(1000L);
prevUseAsyncServer = ClusterDescriptor.getInstance().getConfig().isUseAsyncServer();
ClusterDescriptor.getInstance().getConfig().setUseAsyncServer(true);
logManager = new TestLogManager(1);
diff --git a/cluster/src/test/java/org/apache/iotdb/cluster/server/member/BaseMember.java b/cluster/src/test/java/org/apache/iotdb/cluster/server/member/BaseMember.java
index 7d26bea..d8cae9d 100644
--- a/cluster/src/test/java/org/apache/iotdb/cluster/server/member/BaseMember.java
+++ b/cluster/src/test/java/org/apache/iotdb/cluster/server/member/BaseMember.java
@@ -95,6 +95,7 @@ public class BaseMember {
private int syncLeaderMaxWait;
private long heartBeatInterval;
+ private long electionTimeout;
@Before
public void setUp() throws Exception, QueryProcessException {
@@ -111,10 +112,12 @@ public class BaseMember {
RaftMember.setWaitLeaderTimeMs(10);
syncLeaderMaxWait = RaftServer.getSyncLeaderMaxWaitMs();
- heartBeatInterval = RaftServer.getHeartBeatIntervalMs();
+ heartBeatInterval = RaftServer.getHeartbeatIntervalMs();
+ electionTimeout = RaftServer.getElectionTimeoutMs();
RaftServer.setSyncLeaderMaxWaitMs(100);
- RaftServer.setHeartBeatIntervalMs(100);
+ RaftServer.setHeartbeatIntervalMs(100);
+ RaftServer.setElectionTimeoutMs(1000);
allNodes = new PartitionGroup();
for (int i = 0; i < 100; i += 10) {
@@ -192,7 +195,8 @@ public class BaseMember {
IoTDBDescriptor.getInstance().getConfig().setEnableWal(prevEnableWAL);
RaftServer.setSyncLeaderMaxWaitMs(syncLeaderMaxWait);
- RaftServer.setHeartBeatIntervalMs(heartBeatInterval);
+ RaftServer.setHeartbeatIntervalMs(heartBeatInterval);
+ RaftServer.setElectionTimeoutMs(electionTimeout);
}
DataGroupMember getDataGroupMember(Node node) {
diff --git a/cluster/src/test/java/org/apache/iotdb/cluster/server/member/MetaGroupMemberTest.java b/cluster/src/test/java/org/apache/iotdb/cluster/server/member/MetaGroupMemberTest.java
index a857599..56f0bd8 100644
--- a/cluster/src/test/java/org/apache/iotdb/cluster/server/member/MetaGroupMemberTest.java
+++ b/cluster/src/test/java/org/apache/iotdb/cluster/server/member/MetaGroupMemberTest.java
@@ -696,8 +696,8 @@ public class MetaGroupMemberTest extends BaseMember {
@Test
public void testJoinClusterFailed() throws QueryProcessException {
System.out.println("Start testJoinClusterFailed()");
- long prevInterval = RaftServer.getHeartBeatIntervalMs();
- RaftServer.setHeartBeatIntervalMs(10);
+ long prevInterval = RaftServer.getHeartbeatIntervalMs();
+ RaftServer.setHeartbeatIntervalMs(10);
ClusterDescriptor.getInstance().getConfig().setJoinClusterTimeOutMs(100);
dummyResponse.set(Response.RESPONSE_NO_CONNECTION);
MetaGroupMember newMember = getMetaGroupMember(TestUtils.getNode(10));
@@ -708,7 +708,7 @@ public class MetaGroupMemberTest extends BaseMember {
assertTrue(e instanceof StartUpCheckFailureException);
} finally {
newMember.closeLogManager();
- RaftServer.setHeartBeatIntervalMs(prevInterval);
+ RaftServer.setHeartbeatIntervalMs(prevInterval);
}
}
diff --git a/docs/UserGuide/Cluster/Cluster-Setup.md b/docs/UserGuide/Cluster/Cluster-Setup.md
index 134761a..8830f65 100644
--- a/docs/UserGuide/Cluster/Cluster-Setup.md
+++ b/docs/UserGuide/Cluster/Cluster-Setup.md
@@ -298,11 +298,29 @@ To stop the services of all the nodes on a single machine, you need to execute t
| Name | connection\_timeout\_ms |
| ----------- | ------------------------------------------------------------ |
-| Description | Heartbeat timeout time period between nodes in the same raft group, in milliseconds |
+| Description | Thrift socket and connection timeout between raft nodes, in milliseconds. **Note that the timeout of the connection used for sending heartbeats and requesting votes will be adjusted to min(heartbeat\_interval\_ms, connection\_timeout\_ms).** |
| Type | Int32 |
| Default | 20000 |
| Effective | After restart system |
+- heartbeat\_interval\_ms
+
+| Name | heartbeat\_interval\_ms |
+| ----------- | ------------------------------------------------------------ |
+| Description | The time period between heartbeat broadcasts in leader, in milliseconds |
+| Type | Int64 |
+| Default | 1000 |
+| Effective | After restart system |
+
+- election\_timeout\_ms
+
+| Name | election\_timeout\_ms |
+| ----------- | ------------------------------------------------------------ |
+| Description | The election timeout of a follower, or the time an elector waits for vote responses, in milliseconds |
+| Type | Int64 |
+| Default | 20000 |
+| Effective | After restart system |
+
- read\_operation\_timeout\_ms
| Name | read\_operation\_timeout\_ms |
@@ -374,4 +392,3 @@ To stop the services of all the nodes on a single machine, you need to execute t
| Type | BOOLEAN |
| Default | true |
| Effective | After restart system |
-
diff --git a/docs/zh/UserGuide/Cluster/Cluster-Setup.md b/docs/zh/UserGuide/Cluster/Cluster-Setup.md
index cc552a6..622cc2e 100644
--- a/docs/zh/UserGuide/Cluster/Cluster-Setup.md
+++ b/docs/zh/UserGuide/Cluster/Cluster-Setup.md
@@ -295,10 +295,28 @@ iotdb-engines.properties 配置文件中的部分内容会不再生效:
| 名字 | connection_timeout_ms |
| ------------ | -------------------------------------------------- |
-| 描述 | 同一个 raft 组各个节点之间的心跳超时时间,单位毫秒 |
+| 描述 | raft 节点间的 thrift 连接超时和 socket 超时时间,单位毫秒. **对于发送心跳和投票请求的 thrift 连接的超时时间会被自动调整为 connection_timeout_ms 和 heartbeat_interval_ms 的最小值.** |
| 类型 | Int32 |
| 默认值 | 20000 |
-| 改后生效方式 | 重启服务生效 |
+| 改后生效方式 | 重启服务生效 |
+
+- heartbeat\_interval\_ms
+
+| 名字 | heartbeat\_interval\_ms |
+| ------------ | ------------------------------------------------------- |
+| 描述 | 领导者发送心跳广播的间隔时间,单位毫秒 |
+| 类型 | Int64 |
+| 默认值 | 1000 |
+| 改后生效方式 | 重启服务生效 |
+
+- election\_timeout\_ms
+
+| 名字 | election\_timeout\_ms |
+| ------------ | ------------------------------------------------------------ |
+| 描述 | 跟随者的选举超时时间, 以及选举者等待投票的超时时间, 单位毫秒 |
+| 类型 | Int64 |
+| 默认值 | 20000 |
+| 改后生效方式 | 重启服务生效 |
- read_operation_timeout_ms