You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ca...@apache.org on 2022/10/11 08:22:49 UTC
[iotdb] branch beyyes/remove_node_stability updated: add REMOVE_DATANODE_PROCESS log for remove datanode process
This is an automated email from the ASF dual-hosted git repository.
caogaofei pushed a commit to branch beyyes/remove_node_stability
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/beyyes/remove_node_stability by this push:
new 41716e0c89 add REMOVE_DATANODE_PROCESS log for remove datanode process
41716e0c89 is described below
commit 41716e0c894228b3944d9ac7990169493f69564f
Author: Beyyes <cg...@foxmail.com>
AuthorDate: Tue Oct 11 16:22:31 2022 +0800
add REMOVE_DATANODE_PROCESS log for remove datanode process
---
.../iotdb/confignode/conf/ConfigNodeConstant.java | 2 +
.../confignode/conf/ConfigNodeRemoveCheck.java | 1 +
.../iotdb/confignode/persistence/NodeInfo.java | 8 +-
.../procedure/env/DataNodeRemoveHandler.java | 131 ++++++++++++---------
.../impl/node/RemoveDataNodeProcedure.java | 7 +-
.../apache/iotdb/commons/utils/NodeUrlUtils.java | 6 +-
.../apache/iotdb/db/client/ConfigNodeClient.java | 2 +-
.../db/service/DataNodeServerCommandLine.java | 2 +-
8 files changed, 93 insertions(+), 66 deletions(-)
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeConstant.java b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeConstant.java
index 4f58c58331..70c2a1c620 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeConstant.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeConstant.java
@@ -45,6 +45,8 @@ public class ConfigNodeConstant {
public static final String METRIC_STATUS_ONLINE = "Online";
public static final String METRIC_STATUS_UNKNOWN = "Unknown";
+ public static final String REMOVE_DATANODE_PROCESS = "[REMOVE_DATANODE_PROCESS]";
+
private ConfigNodeConstant() {
// empty constructor
}
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeRemoveCheck.java b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeRemoveCheck.java
index 6b9031c30a..62be003b2c 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeRemoveCheck.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeRemoveCheck.java
@@ -40,6 +40,7 @@ import java.util.Properties;
import static org.apache.commons.lang3.StringUtils.isNumeric;
public class ConfigNodeRemoveCheck {
+
private static final Logger LOGGER = LoggerFactory.getLogger(ConfigNodeStartupCheck.class);
private static final ConfigNodeConfig CONF = ConfigNodeDescriptor.getInstance().getConf();
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/persistence/NodeInfo.java b/confignode/src/main/java/org/apache/iotdb/confignode/persistence/NodeInfo.java
index 30c108a06f..fd64713896 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/persistence/NodeInfo.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/persistence/NodeInfo.java
@@ -166,7 +166,9 @@ public class NodeInfo implements SnapshotProcessor {
* @return TSStatus
*/
public TSStatus removeDataNode(RemoveDataNodePlan req) {
- LOGGER.info("there are {} data node in cluster before remove some", registeredDataNodes.size());
+ LOGGER.info(
+ "there are {} data node in cluster before executed remove-datanode.sh",
+ registeredDataNodes.size());
try {
dataNodeInfoReadWriteLock.writeLock().lock();
req.getDataNodeLocations()
@@ -178,7 +180,9 @@ public class NodeInfo implements SnapshotProcessor {
} finally {
dataNodeInfoReadWriteLock.writeLock().unlock();
}
- LOGGER.info("there are {} data node in cluster after remove some", registeredDataNodes.size());
+ LOGGER.info(
+ "there are {} data node in cluster after executed remove-datanode.sh",
+ registeredDataNodes.size());
return new TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode());
}
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
index 73e2b65c48..f816814bb9 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
@@ -53,6 +53,8 @@ import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
+import static org.apache.iotdb.confignode.conf.ConfigNodeConstant.REMOVE_DATANODE_PROCESS;
+
public class DataNodeRemoveHandler {
private static final Logger LOGGER = LoggerFactory.getLogger(DataNodeRemoveHandler.class);
@@ -109,7 +111,8 @@ public class DataNodeRemoveHandler {
DataNodeRequestType.DISABLE_DATA_NODE);
if (!isSucceed(status)) {
LOGGER.error(
- "broadcastDisableDataNode meets error, disabledDataNode: {}, error: {}",
+ "{}, BroadcastDisableDataNode meets error, disabledDataNode: {}, error: {}",
+ REMOVE_DATANODE_PROCESS,
getIdWithRpcEndpoint(disabledDataNode),
status);
return;
@@ -145,6 +148,60 @@ public class DataNodeRemoveHandler {
return newNode.get();
}
+ /**
+ * Create a new RegionReplica and build the ConsensusGroup on the destined DataNode
+ *
+ * <p>createNewRegionPeer should be invoked on a DataNode that doesn't contain any peer of the
+ * specific ConsensusGroup, in order to avoid there exists one DataNode who has more than one
+ * RegionReplica.
+ *
+ * @param regionId The given ConsensusGroup
+ * @param destDataNode The destined DataNode where the new peer will be created
+ * @return status
+ */
+ public TSStatus createNewRegionPeer(TConsensusGroupId regionId, TDataNodeLocation destDataNode) {
+ TSStatus status;
+ List<TDataNodeLocation> regionReplicaNodes = findRegionReplicaNodes(regionId);
+ if (regionReplicaNodes.isEmpty()) {
+ LOGGER.warn(
+ "{}, Not find region replica nodes in createPeer, regionId: {}",
+ REMOVE_DATANODE_PROCESS,
+ regionId);
+ status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
+ status.setMessage("Not find region replica nodes in createPeer, regionId: " + regionId);
+ return status;
+ }
+
+ List<TDataNodeLocation> currentPeerNodes = new ArrayList<>(regionReplicaNodes);
+ currentPeerNodes.add(destDataNode);
+ String storageGroup = configManager.getPartitionManager().getRegionStorageGroup(regionId);
+ TCreatePeerReq req = new TCreatePeerReq(regionId, currentPeerNodes, storageGroup);
+ // TODO replace with real ttl
+ req.setTtl(Long.MAX_VALUE);
+
+ status =
+ SyncDataNodeClientPool.getInstance()
+ .sendSyncRequestToDataNodeWithRetry(
+ destDataNode.getInternalEndPoint(),
+ req,
+ DataNodeRequestType.CREATE_NEW_REGION_PEER);
+
+ LOGGER.info(
+ "{}, Send action createNewRegionPeer finished, regionId: {}, destDataNode: {}",
+ REMOVE_DATANODE_PROCESS,
+ regionId,
+ destDataNode);
+ if (isFailed(status)) {
+ LOGGER.error(
+ "{}, Send action createNewRegionPeer error, regionId: {}, destDataNode: {}, result: {}",
+ REMOVE_DATANODE_PROCESS,
+ regionId,
+ destDataNode,
+ status);
+ }
+ return status;
+ }
+
/**
* Order the specific ConsensusGroup to add peer for the new RegionReplica.
*
@@ -165,13 +222,14 @@ public class DataNodeRemoveHandler {
filterDataNodeWithOtherRegionReplica(regionId, destDataNode);
if (!selectedDataNode.isPresent()) {
LOGGER.warn(
- "There are no other DataNodes could be selected to perform the add peer process, "
+ "{}, There are no other DataNodes could be selected to perform the add peer process, "
+ "please check RegionGroup: {} by SQL: show regions",
+ REMOVE_DATANODE_PROCESS,
regionId);
status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
status.setMessage(
"There are no other DataNodes could be selected to perform the add peer process, "
- + "please check by SQL: show regions");
+ + "please check by show regions command");
return status;
}
@@ -185,9 +243,11 @@ public class DataNodeRemoveHandler {
maintainPeerReq,
DataNodeRequestType.ADD_REGION_PEER);
LOGGER.info(
- "Send action addRegionPeer, wait it finished, regionId: {}, dataNode: {}",
+ "{}, Send action addRegionPeer finished, regionId: {}, rpcDataNode: {}, destDataNode: {}",
+ REMOVE_DATANODE_PROCESS,
regionId,
- getIdWithRpcEndpoint(selectedDataNode.get()));
+ getIdWithRpcEndpoint(selectedDataNode.get()),
+ destDataNode);
return status;
}
@@ -226,7 +286,8 @@ public class DataNodeRemoveHandler {
maintainPeerReq,
DataNodeRequestType.REMOVE_REGION_PEER);
LOGGER.info(
- "Send action removeRegionPeer, wait it finished, regionId: {}, dataNode: {}",
+ "{}, Send action removeRegionPeer finished, regionId: {}, dataNode: {}",
+ REMOVE_DATANODE_PROCESS,
regionId,
rpcClientDataNode.getInternalEndPoint());
return status;
@@ -252,7 +313,7 @@ public class DataNodeRemoveHandler {
String errorMessage =
"deleteOldRegionPeer is not supported for schemaRegion when SchemaReplicationFactor equals 1, "
+ "you are supposed to delete the region data of datanode manually";
- LOGGER.info(errorMessage);
+ LOGGER.info("{}, {}", REMOVE_DATANODE_PROCESS, errorMessage);
TSStatus status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
status.setMessage(errorMessage);
return status;
@@ -266,7 +327,7 @@ public class DataNodeRemoveHandler {
String errorMessage =
"deleteOldRegionPeer is not supported for dataRegion when DataReplicationFactor equals 1, "
+ "you are supposed to delete the region data of datanode manually";
- LOGGER.info(errorMessage);
+ LOGGER.info("{}, {}", REMOVE_DATANODE_PROCESS, errorMessage);
TSStatus status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
status.setMessage(errorMessage);
return status;
@@ -281,7 +342,8 @@ public class DataNodeRemoveHandler {
maintainPeerReq,
DataNodeRequestType.DELETE_OLD_REGION_PEER);
LOGGER.info(
- "Send action deleteOldRegionPeer to regionId {} on dataNodeId {}, wait it finished",
+ "{}, Send action deleteOldRegionPeer finished, regionId: {}, dataNodeId: {}",
+ REMOVE_DATANODE_PROCESS,
regionId,
originalDataNode.getInternalEndPoint());
return status;
@@ -299,7 +361,7 @@ public class DataNodeRemoveHandler {
TDataNodeLocation originalDataNode,
TDataNodeLocation destDataNode) {
LOGGER.info(
- "start to update region {} location from {} to {} when it migrate succeed",
+ "Start to update region {} location from {} to {} when it migrate succeed",
regionId,
originalDataNode.getInternalEndPoint().getIp(),
destDataNode.getInternalEndPoint().getIp());
@@ -307,7 +369,7 @@ public class DataNodeRemoveHandler {
new UpdateRegionLocationPlan(regionId, originalDataNode, destDataNode);
TSStatus status = configManager.getPartitionManager().updateRegionLocation(req);
LOGGER.info(
- "update region {} location finished, result:{}, old:{}, new:{}",
+ "Update region {} location finished, result:{}, old:{}, new:{}",
regionId,
status,
originalDataNode.getInternalEndPoint().getIp(),
@@ -343,53 +405,6 @@ public class DataNodeRemoveHandler {
.findAny();
}
- /**
- * Create a new RegionReplica and build the ConsensusGroup on the destined DataNode
- *
- * <p>createNewRegionPeer should be invoked on a DataNode that doesn't contain any peer of the
- * specific ConsensusGroup, in order to avoid there exists one DataNode who has more than one
- * RegionReplica.
- *
- * @param regionId The given ConsensusGroup
- * @param destDataNode The destined DataNode where the new peer will be created
- * @return status
- */
- public TSStatus createNewRegionPeer(TConsensusGroupId regionId, TDataNodeLocation destDataNode) {
- TSStatus status;
- List<TDataNodeLocation> regionReplicaNodes = findRegionReplicaNodes(regionId);
- if (regionReplicaNodes.isEmpty()) {
- LOGGER.warn("Not find region replica nodes in createPeer, regionId: {}", regionId);
- status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
- status.setMessage("Not find region replica nodes in createPeer, regionId: " + regionId);
- return status;
- }
-
- List<TDataNodeLocation> currentPeerNodes = new ArrayList<>(regionReplicaNodes);
- currentPeerNodes.add(destDataNode);
- String storageGroup = configManager.getPartitionManager().getRegionStorageGroup(regionId);
- TCreatePeerReq req = new TCreatePeerReq(regionId, currentPeerNodes, storageGroup);
- // TODO replace with real ttl
- req.setTtl(Long.MAX_VALUE);
-
- status =
- SyncDataNodeClientPool.getInstance()
- .sendSyncRequestToDataNodeWithRetry(
- destDataNode.getInternalEndPoint(),
- req,
- DataNodeRequestType.CREATE_NEW_REGION_PEER);
-
- LOGGER.info(
- "Send action createNewRegionPeer, regionId: {}, dataNode: {}", regionId, destDataNode);
- if (isFailed(status)) {
- LOGGER.error(
- "Send action createNewRegionPeer, regionId: {}, dataNode: {}, result: {}",
- regionId,
- destDataNode,
- status);
- }
- return status;
- }
-
private boolean isSucceed(TSStatus status) {
return status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode();
}
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveDataNodeProcedure.java b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveDataNodeProcedure.java
index f46d6bf4f7..1e824a0b59 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveDataNodeProcedure.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveDataNodeProcedure.java
@@ -38,6 +38,8 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
+import static org.apache.iotdb.confignode.conf.ConfigNodeConstant.REMOVE_DATANODE_PROCESS;
+
/** remove data node procedure */
public class RemoveDataNodeProcedure extends AbstractNodeProcedure<RemoveDataNodeState> {
private static final Logger LOG = LoggerFactory.getLogger(RemoveDataNodeProcedure.class);
@@ -68,7 +70,10 @@ public class RemoveDataNodeProcedure extends AbstractNodeProcedure<RemoveDataNod
env.markDataNodeAsRemovingAndBroadCast(disableDataNodeLocation);
execDataNodeRegionIds =
env.getDataNodeRemoveHandler().getDataNodeRegionIds(disableDataNodeLocation);
- LOG.info("DataNode region id is {}", execDataNodeRegionIds);
+ LOG.info(
+ "{}, DataNode regions to be removed is {}",
+ REMOVE_DATANODE_PROCESS,
+ execDataNodeRegionIds);
setNextState(RemoveDataNodeState.BROADCAST_DISABLE_DATA_NODE);
break;
case BROADCAST_DISABLE_DATA_NODE:
diff --git a/node-commons/src/main/java/org/apache/iotdb/commons/utils/NodeUrlUtils.java b/node-commons/src/main/java/org/apache/iotdb/commons/utils/NodeUrlUtils.java
index 8dbf8459e3..f54290d0d7 100644
--- a/node-commons/src/main/java/org/apache/iotdb/commons/utils/NodeUrlUtils.java
+++ b/node-commons/src/main/java/org/apache/iotdb/commons/utils/NodeUrlUtils.java
@@ -72,8 +72,8 @@ public class NodeUrlUtils {
public static TEndPoint parseTEndPointUrl(String endPointUrl) throws BadNodeUrlException {
String[] split = endPointUrl.split(":");
if (split.length != 2) {
- logger.warn("Bad node url: {}, please check the IP:PORT inputs", endPointUrl);
- throw new BadNodeUrlException(String.format("Bad node url: %s", endPointUrl));
+ logger.warn("Illegal endpoint url format: {}", endPointUrl);
+ throw new BadNodeUrlException(String.format("Bad endpoint url: %s", endPointUrl));
}
String ip = split[0];
TEndPoint result;
@@ -81,7 +81,7 @@ public class NodeUrlUtils {
int port = Integer.parseInt(split[1]);
result = new TEndPoint(ip, port);
} catch (NumberFormatException e) {
- logger.warn("Bad node url: {}, please check the IP:PORT inputs", endPointUrl);
+ logger.warn("Illegal endpoint url format: {}", endPointUrl);
throw new BadNodeUrlException(String.format("Bad node url: %s", endPointUrl));
}
return result;
diff --git a/server/src/main/java/org/apache/iotdb/db/client/ConfigNodeClient.java b/server/src/main/java/org/apache/iotdb/db/client/ConfigNodeClient.java
index f9dd77cc37..02029dbcea 100644
--- a/server/src/main/java/org/apache/iotdb/db/client/ConfigNodeClient.java
+++ b/server/src/main/java/org/apache/iotdb/db/client/ConfigNodeClient.java
@@ -262,7 +262,7 @@ public class ConfigNodeClient
configLeader = null;
}
logger.warn(
- "Failed to connect to ConfigNode {} from DataNode {},because the current node is not leader,try next node",
+ "Failed to connect to ConfigNode {} from DataNode {}, because the current node is not leader, try next node",
configNode,
config.getAddressAndPort());
return true;
diff --git a/server/src/main/java/org/apache/iotdb/db/service/DataNodeServerCommandLine.java b/server/src/main/java/org/apache/iotdb/db/service/DataNodeServerCommandLine.java
index fedc46a4bf..d3de543558 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/DataNodeServerCommandLine.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/DataNodeServerCommandLine.java
@@ -117,7 +117,7 @@ public class DataNodeServerCommandLine extends ServerCommandLine {
if (dataNodeLocations.isEmpty()) {
throw new BadNodeUrlException("No DataNode to remove");
}
- logger.info("Start to remove datanode, detail:{}", dataNodeLocations);
+ logger.info("Start to remove datanode, removed datanode: {}", dataNodeLocations);
TDataNodeRemoveReq removeReq = new TDataNodeRemoveReq(dataNodeLocations);
try (ConfigNodeClient configNodeClient = new ConfigNodeClient()) {
TDataNodeRemoveResp removeResp = configNodeClient.removeDataNode(removeReq);