You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ca...@apache.org on 2022/10/11 08:22:49 UTC

[iotdb] branch beyyes/remove_node_stability updated: add REMOVE_DATANODE_PROCESS log for remove datanode process

This is an automated email from the ASF dual-hosted git repository.

caogaofei pushed a commit to branch beyyes/remove_node_stability
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/beyyes/remove_node_stability by this push:
     new 41716e0c89 add REMOVE_DATANODE_PROCESS log for remove datanode process
41716e0c89 is described below

commit 41716e0c894228b3944d9ac7990169493f69564f
Author: Beyyes <cg...@foxmail.com>
AuthorDate: Tue Oct 11 16:22:31 2022 +0800

    add REMOVE_DATANODE_PROCESS log for remove datanode process
---
 .../iotdb/confignode/conf/ConfigNodeConstant.java  |   2 +
 .../confignode/conf/ConfigNodeRemoveCheck.java     |   1 +
 .../iotdb/confignode/persistence/NodeInfo.java     |   8 +-
 .../procedure/env/DataNodeRemoveHandler.java       | 131 ++++++++++++---------
 .../impl/node/RemoveDataNodeProcedure.java         |   7 +-
 .../apache/iotdb/commons/utils/NodeUrlUtils.java   |   6 +-
 .../apache/iotdb/db/client/ConfigNodeClient.java   |   2 +-
 .../db/service/DataNodeServerCommandLine.java      |   2 +-
 8 files changed, 93 insertions(+), 66 deletions(-)

diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeConstant.java b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeConstant.java
index 4f58c58331..70c2a1c620 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeConstant.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeConstant.java
@@ -45,6 +45,8 @@ public class ConfigNodeConstant {
   public static final String METRIC_STATUS_ONLINE = "Online";
   public static final String METRIC_STATUS_UNKNOWN = "Unknown";
 
+  public static final String REMOVE_DATANODE_PROCESS = "[REMOVE_DATANODE_PROCESS]";
+
   private ConfigNodeConstant() {
     // empty constructor
   }
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeRemoveCheck.java b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeRemoveCheck.java
index 6b9031c30a..62be003b2c 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeRemoveCheck.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeRemoveCheck.java
@@ -40,6 +40,7 @@ import java.util.Properties;
 import static org.apache.commons.lang3.StringUtils.isNumeric;
 
 public class ConfigNodeRemoveCheck {
+
   private static final Logger LOGGER = LoggerFactory.getLogger(ConfigNodeStartupCheck.class);
 
   private static final ConfigNodeConfig CONF = ConfigNodeDescriptor.getInstance().getConf();
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/persistence/NodeInfo.java b/confignode/src/main/java/org/apache/iotdb/confignode/persistence/NodeInfo.java
index 30c108a06f..fd64713896 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/persistence/NodeInfo.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/persistence/NodeInfo.java
@@ -166,7 +166,9 @@ public class NodeInfo implements SnapshotProcessor {
    * @return TSStatus
    */
   public TSStatus removeDataNode(RemoveDataNodePlan req) {
-    LOGGER.info("there are {} data node in cluster before remove some", registeredDataNodes.size());
+    LOGGER.info(
+        "there are {} data node in cluster before executed remove-datanode.sh",
+        registeredDataNodes.size());
     try {
       dataNodeInfoReadWriteLock.writeLock().lock();
       req.getDataNodeLocations()
@@ -178,7 +180,9 @@ public class NodeInfo implements SnapshotProcessor {
     } finally {
       dataNodeInfoReadWriteLock.writeLock().unlock();
     }
-    LOGGER.info("there are {} data node in cluster after remove some", registeredDataNodes.size());
+    LOGGER.info(
+        "there are {} data node in cluster after executed remove-datanode.sh",
+        registeredDataNodes.size());
     return new TSStatus(TSStatusCode.SUCCESS_STATUS.getStatusCode());
   }
 
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
index 73e2b65c48..f816814bb9 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/DataNodeRemoveHandler.java
@@ -53,6 +53,8 @@ import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
+import static org.apache.iotdb.confignode.conf.ConfigNodeConstant.REMOVE_DATANODE_PROCESS;
+
 public class DataNodeRemoveHandler {
   private static final Logger LOGGER = LoggerFactory.getLogger(DataNodeRemoveHandler.class);
 
@@ -109,7 +111,8 @@ public class DataNodeRemoveHandler {
                   DataNodeRequestType.DISABLE_DATA_NODE);
       if (!isSucceed(status)) {
         LOGGER.error(
-            "broadcastDisableDataNode meets error, disabledDataNode: {}, error: {}",
+            "{}, BroadcastDisableDataNode meets error, disabledDataNode: {}, error: {}",
+            REMOVE_DATANODE_PROCESS,
             getIdWithRpcEndpoint(disabledDataNode),
             status);
         return;
@@ -145,6 +148,60 @@ public class DataNodeRemoveHandler {
     return newNode.get();
   }
 
+  /**
+   * Create a new RegionReplica and build the ConsensusGroup on the destined DataNode
+   *
+   * <p>createNewRegionPeer should be invoked on a DataNode that doesn't contain any peer of the
+   * specific ConsensusGroup, in order to avoid there exists one DataNode who has more than one
+   * RegionReplica.
+   *
+   * @param regionId The given ConsensusGroup
+   * @param destDataNode The destined DataNode where the new peer will be created
+   * @return status
+   */
+  public TSStatus createNewRegionPeer(TConsensusGroupId regionId, TDataNodeLocation destDataNode) {
+    TSStatus status;
+    List<TDataNodeLocation> regionReplicaNodes = findRegionReplicaNodes(regionId);
+    if (regionReplicaNodes.isEmpty()) {
+      LOGGER.warn(
+          "{}, Not find region replica nodes in createPeer, regionId: {}",
+          REMOVE_DATANODE_PROCESS,
+          regionId);
+      status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
+      status.setMessage("Not find region replica nodes in createPeer, regionId: " + regionId);
+      return status;
+    }
+
+    List<TDataNodeLocation> currentPeerNodes = new ArrayList<>(regionReplicaNodes);
+    currentPeerNodes.add(destDataNode);
+    String storageGroup = configManager.getPartitionManager().getRegionStorageGroup(regionId);
+    TCreatePeerReq req = new TCreatePeerReq(regionId, currentPeerNodes, storageGroup);
+    // TODO replace with real ttl
+    req.setTtl(Long.MAX_VALUE);
+
+    status =
+        SyncDataNodeClientPool.getInstance()
+            .sendSyncRequestToDataNodeWithRetry(
+                destDataNode.getInternalEndPoint(),
+                req,
+                DataNodeRequestType.CREATE_NEW_REGION_PEER);
+
+    LOGGER.info(
+        "{}, Send action createNewRegionPeer finished, regionId: {}, destDataNode: {}",
+        REMOVE_DATANODE_PROCESS,
+        regionId,
+        destDataNode);
+    if (isFailed(status)) {
+      LOGGER.error(
+          "{}, Send action createNewRegionPeer error, regionId: {}, destDataNode: {}, result: {}",
+          REMOVE_DATANODE_PROCESS,
+          regionId,
+          destDataNode,
+          status);
+    }
+    return status;
+  }
+
   /**
    * Order the specific ConsensusGroup to add peer for the new RegionReplica.
    *
@@ -165,13 +222,14 @@ public class DataNodeRemoveHandler {
         filterDataNodeWithOtherRegionReplica(regionId, destDataNode);
     if (!selectedDataNode.isPresent()) {
       LOGGER.warn(
-          "There are no other DataNodes could be selected to perform the add peer process, "
+          "{}, There are no other DataNodes could be selected to perform the add peer process, "
               + "please check RegionGroup: {} by SQL: show regions",
+          REMOVE_DATANODE_PROCESS,
           regionId);
       status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
       status.setMessage(
           "There are no other DataNodes could be selected to perform the add peer process, "
-              + "please check by SQL: show regions");
+              + "please check by show regions command");
       return status;
     }
 
@@ -185,9 +243,11 @@ public class DataNodeRemoveHandler {
                 maintainPeerReq,
                 DataNodeRequestType.ADD_REGION_PEER);
     LOGGER.info(
-        "Send action addRegionPeer, wait it finished, regionId: {}, dataNode: {}",
+        "{}, Send action addRegionPeer finished, regionId: {}, rpcDataNode: {}, destDataNode: {}",
+        REMOVE_DATANODE_PROCESS,
         regionId,
-        getIdWithRpcEndpoint(selectedDataNode.get()));
+        getIdWithRpcEndpoint(selectedDataNode.get()),
+        destDataNode);
     return status;
   }
 
@@ -226,7 +286,8 @@ public class DataNodeRemoveHandler {
                 maintainPeerReq,
                 DataNodeRequestType.REMOVE_REGION_PEER);
     LOGGER.info(
-        "Send action removeRegionPeer, wait it finished, regionId: {}, dataNode: {}",
+        "{}, Send action removeRegionPeer finished, regionId: {}, dataNode: {}",
+        REMOVE_DATANODE_PROCESS,
         regionId,
         rpcClientDataNode.getInternalEndPoint());
     return status;
@@ -252,7 +313,7 @@ public class DataNodeRemoveHandler {
       String errorMessage =
           "deleteOldRegionPeer is not supported for schemaRegion when SchemaReplicationFactor equals 1, "
               + "you are supposed to delete the region data of datanode manually";
-      LOGGER.info(errorMessage);
+      LOGGER.info("{}, {}", REMOVE_DATANODE_PROCESS, errorMessage);
       TSStatus status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
       status.setMessage(errorMessage);
       return status;
@@ -266,7 +327,7 @@ public class DataNodeRemoveHandler {
       String errorMessage =
           "deleteOldRegionPeer is not supported for dataRegion when DataReplicationFactor equals 1, "
               + "you are supposed to delete the region data of datanode manually";
-      LOGGER.info(errorMessage);
+      LOGGER.info("{}, {}", REMOVE_DATANODE_PROCESS, errorMessage);
       TSStatus status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
       status.setMessage(errorMessage);
       return status;
@@ -281,7 +342,8 @@ public class DataNodeRemoveHandler {
                 maintainPeerReq,
                 DataNodeRequestType.DELETE_OLD_REGION_PEER);
     LOGGER.info(
-        "Send action deleteOldRegionPeer to regionId {} on dataNodeId {}, wait it finished",
+        "{}, Send action deleteOldRegionPeer finished, regionId: {}, dataNodeId: {}",
+        REMOVE_DATANODE_PROCESS,
         regionId,
         originalDataNode.getInternalEndPoint());
     return status;
@@ -299,7 +361,7 @@ public class DataNodeRemoveHandler {
       TDataNodeLocation originalDataNode,
       TDataNodeLocation destDataNode) {
     LOGGER.info(
-        "start to update region {} location from {} to {} when it migrate succeed",
+        "Start to update region {} location from {} to {} when it migrate succeed",
         regionId,
         originalDataNode.getInternalEndPoint().getIp(),
         destDataNode.getInternalEndPoint().getIp());
@@ -307,7 +369,7 @@ public class DataNodeRemoveHandler {
         new UpdateRegionLocationPlan(regionId, originalDataNode, destDataNode);
     TSStatus status = configManager.getPartitionManager().updateRegionLocation(req);
     LOGGER.info(
-        "update region {} location finished, result:{}, old:{}, new:{}",
+        "Update region {} location finished, result:{}, old:{}, new:{}",
         regionId,
         status,
         originalDataNode.getInternalEndPoint().getIp(),
@@ -343,53 +405,6 @@ public class DataNodeRemoveHandler {
         .findAny();
   }
 
-  /**
-   * Create a new RegionReplica and build the ConsensusGroup on the destined DataNode
-   *
-   * <p>createNewRegionPeer should be invoked on a DataNode that doesn't contain any peer of the
-   * specific ConsensusGroup, in order to avoid there exists one DataNode who has more than one
-   * RegionReplica.
-   *
-   * @param regionId The given ConsensusGroup
-   * @param destDataNode The destined DataNode where the new peer will be created
-   * @return status
-   */
-  public TSStatus createNewRegionPeer(TConsensusGroupId regionId, TDataNodeLocation destDataNode) {
-    TSStatus status;
-    List<TDataNodeLocation> regionReplicaNodes = findRegionReplicaNodes(regionId);
-    if (regionReplicaNodes.isEmpty()) {
-      LOGGER.warn("Not find region replica nodes in createPeer, regionId: {}", regionId);
-      status = new TSStatus(TSStatusCode.MIGRATE_REGION_ERROR.getStatusCode());
-      status.setMessage("Not find region replica nodes in createPeer, regionId: " + regionId);
-      return status;
-    }
-
-    List<TDataNodeLocation> currentPeerNodes = new ArrayList<>(regionReplicaNodes);
-    currentPeerNodes.add(destDataNode);
-    String storageGroup = configManager.getPartitionManager().getRegionStorageGroup(regionId);
-    TCreatePeerReq req = new TCreatePeerReq(regionId, currentPeerNodes, storageGroup);
-    // TODO replace with real ttl
-    req.setTtl(Long.MAX_VALUE);
-
-    status =
-        SyncDataNodeClientPool.getInstance()
-            .sendSyncRequestToDataNodeWithRetry(
-                destDataNode.getInternalEndPoint(),
-                req,
-                DataNodeRequestType.CREATE_NEW_REGION_PEER);
-
-    LOGGER.info(
-        "Send action createNewRegionPeer, regionId: {}, dataNode: {}", regionId, destDataNode);
-    if (isFailed(status)) {
-      LOGGER.error(
-          "Send action createNewRegionPeer, regionId: {}, dataNode: {}, result: {}",
-          regionId,
-          destDataNode,
-          status);
-    }
-    return status;
-  }
-
   private boolean isSucceed(TSStatus status) {
     return status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode();
   }
diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveDataNodeProcedure.java b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveDataNodeProcedure.java
index f46d6bf4f7..1e824a0b59 100644
--- a/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveDataNodeProcedure.java
+++ b/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/node/RemoveDataNodeProcedure.java
@@ -38,6 +38,8 @@ import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
 
+import static org.apache.iotdb.confignode.conf.ConfigNodeConstant.REMOVE_DATANODE_PROCESS;
+
 /** remove data node procedure */
 public class RemoveDataNodeProcedure extends AbstractNodeProcedure<RemoveDataNodeState> {
   private static final Logger LOG = LoggerFactory.getLogger(RemoveDataNodeProcedure.class);
@@ -68,7 +70,10 @@ public class RemoveDataNodeProcedure extends AbstractNodeProcedure<RemoveDataNod
           env.markDataNodeAsRemovingAndBroadCast(disableDataNodeLocation);
           execDataNodeRegionIds =
               env.getDataNodeRemoveHandler().getDataNodeRegionIds(disableDataNodeLocation);
-          LOG.info("DataNode region id is {}", execDataNodeRegionIds);
+          LOG.info(
+              "{}, DataNode regions to be removed is {}",
+              REMOVE_DATANODE_PROCESS,
+              execDataNodeRegionIds);
           setNextState(RemoveDataNodeState.BROADCAST_DISABLE_DATA_NODE);
           break;
         case BROADCAST_DISABLE_DATA_NODE:
diff --git a/node-commons/src/main/java/org/apache/iotdb/commons/utils/NodeUrlUtils.java b/node-commons/src/main/java/org/apache/iotdb/commons/utils/NodeUrlUtils.java
index 8dbf8459e3..f54290d0d7 100644
--- a/node-commons/src/main/java/org/apache/iotdb/commons/utils/NodeUrlUtils.java
+++ b/node-commons/src/main/java/org/apache/iotdb/commons/utils/NodeUrlUtils.java
@@ -72,8 +72,8 @@ public class NodeUrlUtils {
   public static TEndPoint parseTEndPointUrl(String endPointUrl) throws BadNodeUrlException {
     String[] split = endPointUrl.split(":");
     if (split.length != 2) {
-      logger.warn("Bad node url: {}, please check the IP:PORT inputs", endPointUrl);
-      throw new BadNodeUrlException(String.format("Bad node url: %s", endPointUrl));
+      logger.warn("Illegal endpoint url format: {}", endPointUrl);
+      throw new BadNodeUrlException(String.format("Bad endpoint url: %s", endPointUrl));
     }
     String ip = split[0];
     TEndPoint result;
@@ -81,7 +81,7 @@ public class NodeUrlUtils {
       int port = Integer.parseInt(split[1]);
       result = new TEndPoint(ip, port);
     } catch (NumberFormatException e) {
-      logger.warn("Bad node url: {}, please check the IP:PORT inputs", endPointUrl);
+      logger.warn("Illegal endpoint url format: {}", endPointUrl);
       throw new BadNodeUrlException(String.format("Bad node url: %s", endPointUrl));
     }
     return result;
diff --git a/server/src/main/java/org/apache/iotdb/db/client/ConfigNodeClient.java b/server/src/main/java/org/apache/iotdb/db/client/ConfigNodeClient.java
index f9dd77cc37..02029dbcea 100644
--- a/server/src/main/java/org/apache/iotdb/db/client/ConfigNodeClient.java
+++ b/server/src/main/java/org/apache/iotdb/db/client/ConfigNodeClient.java
@@ -262,7 +262,7 @@ public class ConfigNodeClient
         configLeader = null;
       }
       logger.warn(
-          "Failed to connect to ConfigNode {} from DataNode {},because the current node is not leader,try next node",
+          "Failed to connect to ConfigNode {} from DataNode {}, because the current node is not leader, try next node",
           configNode,
           config.getAddressAndPort());
       return true;
diff --git a/server/src/main/java/org/apache/iotdb/db/service/DataNodeServerCommandLine.java b/server/src/main/java/org/apache/iotdb/db/service/DataNodeServerCommandLine.java
index fedc46a4bf..d3de543558 100644
--- a/server/src/main/java/org/apache/iotdb/db/service/DataNodeServerCommandLine.java
+++ b/server/src/main/java/org/apache/iotdb/db/service/DataNodeServerCommandLine.java
@@ -117,7 +117,7 @@ public class DataNodeServerCommandLine extends ServerCommandLine {
     if (dataNodeLocations.isEmpty()) {
       throw new BadNodeUrlException("No DataNode to remove");
     }
-    logger.info("Start to remove datanode, detail:{}", dataNodeLocations);
+    logger.info("Start to remove datanode, removed datanode: {}", dataNodeLocations);
     TDataNodeRemoveReq removeReq = new TDataNodeRemoveReq(dataNodeLocations);
     try (ConfigNodeClient configNodeClient = new ConfigNodeClient()) {
       TDataNodeRemoveResp removeResp = configNodeClient.removeDataNode(removeReq);