You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@iotdb.apache.org by GitBox <gi...@apache.org> on 2022/06/21 15:25:44 UTC

[GitHub] [iotdb] qiaojialin commented on a diff in pull request #6329: [IoTDB-3052] ConfigNode shrinking process

qiaojialin commented on code in PR #6329:
URL: https://github.com/apache/iotdb/pull/6329#discussion_r902757600


##########
confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceProcessor.java:
##########
@@ -383,6 +388,62 @@ public TSStatus applyConfigNode(TConfigNodeLocation configNodeLocation) throws T
     return status;
   }
 
+  /**
+   * For leader to remove ConfigNode configuration in consensus layer
+   *
+   * @param configNodeLocation
+   * @return
+   */
+  @Override
+  public TSStatus removeConfigNode(TConfigNodeLocation configNodeLocation) throws TException {
+    RemoveConfigNodeReq removeConfigNodeReq = new RemoveConfigNodeReq(configNodeLocation);
+
+    TSStatus status = configManager.removeConfigNode(removeConfigNodeReq);
+    if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
+      status = SyncConfigNodeClientPool.getInstance().stopConfigNode(configNodeLocation);
+    }
+
+    // Print log to record the ConfigNode that performs the RemoveConfigNodeRequest
+    LOGGER.info("Execute RemoveConfigNodeRequest {} with result {}", configNodeLocation, status);
+
+    return status;
+  }
+
+  /**
+   * For leader to stop ConfigNode
+   *
+   * @param configNodeLocation
+   * @return
+   */
+  @Override
+  public TSStatus stopConfigNode(TConfigNodeLocation configNodeLocation) throws TException {
+    if (!configManager.getNodeManager().getOnlineConfigNodes().contains(configNodeLocation)) {
+      return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+          .setMessage("Remove ConfigNode failed because the ConfigNode not in current Cluster.");
+    }
+
+    ConsensusGroupId groupId = configManager.getConsensusManager().getConsensusGroupId();
+    ConsensusGenericResponse resp =
+        configManager.getConsensusManager().getConsensusImpl().removeConsensusGroup(groupId);
+    if (!resp.isSuccess()) {
+      return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+          .setMessage("Remove ConfigNode failed because remove ConsensusGroup failed.");

Review Comment:
   ```suggestion
             .setMessage("Stop ConfigNode failed because remove ConsensusGroup failed.");
   ```



##########
confignode/src/main/java/org/apache/iotdb/confignode/manager/NodeManager.java:
##########
@@ -183,6 +191,68 @@ public void addMetrics() {
     nodeInfo.addMetrics();
   }
 
+  public TSStatus removeConfigNode(RemoveConfigNodeReq removeConfigNodeReq) {
+    if (removeConfigNodeLock.tryLock()) {
+      try {
+        // Check ConfigNode numbers
+        if (getOnlineConfigNodes().size() <= 1) {
+          return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+              .setMessage(
+                  "Remove ConfigNode failed because there is only one ConfigNode in current Cluster.");
+        }
+
+        // Check whether the onlineConfigNode contain the ConfigNode to be removed.

Review Comment:
   ```suggestion
           // Check whether the onlineConfigNodes contain the ConfigNode to be removed.
   ```



##########
confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceProcessor.java:
##########
@@ -383,6 +388,62 @@ public TSStatus applyConfigNode(TConfigNodeLocation configNodeLocation) throws T
     return status;
   }
 
+  /**
+   * For leader to remove ConfigNode configuration in consensus layer
+   *
+   * @param configNodeLocation
+   * @return
+   */
+  @Override
+  public TSStatus removeConfigNode(TConfigNodeLocation configNodeLocation) throws TException {
+    RemoveConfigNodeReq removeConfigNodeReq = new RemoveConfigNodeReq(configNodeLocation);
+
+    TSStatus status = configManager.removeConfigNode(removeConfigNodeReq);
+    if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
+      status = SyncConfigNodeClientPool.getInstance().stopConfigNode(configNodeLocation);
+    }
+
+    // Print log to record the ConfigNode that performs the RemoveConfigNodeRequest
+    LOGGER.info("Execute RemoveConfigNodeRequest {} with result {}", configNodeLocation, status);
+
+    return status;
+  }
+
+  /**
+   * For leader to stop ConfigNode
+   *
+   * @param configNodeLocation
+   * @return
+   */
+  @Override
+  public TSStatus stopConfigNode(TConfigNodeLocation configNodeLocation) throws TException {
+    if (!configManager.getNodeManager().getOnlineConfigNodes().contains(configNodeLocation)) {
+      return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+          .setMessage("Remove ConfigNode failed because the ConfigNode not in current Cluster.");

Review Comment:
   ```suggestion
             .setMessage("Stop ConfigNode failed because the ConfigNode not in current Cluster.");
   ```



##########
confignode/src/main/java/org/apache/iotdb/confignode/manager/NodeManager.java:
##########
@@ -183,6 +191,68 @@ public void addMetrics() {
     nodeInfo.addMetrics();
   }
 
+  public TSStatus removeConfigNode(RemoveConfigNodeReq removeConfigNodeReq) {
+    if (removeConfigNodeLock.tryLock()) {
+      try {
+        // Check ConfigNode numbers
+        if (getOnlineConfigNodes().size() <= 1) {
+          return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+              .setMessage(
+                  "Remove ConfigNode failed because there is only one ConfigNode in current Cluster.");
+        }
+
+        // Check whether the onlineConfigNode contain the ConfigNode to be removed.
+        if (!getOnlineConfigNodes().contains(removeConfigNodeReq.getConfigNodeLocation())) {
+          return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+              .setMessage(
+                  "Remove ConfigNode failed because the ConfigNode not in current Cluster.");
+        }
+
+        // Check whether the remove ConfigNode is leader
+        Peer leader = getConsensusManager().getLeader(getOnlineConfigNodes());
+        if (leader
+            .getEndpoint()
+            .equals(removeConfigNodeReq.getConfigNodeLocation().getInternalEndPoint())) {
+          // transfer leader
+          return transferLeader(removeConfigNodeReq, getConsensusManager().getConsensusGroupId());
+        }
+
+        // Execute removePeer
+        if (getConsensusManager().removeConfigNodePeer(removeConfigNodeReq)) {
+          return getConsensusManager().write(removeConfigNodeReq).getStatus();
+        } else {
+          return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+              .setMessage("Remove ConfigNode failed because there is no ConfigNode.");
+        }
+      } finally {
+        removeConfigNodeLock.unlock();
+      }
+    } else {
+      return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+          .setMessage("A ConfigNode is removing. Please wait or try again.");
+    }
+  }
+
+  private TSStatus transferLeader(
+      RemoveConfigNodeReq removeConfigNodeReq, ConsensusGroupId groupId) {
+    TConfigNodeLocation newLeader =
+        getOnlineConfigNodes().stream()
+            .filter(e -> !e.equals(removeConfigNodeReq.getConfigNodeLocation()))
+            .findAny()
+            .get();
+    ConsensusGenericResponse resp =
+        getConsensusManager()
+            .getConsensusImpl()
+            .transferLeader(groupId, new Peer(groupId, newLeader.getConsensusEndPoint()));
+    if (!resp.isSuccess()) {
+      return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+          .setMessage("Remove ConfigNode failed because transfer ConfigNode leader failed.");
+    }
+    return new TSStatus(TSStatusCode.NEED_REDIRECTION.getStatusCode())
+        .setRedirectNode(newLeader.getInternalEndPoint())
+        .setMessage("Remove ConfigNode is leader, already transfer Leader.");

Review Comment:
   ```suggestion
           .setMessage("The ConfigNode to be removed is leader, already transfer Leader to {}.", newLeader);
   ```



##########
confignode/src/main/java/org/apache/iotdb/confignode/manager/NodeManager.java:
##########
@@ -183,6 +191,68 @@ public void addMetrics() {
     nodeInfo.addMetrics();
   }
 
+  public TSStatus removeConfigNode(RemoveConfigNodeReq removeConfigNodeReq) {
+    if (removeConfigNodeLock.tryLock()) {
+      try {
+        // Check ConfigNode numbers

Review Comment:
   ```suggestion
           // Check ConfigNodes number
   ```



##########
confignode/src/main/java/org/apache/iotdb/confignode/client/SyncConfigNodeClientPool.java:
##########
@@ -83,6 +95,52 @@ public TSStatus applyConfigNode(TEndPoint endPoint, TConfigNodeLocation configNo
         .setMessage("All retry failed.");
   }
 
+  public TSStatus removeConfigNode(
+      List<TConfigNodeLocation> configNodeLocations, TConfigNodeLocation configNodeLocation) {
+    // TODO: Unified retry logic
+    for (TConfigNodeLocation nodeLocation : configNodeLocations) {

Review Comment:
   Add some comment for this meaning, is this used to handle the brain-split? which means that the RemovedConfigNode may not exist in some confignode?



##########
confignode/src/main/java/org/apache/iotdb/confignode/manager/NodeManager.java:
##########
@@ -183,6 +191,68 @@ public void addMetrics() {
     nodeInfo.addMetrics();
   }
 
+  public TSStatus removeConfigNode(RemoveConfigNodeReq removeConfigNodeReq) {
+    if (removeConfigNodeLock.tryLock()) {
+      try {
+        // Check ConfigNode numbers
+        if (getOnlineConfigNodes().size() <= 1) {
+          return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+              .setMessage(
+                  "Remove ConfigNode failed because there is only one ConfigNode in current Cluster.");
+        }
+
+        // Check whether the onlineConfigNode contain the ConfigNode to be removed.
+        if (!getOnlineConfigNodes().contains(removeConfigNodeReq.getConfigNodeLocation())) {
+          return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+              .setMessage(
+                  "Remove ConfigNode failed because the ConfigNode not in current Cluster.");
+        }
+
+        // Check whether the remove ConfigNode is leader
+        Peer leader = getConsensusManager().getLeader(getOnlineConfigNodes());
+        if (leader
+            .getEndpoint()
+            .equals(removeConfigNodeReq.getConfigNodeLocation().getInternalEndPoint())) {
+          // transfer leader
+          return transferLeader(removeConfigNodeReq, getConsensusManager().getConsensusGroupId());
+        }
+
+        // Execute removePeer
+        if (getConsensusManager().removeConfigNodePeer(removeConfigNodeReq)) {
+          return getConsensusManager().write(removeConfigNodeReq).getStatus();
+        } else {
+          return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_FAILED.getStatusCode())
+              .setMessage("Remove ConfigNode failed because there is no ConfigNode.");

Review Comment:
   Is removing peer failed equals to no ConfigNode?
   Maybe this message needs more precise



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@iotdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org