You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@iotdb.apache.org by GitBox <gi...@apache.org> on 2022/12/23 09:58:09 UTC

[GitHub] [iotdb] Beyyes commented on a diff in pull request #8592: [To rel/1.0] Cherry pick cluster node start protocol for rel/1.0

Beyyes commented on code in PR #8592:
URL: https://github.com/apache/iotdb/pull/8592#discussion_r1056148631


##########
confignode/src/main/java/org/apache/iotdb/confignode/manager/ConfigManager.java:
##########
@@ -256,42 +262,65 @@ public void close() throws IOException {
   }
 
   @Override
-  public DataSet registerDataNode(RegisterDataNodePlan registerDataNodePlan) {
+  public DataSet getSystemConfiguration() {
     TSStatus status = confirmLeader();
-    DataNodeRegisterResp dataSet;
-    if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
-      triggerManager.getTriggerInfo().acquireTriggerTableLock();
-      udfManager.getUdfInfo().acquireUDFTableLock();
-      try {
-        dataSet = (DataNodeRegisterResp) nodeManager.registerDataNode(registerDataNodePlan);
-        dataSet.setTemplateInfo(clusterSchemaManager.getAllTemplateSetInfo());
-        dataSet.setTriggerInformation(
-            triggerManager.getTriggerTable(false).getAllTriggerInformation());
-        dataSet.setAllUDFInformation(udfManager.getUDFTable().getAllUDFInformation());
-        dataSet.setAllTTLInformation(clusterSchemaManager.getAllTTLInfo());
-      } finally {
-        triggerManager.getTriggerInfo().releaseTriggerTableLock();
-        udfManager.getUdfInfo().releaseUDFTableLock();
-      }
+    ConfigurationResp dataSet;
+    // Notice: The Seed-ConfigNode must also have the privilege to give system configuration.
+    // Otherwise, the IoTDB-cluster will not have the ability to restart from scratch.
+    if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()
+        || ConfigNodeDescriptor.getInstance().isSeedConfigNode()

Review Comment:
   Remove `|| ConfigNodeDescriptor.getInstance().isSeedConfigNode()
           || SystemPropertiesUtils.isSeedConfigNode()` judgement?



##########
confignode/src/main/java/org/apache/iotdb/confignode/manager/ConfigManager.java:
##########
@@ -256,42 +262,65 @@ public void close() throws IOException {
   }
 
   @Override
-  public DataSet registerDataNode(RegisterDataNodePlan registerDataNodePlan) {
+  public DataSet getSystemConfiguration() {
     TSStatus status = confirmLeader();
-    DataNodeRegisterResp dataSet;
-    if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
-      triggerManager.getTriggerInfo().acquireTriggerTableLock();
-      udfManager.getUdfInfo().acquireUDFTableLock();
-      try {
-        dataSet = (DataNodeRegisterResp) nodeManager.registerDataNode(registerDataNodePlan);
-        dataSet.setTemplateInfo(clusterSchemaManager.getAllTemplateSetInfo());
-        dataSet.setTriggerInformation(
-            triggerManager.getTriggerTable(false).getAllTriggerInformation());
-        dataSet.setAllUDFInformation(udfManager.getUDFTable().getAllUDFInformation());
-        dataSet.setAllTTLInformation(clusterSchemaManager.getAllTTLInfo());
-      } finally {
-        triggerManager.getTriggerInfo().releaseTriggerTableLock();
-        udfManager.getUdfInfo().releaseUDFTableLock();
-      }
+    ConfigurationResp dataSet;
+    // Notice: The Seed-ConfigNode must also have the privilege to give system configuration.
+    // Otherwise, the IoTDB-cluster will not have the ability to restart from scratch.
+    if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()
+        || ConfigNodeDescriptor.getInstance().isSeedConfigNode()
+        || SystemPropertiesUtils.isSeedConfigNode()) {
+      dataSet = (ConfigurationResp) nodeManager.getSystemConfiguration();
     } else {
-      dataSet = new DataNodeRegisterResp();
+      dataSet = new ConfigurationResp();
       dataSet.setStatus(status);
-      dataSet.setConfigNodeList(nodeManager.getRegisteredConfigNodes());
     }
     return dataSet;
   }
 
   @Override
-  public DataSet getConfiguration() {
+  public DataSet registerDataNode(RegisterDataNodePlan registerDataNodePlan) {
     TSStatus status = confirmLeader();
-    ConfigurationResp dataSet;
     if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
-      dataSet = (ConfigurationResp) nodeManager.getConfiguration();
-    } else {
-      dataSet = new ConfigurationResp();
-      dataSet.setStatus(status);
+      status =
+          ClusterNodeStartUtils.confirmNodeRegistration(
+              NodeType.DataNode,
+              registerDataNodePlan.getDataNodeConfiguration().getLocation(),
+              this);
+      if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
+        return nodeManager.registerDataNode(registerDataNodePlan);
+      }
     }
-    return dataSet;
+
+    DataNodeRegisterResp resp = new DataNodeRegisterResp();
+    resp.setStatus(status);
+    resp.setConfigNodeList(getNodeManager().getRegisteredConfigNodes());
+    return resp;
+  }
+
+  @Override
+  public TDataNodeRestartResp restartDataNode(TDataNodeRestartReq req) {
+    TSStatus status = confirmLeader();
+    // Notice: The Seed-ConfigNode must also have the privilege to do Node restart check.
+    // Otherwise, the IoTDB-cluster will not have the ability to restart from scratch.
+    if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()

Review Comment:
   Remove `|| ConfigNodeDescriptor.getInstance().isSeedConfigNode()
           || SystemPropertiesUtils.isSeedConfigNode()` judgement?



##########
confignode/src/main/java/org/apache/iotdb/confignode/manager/ConfigManager.java:
##########
@@ -750,7 +778,45 @@ public TPermissionInfoResp checkUserPrivileges(
 
   @Override
   public TConfigNodeRegisterResp registerConfigNode(TConfigNodeRegisterReq req) {
-    return nodeManager.registerConfigNode(req);
+    final int ERROR_STATUS_NODE_ID = -1;
+
+    TSStatus status = confirmLeader();
+    if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
+      // Make sure the global configurations are consist
+      status = checkConfigNodeGlobalConfig(req);
+      if (status == null) {
+        status =
+            ClusterNodeStartUtils.confirmNodeRegistration(
+                NodeType.ConfigNode, req.getConfigNodeLocation(), this);
+        if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()) {
+          return nodeManager.registerConfigNode(req);
+        }
+      }
+    }
+
+    return new TConfigNodeRegisterResp().setStatus(status).setConfigNodeId(ERROR_STATUS_NODE_ID);
+  }
+
+  @Override
+  public TSStatus restartConfigNode(TConfigNodeRestartReq req) {
+    TSStatus status = confirmLeader();
+    // Notice: The Seed-ConfigNode must also have the privilege to do Node restart check.
+    // Otherwise, the IoTDB-cluster will not have the ability to restart from scratch.
+    if (status.getCode() == TSStatusCode.SUCCESS_STATUS.getStatusCode()
+        || ConfigNodeDescriptor.getInstance().isSeedConfigNode()

Review Comment:
   Remove `|| ConfigNodeDescriptor.getInstance().isSeedConfigNode()`?



##########
confignode/src/main/java/org/apache/iotdb/confignode/conf/SystemPropertiesUtils.java:
##########
@@ -244,6 +249,25 @@ public static void storeConfigNodeList(List<TConfigNodeLocation> configNodes) th
     storeSystemProperties(systemProperties);
   }
 
+  /**
+   * Load the cluster_name in confignode-system.properties file. We only invoke this interface when
+   * restarted.
+   *
+   * @return The property of cluster_name in confignode-system.properties file
+   * @throws IOException When load confignode-system.properties file failed
+   */
+  public static String loadClusterNameWhenRestarted() throws IOException {
+    Properties systemProperties = getSystemProperties();
+    String clusterName = systemProperties.getProperty("cluster_name", null);
+    if (clusterName == null) {
+      LOGGER.warn(
+          "Lack cluster_name field in data/confignode/system/confignode-system.properties, set it as defaultCluster");
+      systemProperties.setProperty("cluster_name", "defaultCluster");

Review Comment:
   Change to constant CLUSTER_NAME and DEFAULT_CLUSTER_NAME



##########
confignode/src/main/java/org/apache/iotdb/confignode/service/ConfigNode.java:
##########
@@ -59,7 +60,9 @@ public class ConfigNode implements ConfigNodeMBean {
   private static final ConfigNodeConfig CONF = ConfigNodeDescriptor.getInstance().getConf();
   private static final CommonConfig COMMON_CONF = CommonDescriptor.getInstance().getConfig();
 
+  private static final int STARTUP_RETRY_NUM = 10;
   private static final int SCHEDULE_WAITING_RETRY_NUM = 20;
+  private static final long STARTUP_RETRY_INTERVAL_IN_MS = TimeUnit.SECONDS.toMillis(30);

Review Comment:
   30000ms? before is 1000ms



##########
server/src/main/java/org/apache/iotdb/db/service/DataNode.java:
##########
@@ -132,200 +142,304 @@ public static void main(String[] args) {
     new DataNodeServerCommandLine().doMain(args);
   }
 
-  protected void serverCheckAndInit() throws ConfigurationException, IOException {
-    config.setClusterMode(true);
-    IoTDBStartCheck.getInstance().checkConfig();
-    // TODO: check configuration for data node
+  protected void doAddNode() {
+    boolean isFirstStart = false;
+    try {
+      // Check if this DataNode is start for the first time and do other pre-checks
+      isFirstStart = prepareDataNode();
 
-    for (TEndPoint endPoint : config.getTargetConfigNodeList()) {
-      if (endPoint.getIp().equals("0.0.0.0")) {
-        throw new ConfigurationException(
-            "The ip address of any target_config_node_list couldn't be 0.0.0.0");
-      }
-    }
+      // Set target ConfigNodeList from iotdb-datanode.properties file
+      ConfigNodeInfo.getInstance().updateConfigNodeList(config.getTargetConfigNodeList());
 
-    thisNode.setIp(config.getInternalAddress());
-    thisNode.setPort(config.getInternalPort());
-  }
+      // Pull and check system configurations from ConfigNode-leader
+      pullAndCheckSystemConfigurations();
 
-  protected void doAddNode() {
-    try {
-      // prepare cluster IoTDB-DataNode
-      prepareDataNode();
-      // pull and check configuration from ConfigNode
-      pullAndCheckConfiguration();
-      // register current DataNode to ConfigNode
-      registerInConfigNode();
-      // active DataNode
+      if (isFirstStart) {
+        // Register this DataNode to the cluster when first start
+        sendRegisterRequestToConfigNode();
+      } else {
+        // Send restart request of this DataNode
+        sendRestartRequestToConfigNode();
+      }
+
+      // Active DataNode
       active();
-      // setup rpc service
+
+      // Setup rpc service
       setUpRPCService();
-      registerManager.register(MetricService.getInstance());
-
-      // init metric service
-      if (MetricConfigDescriptor.getInstance()
-          .getMetricConfig()
-          .getInternalReportType()
-          .equals(InternalReporterType.IOTDB)) {
-        MetricService.getInstance().updateInternalReporter(new IoTDBInternalReporter());
-      }
-      MetricService.getInstance().startInternalReporter();
-      // bind predefined metrics
-      DataNodeMetricsHelper.bind();
+
+      // Setup metric service
+      setUpMetricService();
 
       logger.info("IoTDB configuration: " + config.getConfigMessage());
       logger.info("Congratulation, IoTDB DataNode is set up successfully. Now, enjoy yourself!");
-    } catch (StartupException e) {
+
+    } catch (StartupException | ConfigurationException | IOException e) {
       logger.error("Fail to start server", e);
+      if (isFirstStart) {
+        // Delete the system.properties file when first start failed.
+        // Therefore, the next time this DataNode is start will still be seen as the first time.
+        SYSTEM_PROPERTIES.deleteOnExit();
+      }
       stop();
     }
   }
 
-  /** initialize the current node and its services */
-  public boolean initLocalEngines() {
-    return true;
-  }
-
   /** Prepare cluster IoTDB-DataNode */
-  private void prepareDataNode() throws StartupException {
-    // check iotdb server first
+  private boolean prepareDataNode() throws StartupException, ConfigurationException, IOException {
+    // Set cluster mode
+    config.setClusterMode(true);
+
+    // Notice: Consider this DataNode as first start if the system.properties file doesn't exist
+    boolean isFirstStart = !SYSTEM_PROPERTIES.exists();
+
+    // Check target ConfigNodes
+    for (TEndPoint endPoint : config.getTargetConfigNodeList()) {
+      if (endPoint.getIp().equals("0.0.0.0")) {
+        throw new StartupException(
+            "The ip address of any target_config_node_list couldn't be 0.0.0.0");
+      }
+    }
+
+    // Set this node
+    thisNode.setIp(config.getInternalAddress());
+    thisNode.setPort(config.getInternalPort());
+
+    // Startup checks
     StartupChecks checks = new StartupChecks(IoTDBConstant.DN_ROLE).withDefaultTest();
     checks.verify();
 
-    // Register services
-    JMXService.registerMBean(getInstance(), mbeanName);
+    // Check system configurations
+    IoTDBStartCheck.getInstance().checkSystemConfig();
+
+    return isFirstStart;

Review Comment:
   return !SYSTEM_PROPERTIES.exists();



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@iotdb.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org