You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by jx...@apache.org on 2022/05/18 20:16:39 UTC

[helix] branch master updated: Fix incompatible issue for clusterConfig mapfields disabledInstances (#2100)

This is an automated email from the ASF dual-hosted git repository.

jxue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/master by this push:
     new c46a70f63 Fix incompatible issue for clusterConfig mapfields disabledInstances (#2100)
c46a70f63 is described below

commit c46a70f6376bb8a217ea3404b8586425314e866b
Author: xyuanlu <xy...@gmail.com>
AuthorDate: Wed May 18 13:16:34 2022 -0700

    Fix incompatible issue for clusterConfig mapfields disabledInstances (#2100)
    
    Fix incompatible issue for clusterConfig mapfields
---
 .../rebalancer/util/DelayedRebalanceUtil.java      |  6 +--
 .../org/apache/helix/manager/zk/ZKHelixAdmin.java  | 15 ++++--
 .../java/org/apache/helix/model/ClusterConfig.java | 55 ++++++++++++++++++----
 .../apache/helix/util/InstanceValidationUtil.java  |  3 +-
 .../integration/TestBatchEnableInstances.java      | 15 ++++++
 .../java/org/apache/helix/mock/MockHelixAdmin.java |  7 ++-
 .../helix/rest/server/TestInstancesAccessor.java   |  7 +++
 7 files changed, 89 insertions(+), 19 deletions(-)

diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java
index 41df15005..4f48b2f93 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java
@@ -139,10 +139,10 @@ public class DelayedRebalanceUtil {
     // check the time instance got disabled.
     if (!InstanceValidationUtil.isInstanceEnabled(instanceConfig, clusterConfig)) {
       long disabledTime = instanceConfig.getInstanceEnabledTime();
-      Map<String, String> disabledInstances = clusterConfig.getDisabledInstances();
-      if (disabledInstances.containsKey(instance)) {
+      String batchedDisabledTime = clusterConfig.getInstanceHelixDisabledTimeStamp(instance);
+      if (batchedDisabledTime != null && !batchedDisabledTime.isEmpty()) {
         // Update batch disable time
-        long batchDisableTime = Long.parseLong(clusterConfig.getInstanceHelixDisabledTimeStamp(instance));
+        long batchDisableTime = Long.parseLong(batchedDisabledTime);
         if (disabledTime == -1 || disabledTime > batchDisableTime) {
           disabledTime = batchDisableTime;
         }
diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
index 673536a89..3e5f2da13 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
@@ -1933,17 +1933,24 @@ public class ZKHelixAdmin implements HelixAdmin {
 
         ClusterConfig clusterConfig = new ClusterConfig(currentData);
         Map<String, String> disabledInstances = new TreeMap<>(clusterConfig.getDisabledInstances());
+        Map<String, String> disabledInstancesWithInfo = new TreeMap<>(clusterConfig.getDisabledInstancesWithInfo());
         if (enabled) {
           disabledInstances.keySet().removeAll(instances);
+          disabledInstancesWithInfo.keySet().removeAll(instances);
         } else {
           for (String disabledInstance : instances) {
             // We allow user to override disabledType and reason for an already disabled instance.
+            // TODO: we are updating both DISABLED_INSTANCES and DISABLED_INSTANCES_W_INFO for
+            // backward compatible. Deprecate DISABLED_INSTANCES in the future.
             // TODO: update the history ZNode
-            disabledInstances
-                .put(disabledInstance, assembleInstanceBatchedDisabledInfo(disabledType, reason));
+            String timeStamp = String.valueOf(System.currentTimeMillis());
+            disabledInstances.put(disabledInstance, timeStamp);
+            disabledInstancesWithInfo
+                .put(disabledInstance, assembleInstanceBatchedDisabledInfo(disabledType, reason, timeStamp));
           }
         }
         clusterConfig.setDisabledInstances(disabledInstances);
+        clusterConfig.setDisabledInstancesWithInfo(disabledInstancesWithInfo);
 
         return clusterConfig.getRecord();
       }
@@ -1951,10 +1958,10 @@ public class ZKHelixAdmin implements HelixAdmin {
   }
 
   public static String assembleInstanceBatchedDisabledInfo(
-      InstanceConstants.InstanceDisabledType disabledType, String reason) {
+      InstanceConstants.InstanceDisabledType disabledType, String reason, String timeStamp) {
     Map<String, String> disableInfo = new TreeMap<>();
     disableInfo.put(ClusterConfig.ClusterConfigProperty.HELIX_ENABLED_DISABLE_TIMESTAMP.toString(),
-        String.valueOf(System.currentTimeMillis()));
+        timeStamp);
     if (disabledType != null) {
       disableInfo.put(ClusterConfig.ClusterConfigProperty.HELIX_DISABLED_TYPE.toString(),
           disabledType.toString());
diff --git a/helix-core/src/main/java/org/apache/helix/model/ClusterConfig.java b/helix-core/src/main/java/org/apache/helix/model/ClusterConfig.java
index 485076ee4..829724142 100644
--- a/helix-core/src/main/java/org/apache/helix/model/ClusterConfig.java
+++ b/helix-core/src/main/java/org/apache/helix/model/ClusterConfig.java
@@ -34,11 +34,10 @@ import org.apache.helix.HelixProperty;
 import org.apache.helix.api.config.HelixConfigProperty;
 import org.apache.helix.api.config.StateTransitionThrottleConfig;
 import org.apache.helix.api.config.StateTransitionTimeoutConfig;
+import org.apache.helix.api.config.ViewClusterSourceConfig;
 import org.apache.helix.constants.InstanceConstants;
 import org.apache.helix.util.ConfigStringUtil;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
-import org.apache.helix.api.config.ViewClusterSourceConfig;
-import org.apache.helix.zookeeper.datamodel.ZNRecord;
 
 /**
  * Cluster configurations
@@ -86,6 +85,9 @@ public class ClusterConfig extends HelixProperty {
     // partitons that need recovery or in
     // error exceeds this limitation
     DISABLED_INSTANCES,
+    DISABLED_INSTANCES_WITH_INFO,
+    // disabled instances and disabled instances with info are for storing batch disabled instances.
+    // disabled instances will write into both 2 fields for backward compatibility.
 
     VIEW_CLUSTER, // Set to "true" to indicate this is a view cluster
     VIEW_CLUSTER_SOURCES, // Map field, key is the name of source cluster, value is
@@ -772,12 +774,32 @@ public class ClusterConfig extends HelixProperty {
     _record.setMapField(ClusterConfigProperty.DISABLED_INSTANCES.name(), disabledInstances);
   }
 
+  /**
+   * Set the disabled instance list with concatenated Info
+   */
+  public void setDisabledInstancesWithInfo(Map<String, String> disabledInstancesWithInfo) {
+    _record.setMapField(ClusterConfigProperty.DISABLED_INSTANCES_WITH_INFO.name(),
+        disabledInstancesWithInfo);
+  }
+
   /**
    * Get current disabled instance map of <instance, disabledTimeStamp>
    * @return a non-null map of disabled instances in cluster config
    */
   public Map<String, String> getDisabledInstances() {
-    Map<String, String> disabledInstances = _record.getMapField(ClusterConfigProperty.DISABLED_INSTANCES.name());
+    Map<String, String> disabledInstances =
+        _record.getMapField(ClusterConfigProperty.DISABLED_INSTANCES.name());
+    return disabledInstances == null ? Collections.emptyMap() : disabledInstances;
+  }
+
+  /**
+   * Get current disabled instance map of
+   * <instance, disabledReason = "res, disabledType = typ, disabledTimeStamp = time">
+   * @return a non-null map of disabled instances in cluster config
+   */
+  public Map<String, String> getDisabledInstancesWithInfo() {
+    Map<String, String> disabledInstances =
+        _record.getMapField(ClusterConfigProperty.DISABLED_INSTANCES_WITH_INFO.name());
     return disabledInstances == null ? Collections.emptyMap() : disabledInstances;
   }
 
@@ -1103,7 +1125,6 @@ public class ClusterConfig extends HelixProperty {
     }
     return idealStateRuleMap;
   }
-
   @Override
   public int hashCode() {
     return getId().hashCode();
@@ -1118,26 +1139,40 @@ public class ClusterConfig extends HelixProperty {
   }
 
   public String getPlainInstanceHelixDisabledType(String instanceName) {
-    return ConfigStringUtil.parseConcatenatedConfig(getDisabledInstances().get(instanceName))
+    return ConfigStringUtil.parseConcatenatedConfig(getDisabledInstancesWithInfo().get(instanceName))
         .get(ClusterConfigProperty.HELIX_DISABLED_TYPE.toString());
   }
 
   public String getInstanceHelixDisabledType(String instanceName) {
-    if (!getDisabledInstances().containsKey(instanceName)) {
+    if (!getDisabledInstancesWithInfo().containsKey(instanceName) &&
+        !getDisabledInstances().containsKey(instanceName)) {
       return InstanceConstants.INSTANCE_NOT_DISABLED;
     }
-    return ConfigStringUtil.parseConcatenatedConfig(getDisabledInstances().get(instanceName))
+    return ConfigStringUtil.parseConcatenatedConfig(getDisabledInstancesWithInfo().get(instanceName))
         .getOrDefault(ClusterConfigProperty.HELIX_DISABLED_TYPE.toString(),
             InstanceConstants.InstanceDisabledType.DEFAULT_INSTANCE_DISABLE_TYPE.toString());
   }
 
+  /**
+   * @return a String representing reason.
+   * null if instance is not disabled in batch mode or do not have disabled reason
+   */
   public String getInstanceHelixDisabledReason(String instanceName) {
-    return ConfigStringUtil.parseConcatenatedConfig(getDisabledInstances().get(instanceName))
+    return ConfigStringUtil.parseConcatenatedConfig(getDisabledInstancesWithInfo().get(instanceName))
         .get(ClusterConfigProperty.HELIX_DISABLED_REASON.toString());
   }
 
+  /**
+   * @param instanceName
+   * @return a String representation of unix time
+   * null if the instance is not disabled in batch mode.
+   */
   public String getInstanceHelixDisabledTimeStamp(String instanceName) {
-    return ConfigStringUtil.parseConcatenatedConfig(getDisabledInstances().get(instanceName))
-        .get(ClusterConfigProperty.HELIX_ENABLED_DISABLE_TIMESTAMP.toString());
+    if (getDisabledInstancesWithInfo().containsKey(instanceName)) {
+      return ConfigStringUtil
+          .parseConcatenatedConfig(getDisabledInstancesWithInfo().get(instanceName))
+          .get(ClusterConfigProperty.HELIX_ENABLED_DISABLE_TIMESTAMP.toString());
+    }
+    return getDisabledInstances().get(instanceName);
   }
 }
diff --git a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
index 6f17d1c49..fa23372f9 100644
--- a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
+++ b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
@@ -129,7 +129,8 @@ public class InstanceValidationUtil {
       return enabledInInstanceConfig;
     }
     boolean enabledInClusterConfig =
-        !clusterConfig.getDisabledInstances().containsKey(instanceConfig.getInstanceName());
+        !clusterConfig.getDisabledInstances().containsKey(instanceConfig.getInstanceName())
+        && !clusterConfig.getDisabledInstancesWithInfo().containsKey(instanceConfig.getInstanceName());
     return enabledInClusterConfig && enabledInInstanceConfig;
   }
 
diff --git a/helix-core/src/test/java/org/apache/helix/integration/TestBatchEnableInstances.java b/helix-core/src/test/java/org/apache/helix/integration/TestBatchEnableInstances.java
index 6dfd0037f..6e07a373c 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/TestBatchEnableInstances.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/TestBatchEnableInstances.java
@@ -61,6 +61,12 @@ public class TestBatchEnableInstances extends TaskTestBase {
     for (Map<String, String> stateMap : externalView.getRecord().getMapFields().values()) {
       Assert.assertTrue(!stateMap.keySet().contains(_participants[0].getInstanceName()));
     }
+    HelixDataAccessor dataAccessor =
+        new ZKHelixDataAccessor(CLUSTER_NAME, new ZkBaseDataAccessor<>(_gZkClient));
+    ClusterConfig clusterConfig = dataAccessor.getProperty(dataAccessor.keyBuilder().clusterConfig());
+    Assert.assertEquals(Long.parseLong(
+        clusterConfig.getInstanceHelixDisabledTimeStamp(_participants[0].getInstanceName())),
+        Long.parseLong(clusterConfig.getDisabledInstances().get(_participants[0].getInstanceName())));
     _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME,
         _participants[0].getInstanceName(), true);
   }
@@ -79,9 +85,18 @@ public class TestBatchEnableInstances extends TaskTestBase {
       Assert.assertTrue(!stateMap.keySet().contains(_participants[0].getInstanceName()));
       Assert.assertTrue(!stateMap.keySet().contains(_participants[1].getInstanceName()));
     }
+    HelixDataAccessor dataAccessor =
+        new ZKHelixDataAccessor(CLUSTER_NAME, new ZkBaseDataAccessor<>(_gZkClient));
+    ClusterConfig clusterConfig = dataAccessor.getProperty(dataAccessor.keyBuilder().clusterConfig());
+    Assert.assertEquals(Long.parseLong(
+        clusterConfig.getInstanceHelixDisabledTimeStamp(_participants[1].getInstanceName())),
+        Long.parseLong(clusterConfig.getDisabledInstances().get(_participants[1].getInstanceName())));
     _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME,
         Arrays.asList(_participants[0].getInstanceName(), _participants[1].getInstanceName()),
         true);
+    Assert.assertEquals(Long.parseLong(
+        clusterConfig.getInstanceHelixDisabledTimeStamp(_participants[0].getInstanceName())),
+        Long.parseLong(clusterConfig.getDisabledInstances().get(_participants[0].getInstanceName())));
   }
 
   @Test
diff --git a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
index 87231d5d7..4063a58ed 100644
--- a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
+++ b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
@@ -319,18 +319,23 @@ public class MockHelixAdmin implements HelixAdmin {
     ClusterConfig clusterConfig = new ClusterConfig(record);
 
     Map<String, String> disabledInstances = new TreeMap<>();
+    Map<String, String> disabledInstancesWithInfo = new TreeMap<>();
     if (clusterConfig.getDisabledInstances() != null) {
       disabledInstances.putAll(clusterConfig.getDisabledInstances());
+      disabledInstancesWithInfo.putAll(clusterConfig.getDisabledInstancesWithInfo());
     }
     if (enabled) {
       disabledInstances.keySet().removeAll(instances);
     } else {
       for (String disabledInstance : instances) {
+        String timeStamp = String.valueOf(System.currentTimeMillis());
+        disabledInstances.put(disabledInstance, timeStamp);
         disabledInstances
-            .put(disabledInstance, assembleInstanceBatchedDisabledInfo(disabledType, reason));
+            .put(disabledInstance, assembleInstanceBatchedDisabledInfo(disabledType, reason, timeStamp));
       }
     }
     clusterConfig.setDisabledInstances(disabledInstances);
+    clusterConfig.setDisabledInstancesWithInfo(disabledInstancesWithInfo);
   }
 
   @Override
diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java
index 7e9058ff8..87859ffcc 100644
--- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java
+++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java
@@ -154,6 +154,8 @@ public class TestInstancesAccessor extends AbstractTestClass {
     ClusterConfig clusterConfig = _configAccessor.getClusterConfig(CLUSTER_NAME);
     Assert.assertEquals(clusterConfig.getDisabledInstances().keySet(),
         new HashSet<>(instancesToDisable));
+    Assert.assertEquals(clusterConfig.getDisabledInstancesWithInfo().keySet(),
+        new HashSet<>(instancesToDisable));
     Assert
         .assertEquals(clusterConfig.getInstanceHelixDisabledType(CLUSTER_NAME + "localhost_12918"),
             "USER_OPERATION");
@@ -171,6 +173,11 @@ public class TestInstancesAccessor extends AbstractTestClass {
     clusterConfig = _configAccessor.getClusterConfig(CLUSTER_NAME);
     Assert.assertEquals(clusterConfig.getDisabledInstances().keySet(),
         new HashSet<>(Arrays.asList(CLUSTER_NAME + "localhost_12919")));
+    Assert.assertEquals(clusterConfig.getDisabledInstancesWithInfo().keySet(),
+        new HashSet<>(Arrays.asList(CLUSTER_NAME + "localhost_12919")));
+    Assert.assertEquals(Long.parseLong(
+        clusterConfig.getInstanceHelixDisabledTimeStamp(CLUSTER_NAME + "localhost_12919")),
+        Long.parseLong(clusterConfig.getDisabledInstances().get(CLUSTER_NAME + "localhost_12919")));
     Assert
         .assertEquals(clusterConfig.getInstanceHelixDisabledType(CLUSTER_NAME + "localhost_12918"),
             "INSTANCE_NOT_DISABLED");