You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by jx...@apache.org on 2018/01/25 21:49:03 UTC

[12/50] [abbrv] helix git commit: Cluster Maintenance Mode feature support

Cluster Maintenance Mode feature support

Helix does not have a state that keeps the existing partitions active without rebalancing partition placement.
The only state the controller can switch to is the paused state. If the controller has been paused, all the replicas in the cluster will not be active anymore.
It is better to have another mode that lets the current replicas keep functioning without bootstrapping new replicas when the cluster or an instance is full.
There are several scenarios that may need such a mode, which keeps the original
assignment of partitions without partition movement. At the same time, no partitions will be assigned for newly added resources. This mode is
called cluster maintenance mode.

For more details, please refer to: https://iwww.corp.linkedin.com/wiki/cf/display/ENGS/Cluster+Maintenance+Mode+Design


Project: http://git-wip-us.apache.org/repos/asf/helix/repo
Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/a7477c3b
Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/a7477c3b
Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/a7477c3b

Branch: refs/heads/master
Commit: a7477c3bbc85059b2e522f5caa214c33eb4c3e15
Parents: bd9f7a4
Author: Junkai Xue <jx...@linkedin.com>
Authored: Thu Nov 2 15:00:24 2017 -0700
Committer: Junkai Xue <jx...@linkedin.com>
Committed: Wed Jan 24 18:31:11 2018 -0800

----------------------------------------------------------------------
 .../main/java/org/apache/helix/HelixAdmin.java  | 15 ++++
 .../org/apache/helix/HelixDataAccessor.java     |  2 +
 .../main/java/org/apache/helix/PropertyKey.java |  9 +++
 .../org/apache/helix/PropertyPathBuilder.java   | 20 +++--
 .../java/org/apache/helix/PropertyType.java     |  1 +
 .../rebalancer/AbstractRebalancer.java          | 35 ++++++++-
 .../rebalancer/DelayedAutoRebalancer.java       | 34 --------
 .../rebalancer/MaintenanceRebalancer.java       | 43 +++++++++++
 .../stages/BestPossibleStateCalcStage.java      | 17 ++--
 .../controller/stages/ClusterDataCache.java     |  9 +++
 .../apache/helix/manager/zk/ZKHelixAdmin.java   | 29 ++++++-
 .../helix/manager/zk/ZKHelixDataAccessor.java   |  8 ++
 .../apache/helix/model/MaintenanceSignal.java   | 13 ++++
 .../java/org/apache/helix/MockAccessor.java     |  5 ++
 .../controller/TestClusterMaintenanceMode.java  | 81 ++++++++++++++++++++
 .../org/apache/helix/mock/MockHelixAdmin.java   |  8 ++
 16 files changed, 277 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/HelixAdmin.java b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java
index 652ab7a..9562a0b 100644
--- a/helix-core/src/main/java/org/apache/helix/HelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java
@@ -259,6 +259,21 @@ public interface HelixAdmin {
   void enableCluster(String clusterName, boolean enabled, String reason);
 
   /**
+   * Enable or disable maintenance mode for a cluster (no reason is supplied)
+   * @param clusterName name of the cluster whose maintenance mode is toggled
+   * @param enabled true to put the cluster into maintenance mode, false to take it out
+   */
+  void enableMaintenanceMode(String clusterName, boolean enabled);
+
+  /**
+   * Enable or disable maintenance mode for a cluster
+   * @param clusterName name of the cluster whose maintenance mode is toggled
+   * @param enabled true to put the cluster into maintenance mode, false to take it out
+   * @param reason explanation recorded with the maintenance signal when enabling; may be null
+   */
+  void enableMaintenanceMode(String clusterName, boolean enabled, String reason);
+
+  /**
    * Reset a list of partitions in error state for an instance
    * The partitions are assume to be in error state and reset will bring them from error
    * to initial state. An error to initial state transition is required for reset.

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java b/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java
index 5c2baec..f8ae131 100644
--- a/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java
+++ b/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java
@@ -24,6 +24,7 @@ import java.util.Map;
 
 import org.I0Itec.zkclient.DataUpdater;
 import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
 import org.apache.helix.model.PauseSignal;
 import org.apache.helix.model.StateModelDefinition;
@@ -38,6 +39,7 @@ public interface HelixDataAccessor {
   boolean createControllerMessage(Message message);
   boolean createControllerLeader(LiveInstance leader);
   boolean createPause(PauseSignal pauseSignal);
+  boolean createMaintenance(MaintenanceSignal maintenanceSignal);
 
   /**
    * Set a property, overwrite if it exists and creates if not exists. This api

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/PropertyKey.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/PropertyKey.java b/helix-core/src/main/java/org/apache/helix/PropertyKey.java
index f2d1e7b..4e58a89 100644
--- a/helix-core/src/main/java/org/apache/helix/PropertyKey.java
+++ b/helix-core/src/main/java/org/apache/helix/PropertyKey.java
@@ -32,6 +32,7 @@ import org.apache.helix.model.IdealState;
 import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.model.LeaderHistory;
 import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
 import org.apache.helix.model.ParticipantHistory;
 import org.apache.helix.model.PauseSignal;
@@ -674,6 +675,14 @@ public class PropertyKey {
     }
 
     /**
+     * Get a property key associated with {@link MaintenanceSignal}
+     * @return {@link PropertyKey}
+     */
+    public PropertyKey maintenance() {
+      return new PropertyKey(MAINTENANCE, MaintenanceSignal.class, _clusterName);
+    }
+
+    /**
      * Get a property key associated with a {@link HealthStat} for an instance
      * @param instanceName
      * @param id identifies the statistics

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java b/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java
index a61519a..cfff5bb 100644
--- a/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java
+++ b/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java
@@ -19,17 +19,6 @@ package org.apache.helix;
  * under the License.
  */
 
-import static org.apache.helix.PropertyType.CONFIGS;
-import static org.apache.helix.PropertyType.CURRENTSTATES;
-import static org.apache.helix.PropertyType.EXTERNALVIEW;
-import static org.apache.helix.PropertyType.HISTORY;
-import static org.apache.helix.PropertyType.IDEALSTATES;
-import static org.apache.helix.PropertyType.LIVEINSTANCES;
-import static org.apache.helix.PropertyType.MESSAGES;
-import static org.apache.helix.PropertyType.PAUSE;
-import static org.apache.helix.PropertyType.STATEMODELDEFS;
-import static org.apache.helix.PropertyType.STATUSUPDATES;
-
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
@@ -42,6 +31,7 @@ import org.apache.helix.model.IdealState;
 import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.model.LeaderHistory;
 import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
 import org.apache.helix.model.PauseSignal;
 import org.apache.helix.model.StateModelDefinition;
@@ -49,6 +39,8 @@ import org.apache.helix.model.StatusUpdate;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.helix.PropertyType.*;
+
 /**
  * Utility mapping properties to their Zookeeper locations
  */
@@ -70,6 +62,7 @@ public class PropertyPathBuilder {
     typeToClassMapping.put(STATUSUPDATES, StatusUpdate.class);
     typeToClassMapping.put(HISTORY, LeaderHistory.class);
     typeToClassMapping.put(PAUSE, PauseSignal.class);
+    typeToClassMapping.put(MAINTENANCE, MaintenanceSignal.class);
 
     // @formatter:off
     addEntry(PropertyType.CONFIGS, 1, "/{clusterName}/CONFIGS");
@@ -132,6 +125,7 @@ public class PropertyPathBuilder {
     addEntry(PropertyType.LEADER, 1, "/{clusterName}/CONTROLLER/LEADER");
     addEntry(PropertyType.HISTORY, 1, "/{clusterName}/CONTROLLER/HISTORY");
     addEntry(PropertyType.PAUSE, 1, "/{clusterName}/CONTROLLER/PAUSE");
+    addEntry(PropertyType.MAINTENANCE, 1, "/{clusterName}/CONTROLLER/MAINTENANCE");
     // @formatter:on
 
   }
@@ -354,4 +348,8 @@ public class PropertyPathBuilder {
   public static String pause(String clusterName) {
     return String.format("/%s/CONTROLLER/PAUSE", clusterName);
   }
+
+  public static String maintenance(String clusterName) {
+    return String.format("/%s/CONTROLLER/MAINTENANCE", clusterName);
+  }
 }

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/PropertyType.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/PropertyType.java b/helix-core/src/main/java/org/apache/helix/PropertyType.java
index b522014..73da39e 100644
--- a/helix-core/src/main/java/org/apache/helix/PropertyType.java
+++ b/helix-core/src/main/java/org/apache/helix/PropertyType.java
@@ -58,6 +58,7 @@ public enum PropertyType {
   LEADER(Type.CONTROLLER, false, false, true, true),
   HISTORY(Type.CONTROLLER, true, true, true),
   PAUSE(Type.CONTROLLER, true, false, true),
+  MAINTENANCE(Type.CONTROLLER, true, false, true),
   MESSAGES_CONTROLLER(Type.CONTROLLER, true, false, true),
   STATUSUPDATES_CONTROLLER(Type.CONTROLLER, true, true, true),
   ERRORS_CONTROLLER(Type.CONTROLLER, true, true, true);

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java
index b1888d1..5d77eb3 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java
@@ -314,7 +314,7 @@ public abstract class AbstractRebalancer implements Rebalancer, MappingCalculato
    * Sorter for nodes that sorts according to the CurrentState of the partition. There are only two priorities:
    * (1) Top-state and second states have priority 0. (2) Other states(or no state) have priority 1.
    */
-  private static class TopStatePreferenceListComparator implements Comparator<String> {
+  protected static class TopStatePreferenceListComparator implements Comparator<String> {
     protected final Map<String, String> _currentStateMap;
     protected final StateModelDefinition _stateModelDef;
 
@@ -345,4 +345,37 @@ public abstract class AbstractRebalancer implements Rebalancer, MappingCalculato
       return p1 - p2;
     }
   }
+
+  /**
+   * Comparator over instance names that ranks each instance by the priority of the state it holds
+   * in the given current-state map, using the state model definition's state priority map.
+   * An instance whose state is absent or not in the priority map ranks at Integer.MAX_VALUE.
+   */
+  protected static class PreferenceListNodeComparator implements Comparator<String> {
+    protected final Map<String, String> _currentStateMap;
+    protected final StateModelDefinition _stateModelDef;
+
+    public PreferenceListNodeComparator(Map<String, String> currentStateMap, StateModelDefinition stateModelDef) {
+      _currentStateMap = currentStateMap;
+      _stateModelDef = stateModelDef;
+    }
+
+    @Override
+    public int compare(String ins1, String ins2) {
+      Map<String, Integer> priorities = _stateModelDef.getStatePriorityMap();
+      return priorityOf(ins1, priorities).compareTo(priorityOf(ins2, priorities));
+    }
+
+    /**
+     * Looks up the priority of the state the given instance holds in the current-state map.
+     * Returns Integer.MAX_VALUE when the instance has no state or the state has no priority.
+     */
+    private Integer priorityOf(String instance, Map<String, Integer> priorities) {
+      String state = _currentStateMap.get(instance);
+      if (state == null || !priorities.containsKey(state)) {
+        return Integer.MAX_VALUE;
+      }
+      return priorities.get(state);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java
index adac235..5ebb57d 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java
@@ -529,38 +529,4 @@ public class DelayedAutoRebalancer extends AbstractRebalancer {
 
     return true;
   }
-
-
-  /**
-   * Sorter for nodes that sorts according to the CurrentState of the partition, based on the state priority defined
-   * in the state model definition.
-   * If the CurrentState doesn't exist, treat it as having lowest priority(Integer.MAX_VALUE).
-   */
-  private static class PreferenceListNodeComparator implements Comparator<String> {
-    protected final Map<String, String> _currentStateMap;
-    protected final StateModelDefinition _stateModelDef;
-
-    public PreferenceListNodeComparator(Map<String, String> currentStateMap, StateModelDefinition stateModelDef) {
-      _currentStateMap = currentStateMap;
-      _stateModelDef = stateModelDef;
-    }
-
-    @Override
-    public int compare(String ins1, String ins2) {
-      Integer p1 = Integer.MAX_VALUE;
-      Integer p2 = Integer.MAX_VALUE;
-
-      Map<String, Integer> statesPriorityMap = _stateModelDef.getStatePriorityMap();
-      String state1 = _currentStateMap.get(ins1);
-      String state2 = _currentStateMap.get(ins2);
-      if (state1 != null && statesPriorityMap.containsKey(state1)) {
-        p1 = statesPriorityMap.get(state1);
-      }
-      if (state2 != null && statesPriorityMap.containsKey(state2)) {
-        p2 = statesPriorityMap.get(state2);
-      }
-
-      return p1.compareTo(p2);
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/MaintenanceRebalancer.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/MaintenanceRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/MaintenanceRebalancer.java
new file mode 100644
index 0000000..d324659
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/MaintenanceRebalancer.java
@@ -0,0 +1,43 @@
+package org.apache.helix.controller.rebalancer;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.apache.helix.controller.stages.ClusterDataCache;
+import org.apache.helix.controller.stages.CurrentStateOutput;
+import org.apache.helix.model.IdealState;
+import org.apache.helix.model.Partition;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Rebuilds each partition's preference list from the instances found in its current state. */
+public class MaintenanceRebalancer extends SemiAutoRebalancer {
+  private static final Logger LOG = LoggerFactory.getLogger(MaintenanceRebalancer.class);
+
+  @Override
+  public IdealState computeNewIdealState(String resourceName, IdealState currentIdealState,
+      CurrentStateOutput currentStateOutput, ClusterDataCache clusterData) {
+    LOG.info("Start computing ideal state for resource {} in maintenance mode.", resourceName);
+    Map<Partition, Map<String, String>> currentStateMap =
+        currentStateOutput.getCurrentStateMap(resourceName);
+    if (currentStateMap == null || currentStateMap.isEmpty()) {
+      // No current state for this resource: clear the preference lists so nothing is assigned.
+      LOG.warn("No new partition will be assigned for {} in maintenance mode", resourceName);
+      currentIdealState.setPreferenceLists(Collections.<String, List<String>>emptyMap());
+      return currentIdealState;
+    }
+
+    // One principle is to prohibit DROP -> OFFLINE and OFFLINE -> DROP state transitions.
+    // Derive the preference list from the current state, ordered by state priority.
+    for (Map.Entry<Partition, Map<String, String>> entry : currentStateMap.entrySet()) {
+      Map<String, String> stateMap = entry.getValue();
+      List<String> preferenceList = new ArrayList<>(stateMap.keySet());
+      Collections.sort(preferenceList, new PreferenceListNodeComparator(stateMap,
+          clusterData.getStateModelDef(currentIdealState.getStateModelDefRef())));
+      currentIdealState.setPreferenceList(entry.getKey().getPartitionName(), preferenceList);
+    }
+    LOG.info("End computing ideal state for resource {} in maintenance mode.", resourceName);
+    return currentIdealState;
+  }
+}

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
index c43b96c..4fb8cd7 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
@@ -26,6 +26,7 @@ import org.apache.helix.controller.pipeline.AbstractBaseStage;
 import org.apache.helix.controller.pipeline.StageException;
 import org.apache.helix.controller.rebalancer.AutoRebalancer;
 import org.apache.helix.controller.rebalancer.CustomRebalancer;
+import org.apache.helix.controller.rebalancer.MaintenanceRebalancer;
 import org.apache.helix.controller.rebalancer.Rebalancer;
 import org.apache.helix.controller.rebalancer.SemiAutoRebalancer;
 import org.apache.helix.controller.rebalancer.internal.MappingCalculator;
@@ -211,7 +212,8 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage {
       idealState.setStateModelDefRef(resource.getStateModelDefRef());
     }
 
-    Rebalancer rebalancer = getRebalancer(idealState, resourceName);
+    Rebalancer rebalancer =
+        getRebalancer(idealState, resourceName, cache.isMaintenanceModeEnabled());
     MappingCalculator mappingCalculator = getMappingCalculator(rebalancer, resourceName);
 
     if (rebalancer == null || mappingCalculator == null) {
@@ -289,7 +291,8 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage {
     }
   }
 
-  private Rebalancer getRebalancer(IdealState idealState, String resourceName) {
+  private Rebalancer getRebalancer(IdealState idealState, String resourceName,
+      boolean isMaintenanceModeEnabled) {
     Rebalancer customizedRebalancer = null;
     String rebalancerClassName = idealState.getRebalancerClassName();
     if (rebalancerClassName != null) {
@@ -305,10 +308,14 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage {
     Rebalancer rebalancer = null;
     switch (idealState.getRebalanceMode()) {
     case FULL_AUTO:
-      if (customizedRebalancer != null) {
-        rebalancer = customizedRebalancer;
+      if (isMaintenanceModeEnabled) {
+        rebalancer = new MaintenanceRebalancer();
       } else {
-        rebalancer = new AutoRebalancer();
+        if (customizedRebalancer != null) {
+          rebalancer = customizedRebalancer;
+        } else {
+          rebalancer = new AutoRebalancer();
+        }
       }
       break;
     case SEMI_AUTO:

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java b/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java
index 1dd862d..5b4aa83 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/ClusterDataCache.java
@@ -45,6 +45,7 @@ import org.apache.helix.model.ExternalView;
 import org.apache.helix.model.IdealState;
 import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
 import org.apache.helix.model.ParticipantHistory;
 import org.apache.helix.model.ResourceAssignment;
@@ -111,6 +112,7 @@ public class ClusterDataCache {
 
   boolean _updateInstanceOfflineTime = true;
   boolean _isTaskCache;
+  boolean _isMaintenanceModeEnabled;
 
   private String _clusterName;
 
@@ -204,6 +206,9 @@ public class ClusterDataCache {
       LOG.warn("Cluster config is null!");
     }
 
+    MaintenanceSignal maintenanceSignal = accessor.getProperty(keyBuilder.maintenance());
+    _isMaintenanceModeEnabled = (maintenanceSignal != null) ? true : false;
+
     long endTime = System.currentTimeMillis();
     LOG.info(
         "END: ClusterDataCache.refresh() for cluster " + getClusterName() + ", took " + (endTime
@@ -1008,6 +1013,10 @@ public class ClusterDataCache {
     return _isTaskCache;
   }
 
+  public boolean isMaintenanceModeEnabled() {
+    return _isMaintenanceModeEnabled;
+  }
+
   /**
    * toString method to print the entire cluster state
    */

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
index c3fa9e9..1af881e 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
@@ -67,6 +67,7 @@ import org.apache.helix.model.IdealState;
 import org.apache.helix.model.IdealState.RebalanceMode;
 import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
 import org.apache.helix.model.Message.MessageState;
 import org.apache.helix.model.Message.MessageType;
@@ -326,7 +327,33 @@ public class ZKHelixAdmin implements HelixAdmin {
       if (reason != null) {
         pauseSignal.setReason(reason);
       }
-      accessor.createPause(pauseSignal);
+      if (!accessor.createPause(pauseSignal)) {
+        throw new HelixException("Failed to create pause signal");
+      }
+    }
+  }
+
+  @Override
+  public void enableMaintenanceMode(String clusterName, boolean enabled) {
+    enableMaintenanceMode(clusterName, enabled, null);
+  }
+
+  @Override
+  public void enableMaintenanceMode(String clusterName, boolean enabled, String reason) {
+    HelixDataAccessor accessor =
+        new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
+    Builder keyBuilder = accessor.keyBuilder();
+    // Disabling removes the maintenance signal; enabling creates a new one (plus optional reason).
+    if (!enabled) {
+      accessor.removeProperty(keyBuilder.maintenance());
+    } else {
+      MaintenanceSignal maintenanceSignal = new MaintenanceSignal("maintenance");
+      if (reason != null) {
+        maintenanceSignal.setReason(reason);
+      }
+      if (!accessor.createMaintenance(maintenanceSignal)) {
+        throw new HelixException("Failed to create maintenance signal");
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java
index c5ce121..eff76f8 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixDataAccessor.java
@@ -43,6 +43,7 @@ import org.apache.helix.ZNRecordAssembler;
 import org.apache.helix.ZNRecordBucketizer;
 import org.apache.helix.ZNRecordUpdater;
 import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
 import org.apache.helix.model.PauseSignal;
 import org.apache.helix.model.StateModelDefinition;
@@ -113,6 +114,13 @@ public class ZKHelixDataAccessor implements HelixDataAccessor {
   }
 
   @Override
+  public boolean createMaintenance(MaintenanceSignal maintenanceSignal) {
+    return _baseDataAccessor
+        .create(PropertyPathBuilder.maintenance(_clusterName), maintenanceSignal.getRecord(),
+            AccessOption.PERSISTENT);
+  }
+
+  @Override
   public <T extends HelixProperty> boolean setProperty(PropertyKey key, T value) {
     PropertyType type = key.getType();
     if (!value.isValid()) {

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java b/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java
new file mode 100644
index 0000000..b678738
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java
@@ -0,0 +1,13 @@
+package org.apache.helix.model;
+
+import org.apache.helix.ZNRecord;
+
+public class MaintenanceSignal extends PauseSignal {
+  public MaintenanceSignal(String id) {
+    super(id);
+  }
+
+  public MaintenanceSignal(ZNRecord record) {
+    super(record);
+  }
+}

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/test/java/org/apache/helix/MockAccessor.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/MockAccessor.java b/helix-core/src/test/java/org/apache/helix/MockAccessor.java
index d41c2d3..583dbd8 100644
--- a/helix-core/src/test/java/org/apache/helix/MockAccessor.java
+++ b/helix-core/src/test/java/org/apache/helix/MockAccessor.java
@@ -27,6 +27,7 @@ import org.I0Itec.zkclient.DataUpdater;
 import org.I0Itec.zkclient.exception.ZkNoNodeException;
 import org.apache.helix.mock.MockBaseDataAccessor;
 import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
 import org.apache.helix.model.PauseSignal;
 import org.apache.helix.model.StateModelDefinition;
@@ -67,6 +68,10 @@ public class MockAccessor implements HelixDataAccessor {
     return false;
   }
 
+  @Override public boolean createMaintenance(MaintenanceSignal maintenanceSignal) {
+    return false;
+  }
+
   @Override public boolean setProperty(PropertyKey key, HelixProperty value) {
     String path = key.getPath();
     _baseDataAccessor.set(path, value.getRecord(), AccessOption.PERSISTENT);

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
new file mode 100644
index 0000000..3949183
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
@@ -0,0 +1,81 @@
+package org.apache.helix.integration.controller;
+
+import java.util.Map;
+import org.apache.helix.integration.manager.MockParticipantManager;
+import org.apache.helix.integration.task.TaskTestBase;
+import org.apache.helix.integration.task.WorkflowGenerator;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class TestClusterMaintenanceMode extends TaskTestBase {
+  @BeforeClass
+  public void beforeClass() throws Exception {
+    _numDbs = 1;
+    _numNodes = 3;
+    _numReplicas = 3;
+    _numParitions = 5;
+    super.beforeClass();
+  }
+
+  @Test
+  public void testMaintenanceModeAddNewInstance() throws InterruptedException {
+    _gSetupTool.getClusterManagementTool().enableMaintenanceMode(CLUSTER_NAME, true, "Test");
+    Thread.sleep(2000);
+    ExternalView prevExternalView = _gSetupTool.getClusterManagementTool()
+        .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB);
+    String instanceName = PARTICIPANT_PREFIX + "_" + (_startPort + 10);
+    _setupTool.addInstanceToCluster(CLUSTER_NAME, instanceName);
+    MockParticipantManager newInstance =
+        new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, instanceName);
+    newInstance.syncStart();
+    _gSetupTool.getClusterManagementTool()
+        .rebalance(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB, 3);
+    Thread.sleep(3000);
+    ExternalView newExternalView = _gSetupTool.getClusterManagementTool()
+        .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB);
+    Assert.assertEquals(prevExternalView.getRecord().getMapFields(),
+        newExternalView.getRecord().getMapFields());
+  }
+
+  @Test (dependsOnMethods = "testMaintenanceModeAddNewInstance")
+  public void testMaintenanceModeAddNewResource() throws InterruptedException {
+    _gSetupTool.getClusterManagementTool()
+        .addResource(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB + 1, 7, "MasterSlave",
+            IdealState.RebalanceMode.FULL_AUTO.name());
+    _gSetupTool.getClusterManagementTool()
+        .rebalance(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB + 1, 3);
+    Thread.sleep(2000);
+    ExternalView externalView = _gSetupTool.getClusterManagementTool()
+        .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB + 1);
+    Assert.assertNull(externalView);
+  }
+
+  @Test (dependsOnMethods = "testMaintenanceModeAddNewResource")
+  public void testMaintenanceModeInstanceDown() throws InterruptedException {
+    _participants[0].syncStop();
+    Thread.sleep(2000);
+    ExternalView externalView = _gSetupTool.getClusterManagementTool()
+        .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB);
+    for (Map<String, String> stateMap : externalView.getRecord().getMapFields().values()) {
+      Assert.assertTrue(stateMap.values().contains("MASTER"));
+    }
+  }
+
+  @Test (dependsOnMethods = "testMaintenanceModeInstanceDown")
+  public void testMaintenanceModeInstanceBack() throws InterruptedException {
+    _participants[0] =
+        new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, _participants[0].getInstanceName());
+    _participants[0].syncStart();
+    Thread.sleep(2000);
+    ExternalView externalView = _gSetupTool.getClusterManagementTool()
+        .getResourceExternalView(CLUSTER_NAME, WorkflowGenerator.DEFAULT_TGT_DB);
+    for (Map<String, String> stateMap : externalView.getRecord().getMapFields().values()) {
+      if (stateMap.containsKey(_participants[0].getInstanceName())) {
+        Assert.assertTrue(stateMap.get(_participants[0].getInstanceName()).equals("SLAVE"));
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/helix/blob/a7477c3b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
index 037d92b..b1d5da7 100644
--- a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
+++ b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
@@ -253,6 +253,14 @@ public class MockHelixAdmin implements HelixAdmin {
 
   }
 
+  @Override public void enableMaintenanceMode(String clusterName, boolean enabled) {
+
+  }
+
+  @Override public void enableMaintenanceMode(String clusterName, boolean enabled, String reason) {
+
+  }
+
   @Override public void resetPartition(String clusterName, String instanceName, String resourceName,
       List<String> partitionNames) {