You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by jx...@apache.org on 2018/01/25 21:49:28 UTC

[37/50] [abbrv] helix git commit: Improve Helix maintenance mode

Improve Helix maintenance mode

1. Remove the exception in best possible stage to let pipeline pass.
2. Add event generation for maintenance mode change.


Project: http://git-wip-us.apache.org/repos/asf/helix/repo
Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/89089b45
Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/89089b45
Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/89089b45

Branch: refs/heads/master
Commit: 89089b4523e91e356a87f5ad151ee9432b574cf8
Parents: ec7eaaa
Author: Junkai Xue <jx...@linkedin.com>
Authored: Fri Dec 15 11:43:07 2017 -0800
Committer: Junkai Xue <jx...@linkedin.com>
Committed: Wed Jan 24 18:32:46 2018 -0800

----------------------------------------------------------------------
 .../controller/GenericHelixController.java      | 47 +++++++++++++-------
 .../stages/BestPossibleStateCalcStage.java      |  8 ++--
 2 files changed, 35 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/helix/blob/89089b45/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
index 6d1af7c..2546bd2 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
@@ -73,6 +73,7 @@ import org.apache.helix.model.CurrentState;
 import org.apache.helix.model.IdealState;
 import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
 import org.apache.helix.model.PauseSignal;
 import org.apache.helix.model.ResourceConfig;
@@ -128,6 +129,7 @@ public class GenericHelixController implements IdealStateChangeListener,
    * will be no-op. Other event handling logic keeps the same when the flag is set.
    */
   private boolean _paused;
+  private boolean _inMaintenanceMode;
 
   /**
    * The timer that can periodically run the rebalancing pipeline. The timer will start if there is
@@ -632,23 +634,10 @@ public class GenericHelixController implements IdealStateChangeListener,
     }
 
     PauseSignal pauseSignal = accessor.getProperty(keyBuilder.pause());
-    if (pauseSignal != null) {
-      if (!_paused) {
-        _paused = true;
-        logger.info("controller is now paused");
-      }
-    } else {
-      if (_paused) {
-        _paused = false;
-        logger.info("controller is now resumed");
-        ClusterEvent event = new ClusterEvent(_clusterName, ClusterEventType.Resume);
-        event.addAttribute(AttributeName.changeContext.name(), changeContext);
-        event.addAttribute(AttributeName.helixmanager.name(), changeContext.getManager());
-        event.addAttribute(AttributeName.eventData.name(), pauseSignal);
-        _eventQueue.put(event);
-        _taskEventQueue.put(event.clone());
-      }
-    }
+    MaintenanceSignal maintenanceSignal = accessor.getProperty(keyBuilder.maintenance());
+    _paused = updateControllerState(changeContext, pauseSignal, _paused);
+    _inMaintenanceMode = updateControllerState(changeContext, maintenanceSignal, _inMaintenanceMode);
+
     synchronized (this) {
       if (_clusterStatusMonitor == null) {
         _clusterStatusMonitor = new ClusterStatusMonitor(changeContext.getManager().getClusterName());
@@ -758,6 +747,30 @@ public class GenericHelixController implements IdealStateChangeListener,
     }
   }
 
+  private boolean updateControllerState(NotificationContext changeContext, PauseSignal signal,
+      boolean statusFlag) {
+    if (signal != null) {
+      // Record the first transition into PAUSE/MAINTENANCE mode (signal present, flag not yet set)
+      if (!statusFlag) {
+        statusFlag = true;
+        logger.info(String.format("controller is now %s",
+            (signal instanceof MaintenanceSignal) ? "in maintenance mode" : "paused"));
+      }
+    } else {
+      if (statusFlag) {
+        statusFlag = false;
+        logger.info("controller is now resumed from paused state");
+        ClusterEvent event = new ClusterEvent(_clusterName, ClusterEventType.Resume);
+        event.addAttribute(AttributeName.changeContext.name(), changeContext);
+        event.addAttribute(AttributeName.helixmanager.name(), changeContext.getManager());
+        event.addAttribute(AttributeName.eventData.name(), signal);
+        _eventQueue.put(event);
+        _taskEventQueue.put(event.clone());
+      }
+    }
+    return statusFlag;
+  }
+
 
   // TODO: refactor this to use common/ClusterEventProcessor.
   private class ClusterEventProcessor extends Thread {

http://git-wip-us.apache.org/repos/asf/helix/blob/89089b45/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
index e96f0f3..9566f2c 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
@@ -180,15 +180,17 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage {
             "Offline Instances count %d greater than allowed count %d. Stop rebalance pipeline and pause the cluster %s",
             offlineCount, maxOfflineInstancesAllowed, cache.getClusterName());
         if (manager != null) {
-          manager.getClusterManagmentTool()
-              .enableMaintenanceMode(manager.getClusterName(), true, errMsg);
+          if (manager.getHelixDataAccessor()
+              .getProperty(manager.getHelixDataAccessor().keyBuilder().maintenance()) == null) {
+            manager.getClusterManagmentTool()
+                .enableMaintenanceMode(manager.getClusterName(), true, errMsg);
+          }
         } else {
           logger.error("Failed to pause cluster, HelixManager is not set!");
         }
         if (!cache.isTaskCache()) {
           updateRebalanceStatus(true, manager, cache, clusterStatusMonitor, errMsg);
         }
-        throw new HelixException(errMsg);
       }
     }
   }