You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by jx...@apache.org on 2018/01/25 21:49:28 UTC
[37/50] [abbrv] helix git commit: Improve Helix maintenance mode
Improve Helix maintenance mode
1. Remove the exception thrown in the best possible state calculation stage so that the rest of the pipeline can still run.
2. Add event generation for maintenance mode change.
Project: http://git-wip-us.apache.org/repos/asf/helix/repo
Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/89089b45
Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/89089b45
Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/89089b45
Branch: refs/heads/master
Commit: 89089b4523e91e356a87f5ad151ee9432b574cf8
Parents: ec7eaaa
Author: Junkai Xue <jx...@linkedin.com>
Authored: Fri Dec 15 11:43:07 2017 -0800
Committer: Junkai Xue <jx...@linkedin.com>
Committed: Wed Jan 24 18:32:46 2018 -0800
----------------------------------------------------------------------
.../controller/GenericHelixController.java | 47 +++++++++++++-------
.../stages/BestPossibleStateCalcStage.java | 8 ++--
2 files changed, 35 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/helix/blob/89089b45/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
index 6d1af7c..2546bd2 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
@@ -73,6 +73,7 @@ import org.apache.helix.model.CurrentState;
import org.apache.helix.model.IdealState;
import org.apache.helix.model.InstanceConfig;
import org.apache.helix.model.LiveInstance;
+import org.apache.helix.model.MaintenanceSignal;
import org.apache.helix.model.Message;
import org.apache.helix.model.PauseSignal;
import org.apache.helix.model.ResourceConfig;
@@ -128,6 +129,7 @@ public class GenericHelixController implements IdealStateChangeListener,
* will be no-op. Other event handling logic keeps the same when the flag is set.
*/
private boolean _paused;
+ private boolean _inMaintenanceMode;
/**
* The timer that can periodically run the rebalancing pipeline. The timer will start if there is
@@ -632,23 +634,10 @@ public class GenericHelixController implements IdealStateChangeListener,
}
PauseSignal pauseSignal = accessor.getProperty(keyBuilder.pause());
- if (pauseSignal != null) {
- if (!_paused) {
- _paused = true;
- logger.info("controller is now paused");
- }
- } else {
- if (_paused) {
- _paused = false;
- logger.info("controller is now resumed");
- ClusterEvent event = new ClusterEvent(_clusterName, ClusterEventType.Resume);
- event.addAttribute(AttributeName.changeContext.name(), changeContext);
- event.addAttribute(AttributeName.helixmanager.name(), changeContext.getManager());
- event.addAttribute(AttributeName.eventData.name(), pauseSignal);
- _eventQueue.put(event);
- _taskEventQueue.put(event.clone());
- }
- }
+ MaintenanceSignal maintenanceSignal = accessor.getProperty(keyBuilder.maintenance());
+ _paused = updateControllerState(changeContext, pauseSignal, _paused);
+ _inMaintenanceMode = updateControllerState(changeContext, maintenanceSignal, _inMaintenanceMode);
+
synchronized (this) {
if (_clusterStatusMonitor == null) {
_clusterStatusMonitor = new ClusterStatusMonitor(changeContext.getManager().getClusterName());
@@ -758,6 +747,30 @@ public class GenericHelixController implements IdealStateChangeListener,
}
}
+ private boolean updateControllerState(NotificationContext changeContext, PauseSignal signal, // shared by the pause and maintenance checks
+ boolean statusFlag) { // statusFlag: the caller's current flag; the possibly-updated value is returned
+ if (signal != null) { // signal present: the corresponding mode is currently requested
+ // Only the first transition into PAUSE/MAINTENANCE mode is recorded; repeated calls while the flag is already set are no-ops
+ if (!statusFlag) {
+ statusFlag = true;
+ logger.info(String.format("controller is now %s",
+ (signal instanceof MaintenanceSignal) ? "in maintenance mode" : "paused"));
+ }
+ } else { // signal absent: the mode is not (or no longer) requested
+ if (statusFlag) { // flag was set, so this call observes the transition out of the mode
+ statusFlag = false;
+ logger.info("controller is now resumed from paused state"); // NOTE(review): same text is logged when the maintenance flag is cleared; signal is null here so the two cases cannot be told apart
+ ClusterEvent event = new ClusterEvent(_clusterName, ClusterEventType.Resume);
+ event.addAttribute(AttributeName.changeContext.name(), changeContext);
+ event.addAttribute(AttributeName.helixmanager.name(), changeContext.getManager());
+ event.addAttribute(AttributeName.eventData.name(), signal); // NOTE(review): signal is always null in this branch, so eventData is set to null - confirm consumers expect that
+ _eventQueue.put(event); // enqueue the Resume event on the main event queue
+ _taskEventQueue.put(event.clone()); // and a clone of it on the task event queue
+ }
+ }
+ return statusFlag; // caller assigns this back to _paused or _inMaintenanceMode
+ }
+ }
+
// TODO: refactor this to use common/ClusterEventProcessor.
private class ClusterEventProcessor extends Thread {
http://git-wip-us.apache.org/repos/asf/helix/blob/89089b45/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
index e96f0f3..9566f2c 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
@@ -180,15 +180,17 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage {
"Offline Instances count %d greater than allowed count %d. Stop rebalance pipeline and pause the cluster %s",
offlineCount, maxOfflineInstancesAllowed, cache.getClusterName());
if (manager != null) {
- manager.getClusterManagmentTool()
- .enableMaintenanceMode(manager.getClusterName(), true, errMsg);
+ if (manager.getHelixDataAccessor()
+ .getProperty(manager.getHelixDataAccessor().keyBuilder().maintenance()) == null) {
+ manager.getClusterManagmentTool()
+ .enableMaintenanceMode(manager.getClusterName(), true, errMsg);
+ }
} else {
logger.error("Failed to pause cluster, HelixManager is not set!");
}
if (!cache.isTaskCache()) {
updateRebalanceStatus(true, manager, cache, clusterStatusMonitor, errMsg);
}
- throw new HelixException(errMsg);
}
}
}