You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by jx...@apache.org on 2018/10/29 20:48:05 UTC

[1/2] helix git commit: Add warn log when Helix controller puts a cluster into maintenance mode.

Repository: helix
Updated Branches:
  refs/heads/master 923002e8e -> a6863937c


Add warn log when Helix controller puts a cluster into maintenance mode.


Project: http://git-wip-us.apache.org/repos/asf/helix/repo
Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/ce167f54
Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/ce167f54
Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/ce167f54

Branch: refs/heads/master
Commit: ce167f544ab4e8cfb4229b3ad585f6c725c43771
Parents: 923002e
Author: Lei Xia <lx...@linkedin.com>
Authored: Tue Aug 21 10:36:33 2018 -0700
Committer: Junkai Xue <jx...@linkedin.com>
Committed: Mon Oct 29 13:47:49 2018 -0700

----------------------------------------------------------------------
 .../helix/controller/stages/BestPossibleStateCalcStage.java    | 6 ++++--
 .../helix/controller/stages/IntermediateStateCalcStage.java    | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/helix/blob/ce167f54/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
index 5cc9593..2000bec 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
@@ -160,16 +160,18 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage {
       int offlineCount = cache.getAllInstances().size() - cache.getEnabledLiveInstances().size();
       if (offlineCount > maxOfflineInstancesAllowed) {
         String errMsg = String.format(
-            "Offline Instances count %d greater than allowed count %d. Stop rebalance pipeline and pause the cluster %s",
+            "Offline Instances count %d greater than allowed count %d. Stop rebalance and put the cluster %s into maintenance mode.",
             offlineCount, maxOfflineInstancesAllowed, cache.getClusterName());
         if (manager != null) {
           if (manager.getHelixDataAccessor()
               .getProperty(manager.getHelixDataAccessor().keyBuilder().maintenance()) == null) {
             manager.getClusterManagmentTool()
                 .enableMaintenanceMode(manager.getClusterName(), true, errMsg);
+            LogUtil.logWarn(logger, _eventId, errMsg);
           }
         } else {
-          LogUtil.logError(logger, _eventId, "Failed to pause cluster, HelixManager is not set!");
+          LogUtil.logError(logger, _eventId, "Failed to put cluster " + cache.getClusterName()
+              + " into maintenance mode, HelixManager is not set!");
         }
         if (!cache.isTaskCache()) {
           updateRebalanceStatus(true, manager, cache, clusterStatusMonitor, errMsg);

http://git-wip-us.apache.org/repos/asf/helix/blob/ce167f54/helix-core/src/main/java/org/apache/helix/controller/stages/IntermediateStateCalcStage.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/IntermediateStateCalcStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/IntermediateStateCalcStage.java
index 6d10c6f..425f6fa 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/IntermediateStateCalcStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/IntermediateStateCalcStage.java
@@ -199,11 +199,12 @@ public class IntermediateStateCalcStage extends AbstractBaseStage {
             String errMsg = String.format(
                 "Problem: according to this assignment, instance %s contains more "
                     + "replicas/partitions than the maximum number allowed (%d). Pipeline will "
-                    + "stop the rebalance and pause the cluster %s",
+                    + "stop the rebalance and put the cluster %s into maintenance mode",
                 instance, maxPartitionPerInstance, cache.getClusterName());
             if (manager != null) {
               manager.getClusterManagmentTool().enableMaintenanceMode(manager.getClusterName(),
                   true, errMsg);
+              LogUtil.logWarn(logger, _eventId, errMsg);
             } else {
               LogUtil.logError(logger, _eventId,
                   "HelixManager is not set/null! Failed to pause this cluster/enable maintenance"


[2/2] helix git commit: HELIX-1269: improve semantics for BaseDataAccessor.remove()

Posted by jx...@apache.org.
HELIX-1269: improve semantics for BaseDataAccessor.remove()


Project: http://git-wip-us.apache.org/repos/asf/helix/repo
Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/a6863937
Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/a6863937
Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/a6863937

Branch: refs/heads/master
Commit: a6863937c7d404ffdf703d8f2f7a0735b41ea197
Parents: ce167f5
Author: Harry Zhang <hr...@linkedin.com>
Authored: Tue Aug 28 12:18:27 2018 -0700
Committer: Junkai Xue <jx...@linkedin.com>
Committed: Mon Oct 29 13:47:54 2018 -0700

----------------------------------------------------------------------
 .../helix/manager/zk/ZkBaseDataAccessor.java    | 21 ++++++++++++++------
 .../manager/zk/TestZkBaseDataAccessor.java      |  3 ++-
 2 files changed, 17 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/helix/blob/a6863937/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
index 7d0032e..6811766 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
@@ -530,18 +530,27 @@ public class ZkBaseDataAccessor<T> implements BaseDataAccessor<T> {
   }
 
   /**
-   * sync remove
+   * Sync remove. It tries to remove the ZNode and all its descendants, if any. A path that does
+   * not exist is regarded as success.
    */
   @Override
   public boolean remove(String path, int options) {
     try {
-      // optimize on common path
-      return _zkClient.delete(path);
+      // The operation does not throw when the path is successfully deleted or does not exist.
+      // Besides real errors, it also throws when the path is not empty; in that case we try
+      // to delete recursively.
+      _zkClient.delete(path);
     } catch (ZkException e) {
-      LOG.warn(String.format("Caught exception when deleting %s with options %s.", path, options),
-          e);
-      return _zkClient.deleteRecursive(path);
+      LOG.debug("Failed to delete {} with opts {}, err: {}. Try recursive delete", path, options,
+          e.getMessage());
+      try {
+        _zkClient.deleteRecursively(path);
+      } catch (HelixException he) {
+        LOG.error("Failed to delete {} recursively with opts {}.", path, options, he);
+        return false;
+      }
     }
+    return true;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/helix/blob/a6863937/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
index bee1f9a..671ce80 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
@@ -243,8 +243,9 @@ public class TestZkBaseDataAccessor extends ZkUnitTestBase {
     ZNRecord record = new ZNRecord("msg_0");
     ZkBaseDataAccessor<ZNRecord> accessor = new ZkBaseDataAccessor<ZNRecord>(_gZkClient);
 
+    // Base data accessor shall not fail when removing a non-existent path
     boolean success = accessor.remove(path, 0);
-    Assert.assertFalse(success);
+    Assert.assertTrue(success);
 
     success = accessor.create(path, record, AccessOption.PERSISTENT);
     Assert.assertTrue(success);