You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by jx...@apache.org on 2020/04/14 01:58:17 UTC

[helix] 01/01: Fix TestCrushAutoRebalanceNonRack failure of dropping instance

This is an automated email from the ASF dual-hosted git repository.

jxue pushed a commit to branch helix-1.0.0
in repository https://gitbox.apache.org/repos/asf/helix.git

commit 5b8297f4d2a15584443abd89926cb2e3e60af908
Author: Huizhi Lu <ih...@gmail.com>
AuthorDate: Mon Apr 13 13:25:20 2020 -0700

    Fix TestCrushAutoRebalanceNonRack failure of dropping instance
---
 .../org/apache/helix/manager/zk/ZKHelixAdmin.java  | 10 +++---
 .../helix/manager/zk/ZkBaseDataAccessor.java       |  5 +--
 .../TestCrushAutoRebalanceNonRack.java             | 14 +++++++-
 .../helix/manager/zk/TestZkBaseDataAccessor.java   | 20 ++++++++++++
 .../apache/helix/manager/zk/TestZkHelixAdmin.java  | 37 +++++++++++++++++++++-
 5 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
index bc62b18..14f73f9 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
@@ -83,6 +83,7 @@ import org.apache.helix.util.RebalanceUtil;
 import org.apache.helix.zookeeper.api.client.HelixZkClient;
 import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
+import org.apache.helix.zookeeper.exception.ZkClientException;
 import org.apache.helix.zookeeper.impl.client.FederatedZkClient;
 import org.apache.helix.zookeeper.impl.factory.SharedZkClientFactory;
 import org.apache.helix.zookeeper.util.HttpRoutingDataReader;
@@ -225,7 +226,7 @@ public class ZKHelixAdmin implements HelixAdmin {
       try {
         _zkClient.deleteRecursively(instancePath);
         return;
-      } catch (HelixException e) {
+      } catch (ZkClientException e) {
         if (retryCnt < 3 && e.getCause() instanceof ZkException && e.getCause()
             .getCause() instanceof KeeperException.NotEmptyException) {
           // Racing condition with controller's persisting node history, retryable.
@@ -235,9 +236,10 @@ public class ZKHelixAdmin implements HelixAdmin {
               instanceConfig.getInstanceName(), e.getCause().getMessage());
           retryCnt++;
         } else {
-          logger.error("Failed to drop instance {} (not retryable).",
-              instanceConfig.getInstanceName(), e.getCause());
-          throw e;
+          String errorMessage = "Failed to drop instance: " + instanceConfig.getInstanceName()
+              + ". Retry times: " + retryCnt;
+          logger.error(errorMessage, e);
+          throw new HelixException(errorMessage, e);
         }
       }
     }
diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
index a7596f4..d5ea20b 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
@@ -40,6 +40,7 @@ import org.apache.helix.util.HelixUtil;
 import org.apache.helix.zookeeper.api.client.HelixZkClient;
 import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
+import org.apache.helix.zookeeper.exception.ZkClientException;
 import org.apache.helix.zookeeper.impl.client.FederatedZkClient;
 import org.apache.helix.zookeeper.impl.factory.DedicatedZkClientFactory;
 import org.apache.helix.zookeeper.impl.factory.SharedZkClientFactory;
@@ -703,8 +704,8 @@ public class ZkBaseDataAccessor<T> implements BaseDataAccessor<T> {
           e.getMessage());
       try {
         _zkClient.deleteRecursively(path);
-      } catch (HelixException he) {
-        LOG.error("Failed to delete {} recursively with opts {}.", path, options, he);
+      } catch (ZkClientException zce) {
+        LOG.error("Failed to delete {} recursively with opts {}.", path, options, zce);
         return false;
       }
     }
diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java
index 5b7d083..f889074 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java
@@ -28,11 +28,15 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.helix.ConfigAccessor;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.InstanceType;
+import org.apache.helix.TestHelper;
 import org.apache.helix.controller.rebalancer.strategy.CrushEdRebalanceStrategy;
 import org.apache.helix.controller.rebalancer.strategy.CrushRebalanceStrategy;
 import org.apache.helix.integration.common.ZkStandAloneCMTestBase;
 import org.apache.helix.integration.manager.ClusterControllerManager;
 import org.apache.helix.integration.manager.MockParticipantManager;
+import org.apache.helix.manager.zk.ZKHelixDataAccessor;
 import org.apache.helix.model.BuiltInStateModelDefinitions;
 import org.apache.helix.model.ClusterConfig;
 import org.apache.helix.model.ExternalView;
@@ -42,6 +46,7 @@ import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.tools.ClusterVerifiers.HelixClusterVerifier;
 import org.apache.helix.tools.ClusterVerifiers.StrictMatchExternalViewVerifier;
 import org.apache.helix.tools.ClusterVerifiers.ZkHelixClusterVerifier;
+import org.apache.helix.util.InstanceValidationUtil;
 import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.AfterMethod;
@@ -242,13 +247,20 @@ public class TestCrushAutoRebalanceNonRack extends ZkStandAloneCMTestBase {
     enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true);
 
     // shutdown participants, keep only two left
+    HelixDataAccessor helixDataAccessor =
+        new ZKHelixDataAccessor(CLUSTER_NAME, InstanceType.PARTICIPANT, _baseAccessor);
     for (int i = 2; i < _participants.size(); i++) {
       MockParticipantManager p = _participants.get(i);
       p.syncStop();
       _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, p.getInstanceName(),
           false);
+      Assert.assertTrue(TestHelper.verify(() -> {
+        _gSetupTool.getClusterManagementTool()
+            .enableInstance(CLUSTER_NAME, p.getInstanceName(), false);
+        return !InstanceValidationUtil.isEnabled(helixDataAccessor, p.getInstanceName())
+            && !InstanceValidationUtil.isAlive(helixDataAccessor, p.getInstanceName());
+      }, TestHelper.WAIT_DURATION), "Instance should be disabled and offline");
       _gSetupTool.dropInstanceFromCluster(CLUSTER_NAME, p.getInstanceName());
-
     }
 
     int j = 0;
diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
index 3e03125..9e56b6e 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
@@ -30,15 +30,19 @@ import org.apache.helix.AccessOption;
 import org.apache.helix.BaseDataAccessor;
 import org.apache.helix.PropertyPathBuilder;
 import org.apache.helix.TestHelper;
+import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
 import org.apache.helix.zookeeper.datamodel.ZNRecordUpdater;
 import org.apache.helix.ZkUnitTestBase;
 import org.apache.helix.manager.zk.ZkBaseDataAccessor.AccessResult;
 import org.apache.helix.manager.zk.ZkBaseDataAccessor.RetCode;
+import org.apache.helix.zookeeper.exception.ZkClientException;
 import org.apache.helix.zookeeper.zkclient.DataUpdater;
+import org.apache.helix.zookeeper.zkclient.exception.ZkException;
 import org.apache.helix.zookeeper.zkclient.exception.ZkMarshallingError;
 import org.apache.helix.zookeeper.zkclient.serialize.ZkSerializer;
 import org.apache.zookeeper.data.Stat;
+import org.mockito.Mockito;
 import org.testng.Assert;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.Test;
@@ -343,6 +347,22 @@ public class TestZkBaseDataAccessor extends ZkUnitTestBase {
     Assert.assertNotNull(getRecord);
     Assert.assertEquals(getRecord.getId(), "msg_0");
 
+    // Tests that ZkClientException thrown from ZkClient should be caught
+    // and remove() should return false.
+    RealmAwareZkClient mockZkClient = Mockito.mock(RealmAwareZkClient.class);
+    Mockito.doThrow(new ZkException("Failed to delete " + path)).when(mockZkClient)
+        .delete(path);
+    Mockito.doThrow(new ZkClientException("Failed to recursively delete " + path)).when(mockZkClient)
+        .deleteRecursively(path);
+    ZkBaseDataAccessor<ZNRecord> accessorMock =
+        new ZkBaseDataAccessor<>(mockZkClient);
+    try {
+      Assert.assertFalse(accessorMock.remove(path, AccessOption.PERSISTENT),
+          "Should return false because ZkClientException is thrown");
+    } catch (ZkClientException e) {
+      Assert.fail("Should not throw ZkClientException because it should be caught.");
+    }
+
     success = accessor.remove(path, 0);
     Assert.assertTrue(success);
     Assert.assertFalse(_gZkClient.exists(path));
diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
index 234f2c3..639a4b1 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
@@ -41,6 +41,7 @@ import org.apache.helix.PropertyKey;
 import org.apache.helix.PropertyPathBuilder;
 import org.apache.helix.PropertyType;
 import org.apache.helix.TestHelper;
+import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
 import org.apache.helix.ZkUnitTestBase;
 import org.apache.helix.controller.rebalancer.waged.WagedRebalancer;
@@ -48,7 +49,6 @@ import org.apache.helix.examples.MasterSlaveStateModelFactory;
 import org.apache.helix.integration.manager.MockParticipantManager;
 import org.apache.helix.model.ClusterConfig;
 import org.apache.helix.cloud.constants.CloudProvider;
-import org.apache.helix.examples.MasterSlaveStateModelFactory;
 import org.apache.helix.model.CloudConfig;
 import org.apache.helix.model.ClusterConstraints;
 import org.apache.helix.model.ClusterConstraints.ConstraintAttribute;
@@ -66,8 +66,12 @@ import org.apache.helix.model.builder.ConstraintItemBuilder;
 import org.apache.helix.model.builder.HelixConfigScopeBuilder;
 import org.apache.helix.participant.StateMachineEngine;
 import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.helix.zookeeper.exception.ZkClientException;
+import org.apache.helix.zookeeper.zkclient.exception.ZkException;
+import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.data.Stat;
 import org.codehaus.jackson.map.ObjectMapper;
+import org.mockito.Mockito;
 import org.testng.Assert;
 import org.testng.AssertJUnit;
 import org.testng.annotations.BeforeClass;
@@ -191,6 +195,36 @@ public class TestZkHelixAdmin extends ZkUnitTestBase {
       Assert.fail("HelixManager failed disconnecting");
     }
 
+    // Tests that ZkClientException thrown from ZkClient should be caught
+    // and it should be converted HelixException to be rethrown
+    String instancePath = PropertyPathBuilder.instance(clusterName, config.getInstanceName());
+    String instanceConfigPath = PropertyPathBuilder.instanceConfig(clusterName, instanceName);
+    String liveInstancePath = PropertyPathBuilder.liveInstance(clusterName, instanceName);
+    RealmAwareZkClient mockZkClient = Mockito.mock(RealmAwareZkClient.class);
+    // Mock the exists() method to let dropInstance() reach deleteRecursively().
+    Mockito.when(mockZkClient.exists(instanceConfigPath)).thenReturn(true);
+    Mockito.when(mockZkClient.exists(instancePath)).thenReturn(true);
+    Mockito.when(mockZkClient.exists(liveInstancePath)).thenReturn(false);
+    Mockito.doThrow(new ZkClientException("ZkClientException: failed to delete " + instancePath,
+        new ZkException("ZkException: failed to delete " + instancePath,
+            new KeeperException.NotEmptyException(
+                "NotEmptyException: directory" + instancePath + " is not empty"))))
+        .when(mockZkClient).deleteRecursively(instancePath);
+
+    HelixAdmin helixAdminMock = new ZKHelixAdmin(mockZkClient);
+    try {
+      helixAdminMock.dropInstance(clusterName, config);
+      Assert.fail("Should throw HelixException");
+    } catch (HelixException expected) {
+      // This exception is expected because it is converted from ZkClientException and rethrown.
+      Assert.assertEquals(expected.getMessage(),
+          "Failed to drop instance: " + config.getInstanceName() + ". Retry times: 3");
+    } catch (ZkClientException e) {
+      if (e.getMessage().equals("ZkClientException: failed to delete " + instancePath)) {
+        Assert.fail("Should not throw ZkClientException because it should be caught.");
+      }
+    }
+
     tool.dropInstance(clusterName, config); // correctly drop the instance
 
     try {
@@ -211,6 +245,7 @@ public class TestZkHelixAdmin extends ZkUnitTestBase {
     } catch (HelixException e) {
       // OK
     }
+
     ZNRecord stateModelRecord = new ZNRecord("id1");
     try {
       tool.addStateModelDef(clusterName, "id1", new StateModelDefinition(stateModelRecord));