You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by jx...@apache.org on 2020/04/14 01:51:19 UTC
[helix] 01/02: Fix TestCrushAutoRebalanceNonRack failure of
dropping instance
This is an automated email from the ASF dual-hosted git repository.
jxue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git
commit 4dcf624777786fadcad5ed71471db27a775e36eb
Author: Huizhi Lu <ih...@gmail.com>
AuthorDate: Mon Apr 13 13:25:20 2020 -0700
Fix TestCrushAutoRebalanceNonRack failure of dropping instance
---
.../org/apache/helix/manager/zk/ZKHelixAdmin.java | 10 +++---
.../helix/manager/zk/ZkBaseDataAccessor.java | 5 +--
.../TestCrushAutoRebalanceNonRack.java | 14 +++++++-
.../helix/manager/zk/TestZkBaseDataAccessor.java | 20 ++++++++++++
.../apache/helix/manager/zk/TestZkHelixAdmin.java | 37 +++++++++++++++++++++-
5 files changed, 78 insertions(+), 8 deletions(-)
diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
index bc62b18..095fd08 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
@@ -83,6 +83,7 @@ import org.apache.helix.util.RebalanceUtil;
import org.apache.helix.zookeeper.api.client.HelixZkClient;
import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
+import org.apache.helix.zookeeper.exception.ZkClientException;
import org.apache.helix.zookeeper.impl.client.FederatedZkClient;
import org.apache.helix.zookeeper.impl.factory.SharedZkClientFactory;
import org.apache.helix.zookeeper.util.HttpRoutingDataReader;
@@ -225,7 +226,7 @@ public class ZKHelixAdmin implements HelixAdmin {
try {
_zkClient.deleteRecursively(instancePath);
return;
- } catch (HelixException e) {
+ } catch (ZkClientException e) {
if (retryCnt < 3 && e.getCause() instanceof ZkException && e.getCause()
.getCause() instanceof KeeperException.NotEmptyException) {
// Racing condition with controller's persisting node history, retryable.
@@ -235,9 +236,10 @@ public class ZKHelixAdmin implements HelixAdmin {
instanceConfig.getInstanceName(), e.getCause().getMessage());
retryCnt++;
} else {
- logger.error("Failed to drop instance {} (not retryable).",
- instanceConfig.getInstanceName(), e.getCause());
- throw e;
+ String errorMessage = "Failed to drop instance: " + instanceConfig.getInstanceName()
+ + ". Retry times: " + retryCnt;
+ logger.error(errorMessage, retryCnt, e.getCause());
+ throw new HelixException(errorMessage, e);
}
}
}
diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
index a7596f4..d5ea20b 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZkBaseDataAccessor.java
@@ -40,6 +40,7 @@ import org.apache.helix.util.HelixUtil;
import org.apache.helix.zookeeper.api.client.HelixZkClient;
import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
+import org.apache.helix.zookeeper.exception.ZkClientException;
import org.apache.helix.zookeeper.impl.client.FederatedZkClient;
import org.apache.helix.zookeeper.impl.factory.DedicatedZkClientFactory;
import org.apache.helix.zookeeper.impl.factory.SharedZkClientFactory;
@@ -703,8 +704,8 @@ public class ZkBaseDataAccessor<T> implements BaseDataAccessor<T> {
e.getMessage());
try {
_zkClient.deleteRecursively(path);
- } catch (HelixException he) {
- LOG.error("Failed to delete {} recursively with opts {}.", path, options, he);
+ } catch (ZkClientException zce) {
+ LOG.error("Failed to delete {} recursively with opts {}.", path, options, zce);
return false;
}
}
diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java
index 5b7d083..f889074 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/CrushRebalancers/TestCrushAutoRebalanceNonRack.java
@@ -28,11 +28,15 @@ import java.util.Map;
import java.util.Set;
import org.apache.helix.ConfigAccessor;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.InstanceType;
+import org.apache.helix.TestHelper;
import org.apache.helix.controller.rebalancer.strategy.CrushEdRebalanceStrategy;
import org.apache.helix.controller.rebalancer.strategy.CrushRebalanceStrategy;
import org.apache.helix.integration.common.ZkStandAloneCMTestBase;
import org.apache.helix.integration.manager.ClusterControllerManager;
import org.apache.helix.integration.manager.MockParticipantManager;
+import org.apache.helix.manager.zk.ZKHelixDataAccessor;
import org.apache.helix.model.BuiltInStateModelDefinitions;
import org.apache.helix.model.ClusterConfig;
import org.apache.helix.model.ExternalView;
@@ -42,6 +46,7 @@ import org.apache.helix.model.InstanceConfig;
import org.apache.helix.tools.ClusterVerifiers.HelixClusterVerifier;
import org.apache.helix.tools.ClusterVerifiers.StrictMatchExternalViewVerifier;
import org.apache.helix.tools.ClusterVerifiers.ZkHelixClusterVerifier;
+import org.apache.helix.util.InstanceValidationUtil;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.AfterMethod;
@@ -242,13 +247,20 @@ public class TestCrushAutoRebalanceNonRack extends ZkStandAloneCMTestBase {
enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true);
// shutdown participants, keep only two left
+ HelixDataAccessor helixDataAccessor =
+ new ZKHelixDataAccessor(CLUSTER_NAME, InstanceType.PARTICIPANT, _baseAccessor);
for (int i = 2; i < _participants.size(); i++) {
MockParticipantManager p = _participants.get(i);
p.syncStop();
_gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, p.getInstanceName(),
false);
+ Assert.assertTrue(TestHelper.verify(() -> {
+ _gSetupTool.getClusterManagementTool()
+ .enableInstance(CLUSTER_NAME, p.getInstanceName(), false);
+ return !InstanceValidationUtil.isEnabled(helixDataAccessor, p.getInstanceName())
+ && !InstanceValidationUtil.isAlive(helixDataAccessor, p.getInstanceName());
+ }, TestHelper.WAIT_DURATION), "Instance should be disabled and offline");
_gSetupTool.dropInstanceFromCluster(CLUSTER_NAME, p.getInstanceName());
-
}
int j = 0;
diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
index 3e03125..9e56b6e 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkBaseDataAccessor.java
@@ -30,15 +30,19 @@ import org.apache.helix.AccessOption;
import org.apache.helix.BaseDataAccessor;
import org.apache.helix.PropertyPathBuilder;
import org.apache.helix.TestHelper;
+import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
import org.apache.helix.zookeeper.datamodel.ZNRecordUpdater;
import org.apache.helix.ZkUnitTestBase;
import org.apache.helix.manager.zk.ZkBaseDataAccessor.AccessResult;
import org.apache.helix.manager.zk.ZkBaseDataAccessor.RetCode;
+import org.apache.helix.zookeeper.exception.ZkClientException;
import org.apache.helix.zookeeper.zkclient.DataUpdater;
+import org.apache.helix.zookeeper.zkclient.exception.ZkException;
import org.apache.helix.zookeeper.zkclient.exception.ZkMarshallingError;
import org.apache.helix.zookeeper.zkclient.serialize.ZkSerializer;
import org.apache.zookeeper.data.Stat;
+import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.Test;
@@ -343,6 +347,22 @@ public class TestZkBaseDataAccessor extends ZkUnitTestBase {
Assert.assertNotNull(getRecord);
Assert.assertEquals(getRecord.getId(), "msg_0");
+ // Tests that ZkClientException thrown from ZkClient should be caught
+ // and remove() should return false.
+ RealmAwareZkClient mockZkClient = Mockito.mock(RealmAwareZkClient.class);
+ Mockito.doThrow(new ZkException("Failed to delete " + path)).when(mockZkClient)
+ .delete(path);
+ Mockito.doThrow(new ZkClientException("Failed to recursively delete " + path)).when(mockZkClient)
+ .deleteRecursively(path);
+ ZkBaseDataAccessor<ZNRecord> accessorMock =
+ new ZkBaseDataAccessor<>(mockZkClient);
+ try {
+ Assert.assertFalse(accessorMock.remove(path, AccessOption.PERSISTENT),
+ "Should return false because ZkClientException is thrown");
+ } catch (ZkClientException e) {
+ Assert.fail("Should not throw ZkClientException because it should be caught.");
+ }
+
success = accessor.remove(path, 0);
Assert.assertTrue(success);
Assert.assertFalse(_gZkClient.exists(path));
diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
index 234f2c3..639a4b1 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
@@ -41,6 +41,7 @@ import org.apache.helix.PropertyKey;
import org.apache.helix.PropertyPathBuilder;
import org.apache.helix.PropertyType;
import org.apache.helix.TestHelper;
+import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
import org.apache.helix.ZkUnitTestBase;
import org.apache.helix.controller.rebalancer.waged.WagedRebalancer;
@@ -48,7 +49,6 @@ import org.apache.helix.examples.MasterSlaveStateModelFactory;
import org.apache.helix.integration.manager.MockParticipantManager;
import org.apache.helix.model.ClusterConfig;
import org.apache.helix.cloud.constants.CloudProvider;
-import org.apache.helix.examples.MasterSlaveStateModelFactory;
import org.apache.helix.model.CloudConfig;
import org.apache.helix.model.ClusterConstraints;
import org.apache.helix.model.ClusterConstraints.ConstraintAttribute;
@@ -66,8 +66,12 @@ import org.apache.helix.model.builder.ConstraintItemBuilder;
import org.apache.helix.model.builder.HelixConfigScopeBuilder;
import org.apache.helix.participant.StateMachineEngine;
import org.apache.helix.tools.StateModelConfigGenerator;
+import org.apache.helix.zookeeper.exception.ZkClientException;
+import org.apache.helix.zookeeper.zkclient.exception.ZkException;
+import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
import org.codehaus.jackson.map.ObjectMapper;
+import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.AssertJUnit;
import org.testng.annotations.BeforeClass;
@@ -191,6 +195,36 @@ public class TestZkHelixAdmin extends ZkUnitTestBase {
Assert.fail("HelixManager failed disconnecting");
}
+ // Tests that ZkClientException thrown from ZkClient should be caught
+ // and it should be converted HelixException to be rethrown
+ String instancePath = PropertyPathBuilder.instance(clusterName, config.getInstanceName());
+ String instanceConfigPath = PropertyPathBuilder.instanceConfig(clusterName, instanceName);
+ String liveInstancePath = PropertyPathBuilder.liveInstance(clusterName, instanceName);
+ RealmAwareZkClient mockZkClient = Mockito.mock(RealmAwareZkClient.class);
+ // Mock the exists() method to let dropInstance() reach deleteRecursively().
+ Mockito.when(mockZkClient.exists(instanceConfigPath)).thenReturn(true);
+ Mockito.when(mockZkClient.exists(instancePath)).thenReturn(true);
+ Mockito.when(mockZkClient.exists(liveInstancePath)).thenReturn(false);
+ Mockito.doThrow(new ZkClientException("ZkClientException: failed to delete " + instancePath,
+ new ZkException("ZkException: failed to delete " + instancePath,
+ new KeeperException.NotEmptyException(
+ "NotEmptyException: directory" + instancePath + " is not empty"))))
+ .when(mockZkClient).deleteRecursively(instancePath);
+
+ HelixAdmin helixAdminMock = new ZKHelixAdmin(mockZkClient);
+ try {
+ helixAdminMock.dropInstance(clusterName, config);
+ Assert.fail("Should throw HelixException");
+ } catch (HelixException expected) {
+ // This exception is expected because it is converted from ZkClientException and rethrown.
+ Assert.assertEquals(expected.getMessage(),
+ "Failed to drop instance: " + config.getInstanceName() + ". Retry times: 3");
+ } catch (ZkClientException e) {
+ if (e.getMessage().equals("ZkClientException: failed to delete " + instancePath)) {
+ Assert.fail("Should not throw ZkClientException because it should be caught.");
+ }
+ }
+
tool.dropInstance(clusterName, config); // correctly drop the instance
try {
@@ -211,6 +245,7 @@ public class TestZkHelixAdmin extends ZkUnitTestBase {
} catch (HelixException e) {
// OK
}
+
ZNRecord stateModelRecord = new ZNRecord("id1");
try {
tool.addStateModelDef(clusterName, "id1", new StateModelDefinition(stateModelRecord));