You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by hu...@apache.org on 2020/01/31 23:33:44 UTC

[helix] branch zooscalability updated (bba0142 -> 8f44113)

This is an automated email from the ASF dual-hosted git repository.

hulee pushed a change to branch zooscalability
in repository https://gitbox.apache.org/repos/asf/helix.git.


    from bba0142  Add MetadataStoreRoutingData interface and TrieRoutingData class to helix-rest
     new 3e10668  Bump jackson-databind from 2.9.5 to 2.9.10.1 in /helix-rest (#597)
     new 8f44113  Integration test for controller connect and disconnect (#681)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../main/java/org/apache/helix/HelixTimerTask.java |   5 +
 .../healthcheck/ParticipantHealthReportTask.java   |   2 -
 .../apache/helix/manager/zk/ZKHelixManager.java    |   4 +-
 .../helix/integration/TestStandAloneCMMain.java    |  83 -----------
 .../controller/TestControllerLeadershipChange.java | 165 +++++++++++++++++++--
 .../manager/ClusterControllerManager.java          |   7 +-
 helix-rest/pom.xml                                 |   2 +-
 7 files changed, 167 insertions(+), 101 deletions(-)
 delete mode 100644 helix-core/src/test/java/org/apache/helix/integration/TestStandAloneCMMain.java


[helix] 02/02: Integration test for controller connect and disconnect (#681)

Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

hulee pushed a commit to branch zooscalability
in repository https://gitbox.apache.org/repos/asf/helix.git

commit 8f44113832d43983ba54b3a811f979d0137c2b93
Author: Yi Wang <i3...@gmail.com>
AuthorDate: Thu Jan 30 14:41:54 2020 -0800

    Integration test for controller connect and disconnect (#681)
    
    Add integration tests for verifying controller's behavior on/off leadership of a cluster
---
 .../main/java/org/apache/helix/HelixTimerTask.java |   5 +
 .../healthcheck/ParticipantHealthReportTask.java   |   2 -
 .../apache/helix/manager/zk/ZKHelixManager.java    |   4 +-
 .../helix/integration/TestStandAloneCMMain.java    |  83 -----------
 .../controller/TestControllerLeadershipChange.java | 165 +++++++++++++++++++--
 .../manager/ClusterControllerManager.java          |   7 +-
 6 files changed, 166 insertions(+), 100 deletions(-)

diff --git a/helix-core/src/main/java/org/apache/helix/HelixTimerTask.java b/helix-core/src/main/java/org/apache/helix/HelixTimerTask.java
index e53ac50..a26d5af 100644
--- a/helix-core/src/main/java/org/apache/helix/HelixTimerTask.java
+++ b/helix-core/src/main/java/org/apache/helix/HelixTimerTask.java
@@ -19,10 +19,15 @@ package org.apache.helix;
  * under the License.
  */
 
+import java.util.Timer;
+
+
 /**
  * Interface for defining a generic task to run periodically.
  */
 public abstract class HelixTimerTask {
+  protected Timer _timer = null;
+
   /**
    * Start a timer task
    */
diff --git a/helix-core/src/main/java/org/apache/helix/healthcheck/ParticipantHealthReportTask.java b/helix-core/src/main/java/org/apache/helix/healthcheck/ParticipantHealthReportTask.java
index f48d6d7..8f79401 100644
--- a/helix-core/src/main/java/org/apache/helix/healthcheck/ParticipantHealthReportTask.java
+++ b/helix-core/src/main/java/org/apache/helix/healthcheck/ParticipantHealthReportTask.java
@@ -32,7 +32,6 @@ public class ParticipantHealthReportTask extends HelixTimerTask {
   public final static int DEFAULT_REPORT_LATENCY = 60 * 1000;
   private final int _reportLatency;
 
-  Timer _timer;
   final ParticipantHealthReportCollectorImpl _healthReportCollector;
 
   class ParticipantHealthReportTimerTask extends TimerTask {
@@ -75,5 +74,4 @@ public class ParticipantHealthReportTask extends HelixTimerTask {
       LOG.warn("ParticipantHealthReportTimerTask already stopped");
     }
   }
-
 }
diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixManager.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixManager.java
index 0d66af8..ef0308e 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixManager.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixManager.java
@@ -32,7 +32,6 @@ import javax.management.JMException;
 
 import com.google.common.collect.Sets;
 import org.I0Itec.zkclient.exception.ZkInterruptedException;
-import org.I0Itec.zkclient.exception.ZkTimeoutException;
 import org.apache.helix.BaseDataAccessor;
 import org.apache.helix.ClusterMessagingService;
 import org.apache.helix.ConfigAccessor;
@@ -161,8 +160,7 @@ public class ZKHelixManager implements HelixManager, IZkStateListener {
   /**
    * status dump timer-task
    */
-  static class StatusDumpTask extends HelixTimerTask {
-    Timer _timer = null;
+  protected static class StatusDumpTask extends HelixTimerTask {
     final HelixManager helixController;
 
     public StatusDumpTask(HelixManager helixController) {
diff --git a/helix-core/src/test/java/org/apache/helix/integration/TestStandAloneCMMain.java b/helix-core/src/test/java/org/apache/helix/integration/TestStandAloneCMMain.java
deleted file mode 100644
index 596c8b3..0000000
--- a/helix-core/src/test/java/org/apache/helix/integration/TestStandAloneCMMain.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package org.apache.helix.integration;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import java.util.Date;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.helix.HelixDataAccessor;
-import org.apache.helix.PropertyKey;
-import org.apache.helix.TestHelper;
-import org.apache.helix.integration.common.ZkStandAloneCMTestBase;
-import org.apache.helix.integration.manager.ClusterControllerManager;
-import org.apache.helix.manager.zk.ZKHelixDataAccessor;
-import org.apache.helix.manager.zk.ZkBaseDataAccessor;
-import org.apache.helix.model.LiveInstance;
-import org.apache.helix.tools.ClusterStateVerifier;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-public class TestStandAloneCMMain extends ZkStandAloneCMTestBase {
-  private static Logger logger = LoggerFactory.getLogger(TestStandAloneCMMain.class);
-
-  @Test()
-  public void testStandAloneCMMain() throws Exception {
-    logger.info("RUN testStandAloneCMMain() at " + new Date(System.currentTimeMillis()));
-
-    // Keep references to the controllers created so that they could be shut down
-    Set<ClusterControllerManager> controllers = new HashSet<>();
-
-    ClusterControllerManager newController = null;
-    for (int i = 1; i <= 2; i++) {
-      String controllerName = "controller_" + i;
-      newController = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, controllerName);
-      newController.syncStart();
-      controllers.add(newController);
-    }
-
-    _controller.syncStop();
-
-    final HelixDataAccessor accessor =
-        new ZKHelixDataAccessor(CLUSTER_NAME, new ZkBaseDataAccessor<>(_gZkClient));
-    final PropertyKey.Builder keyBuilder = accessor.keyBuilder();
-    final String newControllerName = newController.getInstanceName();
-    TestHelper.verify(() -> {
-      LiveInstance leader = accessor.getProperty(keyBuilder.controllerLeader());
-      if (leader == null) {
-        return false;
-      }
-      return leader.getInstanceName().equals(newControllerName);
-
-    }, 30 * 1000);
-
-    Assert.assertTrue(ClusterStateVerifier.verifyByPolling(
-        new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, CLUSTER_NAME)));
-
-    // Shut down all controllers so that the cluster could be deleted
-    for (ClusterControllerManager controller : controllers) {
-      controller.syncStop();
-    }
-
-    logger.info("STOP testStandAloneCMMain() at " + new Date(System.currentTimeMillis()));
-  }
-}
diff --git a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
index 60d19ed..73eeb55 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
@@ -20,6 +20,7 @@ package org.apache.helix.integration.controller;
  */
 
 import java.lang.management.ManagementFactory;
+import java.util.List;
 import javax.management.MBeanServer;
 import javax.management.ObjectName;
 
@@ -30,16 +31,155 @@ import org.apache.helix.HelixManagerFactory;
 import org.apache.helix.InstanceType;
 import org.apache.helix.PropertyPathBuilder;
 import org.apache.helix.common.ZkTestBase;
+import org.apache.helix.integration.manager.ClusterControllerManager;
 import org.apache.helix.integration.manager.MockParticipantManager;
+import org.apache.helix.manager.zk.CallbackHandler;
+import org.apache.helix.manager.zk.client.HelixZkClient;
 import org.apache.helix.model.IdealState;
 import org.apache.helix.model.LiveInstance;
 import org.apache.helix.monitoring.mbeans.MonitorDomainNames;
 import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier;
 import org.apache.helix.tools.ClusterVerifiers.ZkHelixClusterVerifier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
+
+/**
+ * Integration tests for controller leadership across several phases of the test cluster:
+ *  1. When a standalone controller becomes the leader
+ *  2. When a standalone leader relinquishes the leadership
+ *  3. When the leader node relinquishes the leadership and the other controller takes it over
+ */
 public class TestControllerLeadershipChange extends ZkTestBase {
+  private static final Logger LOG = LoggerFactory.getLogger(TestControllerLeadershipChange.class);
+  private final String CLASS_NAME = getShortClassName();
+  private final String CLUSTER_NAME = "TestCluster-" + CLASS_NAME;
+
+  @BeforeClass
+  public void beforeClass() throws Exception {
+    super.beforeClass();
+    _gSetupTool.addCluster(CLUSTER_NAME, true);
+    _gSetupTool.addInstanceToCluster(CLUSTER_NAME, "TestInstance");
+    _gSetupTool.addResourceToCluster(CLUSTER_NAME, "TestResource", 10, "MasterSlave");
+  }
+
+  @AfterClass
+  public void afterClass() {
+    deleteCluster(CLUSTER_NAME);
+  }
+
+  @Test
+  public void testControllerConnectThenDisconnect() {
+    ClusterControllerManager controller =
+        new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, "TestController");
+    long start = System.currentTimeMillis();
+    controller.syncStart();
+    verifyControllerIsLeader(controller);
+    LOG.info(System.currentTimeMillis() - start + "ms spent on becoming the leader");
+
+    start = System.currentTimeMillis();
+    controller.syncStop();
+    verifyControllerIsNotLeader(controller);
+    verifyZKDisconnected(controller);
+
+    LOG.info(
+        System.currentTimeMillis() - start + "ms spent on becoming the standby node from leader");
+  }
+
+  @Test(description = "If the cluster has a controller, the second controller cannot take its leadership")
+  public void testWhenControllerAlreadyExists() {
+    // when the first controller has already taken over the leadership
+    ClusterControllerManager firstController =
+        new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, "FirstController");
+    firstController.syncStart();
+    verifyControllerIsLeader(firstController);
+
+    ClusterControllerManager secondController =
+        new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, "SecondController");
+    secondController.syncStart();
+    // The second controller cannot acquire the leadership from existing controller
+    verifyControllerIsNotLeader(secondController);
+    // but the zkClient is still connected
+    Assert.assertFalse(secondController.getZkClient().isClosed());
+
+    // stop the controllers
+    firstController.syncStop();
+    secondController.syncStop();
+  }
+
+  @Test
+  public void testWhenLeadershipSwitch() {
+    ClusterControllerManager firstController =
+        new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, "FirstController");
+    ClusterControllerManager secondController =
+        new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, "SecondController");
+    firstController.syncStart();
+    verifyControllerIsLeader(firstController);
+    firstController.syncStop();
+    verifyControllerIsNotLeader(firstController);
+    long start = System.currentTimeMillis();
+
+    // the second controller is started after the first controller is stopped
+    secondController.syncStart();
+    verifyControllerIsLeader(secondController);
+    verifyZKDisconnected(firstController);
+    long end = System.currentTimeMillis();
+    LOG.info(end - start + "ms spent on the leadership switch");
+    secondController.syncStop();
+  }
+
+  /**
+   * If the controller is the leader of a cluster,
+   * 1. The LEADER node in ZK reflects the leadership of the controller
+   * 2. All the callback handlers are ready (successfully registered)
+   * 3. Controller Timer tasks are scheduled
+   */
+  private void verifyControllerIsLeader(ClusterControllerManager controller) {
+    // check against the leader node
+    Assert.assertTrue(controller.isLeader());
+
+    // check the callback handlers are correctly registered
+    List<CallbackHandler> callbackHandlers = controller.getHandlers();
+    Assert.assertTrue(callbackHandlers.size() > 0);
+    callbackHandlers.forEach(callbackHandler -> Assert.assertTrue(callbackHandler.isReady()));
+
+    // check the zk connection is open
+    HelixZkClient zkClient = controller.getZkClient();
+    Assert.assertFalse(zkClient.isClosed());
+    Long sessionId = zkClient.getSessionId();
+    Assert.assertNotNull(sessionId);
+
+    // check the controller related timer tasks are all active
+    //TODO: currently no good way to check the state of controller timer tasks without
+    // adding a public method only for test purpose
+//    Assert.assertTrue(controller.getControllerTimerTasks().size() > 0);
+  }
+
+  /**
+   * When the controller is not the leader of a cluster, none of the properties checked by
+   * {@link #verifyControllerIsLeader(ClusterControllerManager)} will hold.
+   * NOTE: it's possible the ZKConnection is open while the controller is not the leader
+   */
+  private void verifyControllerIsNotLeader(ClusterControllerManager controller) {
+    // check against the leader node
+    Assert.assertFalse(controller.isLeader());
+
+    // check no callback handler is leaked
+    Assert.assertTrue(controller.getHandlers().isEmpty());
+
+    // check the controller related timer tasks are all disabled
+//    Assert.assertTrue(controller.getControllerTimerTasks().isEmpty());
+  }
+
+  private void verifyZKDisconnected(ClusterControllerManager controller) {
+    // If the ZK connection is closed, it also means all ZK watchers of the session
+    // will be deleted on ZK servers
+    Assert.assertTrue(controller.getZkClient().isClosed());
+  }
 
   @Test
   public void testMissingTopStateDurationMonitoring() throws Exception {
@@ -66,8 +206,9 @@ public class TestControllerLeadershipChange extends ZkTestBase {
     participant.syncStart();
 
     // Create controller, since this is the only controller, it will be the leader
-    HelixManager manager1 = HelixManagerFactory.getZKHelixManager(clusterName,
-        clusterName + "-manager1", InstanceType.CONTROLLER, ZK_ADDR);
+    HelixManager manager1 = HelixManagerFactory
+        .getZKHelixManager(clusterName, clusterName + "-manager1", InstanceType.CONTROLLER,
+            ZK_ADDR);
     manager1.connect();
     Assert.assertTrue(manager1.isLeader());
 
@@ -87,8 +228,9 @@ public class TestControllerLeadershipChange extends ZkTestBase {
     Thread.sleep(1000);
 
     // Starting manager2
-    HelixManager manager2 = HelixManagerFactory.getZKHelixManager(clusterName,
-        clusterName + "-manager2", InstanceType.CONTROLLER, ZK_ADDR);
+    HelixManager manager2 = HelixManagerFactory
+        .getZKHelixManager(clusterName, clusterName + "-manager2", InstanceType.CONTROLLER,
+            ZK_ADDR);
     manager2.connect();
 
     // Set leader to manager2
@@ -117,8 +259,8 @@ public class TestControllerLeadershipChange extends ZkTestBase {
     // Resource lost top state, and manager1 lost leadership for 2000ms, because manager1 will
     // clean monitoring cache after re-gaining leadership, so max value of hand off duration should
     // not have such a large value
-    Assert.assertTrue((long) beanServer.getAttribute(resourceMBeanObjectName,
-        "PartitionTopStateHandoffDurationGauge.Max") < 500);
+    Assert.assertTrue((long) beanServer
+        .getAttribute(resourceMBeanObjectName, "PartitionTopStateHandoffDurationGauge.Max") < 500);
 
     participant.syncStop();
     manager1.disconnect();
@@ -127,7 +269,6 @@ public class TestControllerLeadershipChange extends ZkTestBase {
   }
 
   private void setLeader(HelixManager manager) throws Exception {
-    System.out.println("Setting controller " + manager.getInstanceName() + " as leader");
     HelixDataAccessor accessor = manager.getHelixDataAccessor();
     final LiveInstance leader = new LiveInstance(manager.getInstanceName());
     leader.setLiveInstance(ManagementFactory.getRuntimeMXBean().getName());
@@ -136,15 +277,17 @@ public class TestControllerLeadershipChange extends ZkTestBase {
 
     // Delete the current controller leader node so it will trigger leader election
     while (!manager.isLeader()) {
-      accessor.getBaseDataAccessor().remove(
-          PropertyPathBuilder.controllerLeader(manager.getClusterName()), AccessOption.EPHEMERAL);
+      accessor.getBaseDataAccessor()
+          .remove(PropertyPathBuilder.controllerLeader(manager.getClusterName()),
+              AccessOption.EPHEMERAL);
       Thread.sleep(50);
     }
   }
 
   private ObjectName getResourceMonitorObjectName(String clusterName, String resourceName)
       throws Exception {
-    return new ObjectName(String.format("%s:cluster=%s,resourceName=%s",
-        MonitorDomainNames.ClusterStatus.name(), clusterName, resourceName));
+    return new ObjectName(String
+        .format("%s:cluster=%s,resourceName=%s", MonitorDomainNames.ClusterStatus.name(),
+            clusterName, resourceName));
   }
 }
diff --git a/helix-core/src/test/java/org/apache/helix/integration/manager/ClusterControllerManager.java b/helix-core/src/test/java/org/apache/helix/integration/manager/ClusterControllerManager.java
index 5b2dedf..d4c3283 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/manager/ClusterControllerManager.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/manager/ClusterControllerManager.java
@@ -30,6 +30,10 @@ import org.apache.helix.manager.zk.client.HelixZkClient;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+
+/**
+ * The standalone cluster controller class
+ */
 public class ClusterControllerManager extends ZKHelixManager implements Runnable, ZkTestManager {
   private static Logger LOG = LoggerFactory.getLogger(ClusterControllerManager.class);
 
@@ -60,7 +64,8 @@ public class ClusterControllerManager extends ZKHelixManager implements Runnable
   // This should not be called more than once because HelixManager.connect() should not be called more than once.
   public void syncStart() {
     if (_started) {
-      throw new RuntimeException("Helix Controller already started. Do not call syncStart() more than once.");
+      throw new RuntimeException(
+          "Helix Controller already started. Do not call syncStart() more than once.");
     } else {
       _started = true;
     }


[helix] 01/02: Bump jackson-databind from 2.9.5 to 2.9.10.1 in /helix-rest (#597)

Posted by hu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

hulee pushed a commit to branch zooscalability
in repository https://gitbox.apache.org/repos/asf/helix.git

commit 3e10668c7ddbe25681659988363c75253da5018a
Author: dependabot[bot] <49...@users.noreply.github.com>
AuthorDate: Thu Jan 30 13:41:24 2020 -0800

    Bump jackson-databind from 2.9.5 to 2.9.10.1 in /helix-rest (#597)
    
    Bumps [jackson-databind](https://github.com/FasterXML/jackson) from 2.9.5 to 2.9.10.1.
    - [Release notes](https://github.com/FasterXML/jackson/releases)
    - [Commits](https://github.com/FasterXML/jackson/commits)
    
    Signed-off-by: dependabot[bot] <su...@github.com>
---
 helix-rest/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/helix-rest/pom.xml b/helix-rest/pom.xml
index 236669d..b14b902 100644
--- a/helix-rest/pom.xml
+++ b/helix-rest/pom.xml
@@ -134,7 +134,7 @@ under the License.
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-databind</artifactId>
-      <version>2.9.5</version>
+      <version>2.9.10.1</version>
     </dependency>
     <dependency>
       <groupId>commons-cli</groupId>