You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/04/15 06:06:23 UTC

[kudu] branch master updated (9bad249 -> ee79cdf)

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from 9bad249  [thirdparty] enable building curl with alternative OpenSSL
     new 5443c57  [kudu-test-utils] enhance signature of KuduTestHarness methods
     new ee79cdf  KUDU-2612: an extra test for txn keepalive failover in Java client

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../apache/kudu/client/TestKuduTransaction.java    | 122 +++++++++++++++++++--
 .../java/org/apache/kudu/test/KuduTestHarness.java |  65 ++++++++---
 2 files changed, 162 insertions(+), 25 deletions(-)

[kudu] 01/02: [kudu-test-utils] enhance signature of KuduTestHarness methods

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 5443c570639a010e91636a370384e47695846a33
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Wed Apr 14 08:37:12 2021 -0700

    [kudu-test-utils] enhance signature of KuduTestHarness methods
    
    This patch enhances the signatures of some methods in the KuduTestHarness
    class to return the host and port of shut-down processes and adds a few
    new methods that allow starting those processes back up later on.  The new
    functionality is necessary for a follow-up changelist.
    
    Change-Id: I329a5035ba767abe1128cf0b2b6300116ffb205b
    Reviewed-on: http://gerrit.cloudera.org:8080/17320
    Tested-by: Kudu Jenkins
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
 .../java/org/apache/kudu/test/KuduTestHarness.java | 65 ++++++++++++++++------
 1 file changed, 49 insertions(+), 16 deletions(-)

diff --git a/java/kudu-test-utils/src/main/java/org/apache/kudu/test/KuduTestHarness.java b/java/kudu-test-utils/src/main/java/org/apache/kudu/test/KuduTestHarness.java
index 0abb3f4..82017cd 100644
--- a/java/kudu-test-utils/src/main/java/org/apache/kudu/test/KuduTestHarness.java
+++ b/java/kudu-test-utils/src/main/java/org/apache/kudu/test/KuduTestHarness.java
@@ -201,37 +201,42 @@ public class KuduTestHarness extends ExternalResource {
   }
 
   /**
-   * Helper method to kill a tablet server that serves the given tablet's
-   * leader. The currently running test case will be failed if the tablet has no
-   * leader after some retries, or if the tablet server was already killed.
-   *
+   * Helper method to kill a tablet server that hosts the given tablet's leader
+   * replica.
    * This method is thread-safe.
+   *
    * @param tablet a RemoteTablet which will get its leader killed
-   * @throws Exception
+   * @return the host and port of the tablet server which hosted the tablet's
+   *         leader replica
+   * @throws Exception if no leader replica found after a few retries,
+   *         or if the tablet server isn't running
    */
-  public void killTabletLeader(RemoteTablet tablet) throws Exception {
-    killTabletLeader(new LocatedTablet(tablet));
+  public HostAndPort killTabletLeader(RemoteTablet tablet) throws Exception {
+    return killTabletLeader(new LocatedTablet(tablet));
   }
 
   /**
-   * Helper method to kill a tablet server that serves the given tablet's
-   * leader. The currently running test case will be failed if the tablet has no
-   * leader after some retries, or if the tablet server was already killed.
+   * Helper method to kill a tablet server that serves the given tablet's leader
+   * replica.
    *
    * This method is thread-safe.
    * @param tablet a LocatedTablet which will get its leader killed
-   * @throws Exception
+   * @return the host and port of the tablet server which hosted the tablet's
+   *         leader replica
+   * @throws Exception if no leader replica found or if the tablet server isn't
+   *                   running
    */
-  public void killTabletLeader(LocatedTablet tablet) throws Exception {
+  public HostAndPort killTabletLeader(LocatedTablet tablet) throws Exception {
     HostAndPort hp = findLeaderTabletServer(tablet);
     miniCluster.killTabletServer(hp);
+    return hp;
   }
 
   /**
    * Finds the RPC port of the given tablet's leader tserver.
    * @param tablet a LocatedTablet
    * @return the host and port of the given tablet's leader tserver
-   * @throws Exception if we are unable to find the leader tserver
+   * @throws Exception if unable to find a tablet server with leader replica
    */
   public HostAndPort findLeaderTabletServer(LocatedTablet tablet)
       throws Exception {
@@ -254,6 +259,17 @@ public class KuduTestHarness extends ExternalResource {
   }
 
   /**
+   * Start tablet server which has previously been registered at the specified
+   * host and port.
+   *
+   * @param hp host and port of the tablet server to start back
+   * @throws Exception
+   */
+  public void startTabletServer(HostAndPort hp) throws Exception {
+    miniCluster.startTabletServer(hp);
+  }
+
+  /**
    * Find the host and port of the leader master.
    * @return the host and port of the leader master
    * @throws Exception if we are unable to find the leader master
@@ -266,11 +282,13 @@ public class KuduTestHarness extends ExternalResource {
    * Helper method to easily kill the leader master.
    *
    * This method is thread-safe.
+   * @return the host and port of the detected leader master
    * @throws Exception if there is an error finding or killing the leader master.
    */
-  public void killLeaderMasterServer() throws Exception {
+  public HostAndPort killLeaderMasterServer() throws Exception {
     HostAndPort hp = findLeaderMasterServer();
     miniCluster.killMasterServer(hp);
+    return hp;
   }
 
   /**
@@ -296,22 +314,37 @@ public class KuduTestHarness extends ExternalResource {
   /**
    * Kills a tablet server that serves the given tablet's leader and restarts it.
    * @param tablet a RemoteTablet which will get its leader killed and restarted
+   * @return the host and port of the restarted tablet server
    * @throws Exception
    */
-  public void restartTabletServer(RemoteTablet tablet) throws Exception {
+  public HostAndPort restartTabletServer(RemoteTablet tablet) throws Exception {
     HostAndPort hp = findLeaderTabletServer(new LocatedTablet(tablet));
     miniCluster.killTabletServer(hp);
     miniCluster.startTabletServer(hp);
+    return hp;
   }
 
   /**
    * Kills and restarts the leader master.
+   * @return the host and port of the restarted master
    * @throws Exception
    */
-  public void restartLeaderMaster() throws Exception {
+  public HostAndPort restartLeaderMaster() throws Exception {
     HostAndPort hp = findLeaderMasterServer();
     miniCluster.killMasterServer(hp);
     miniCluster.startMasterServer(hp);
+    return hp;
+  }
+
+  /**
+   * Start master which has previously been registered at the specified
+   * host and port.
+   *
+   * @param hp host and port of the master to start back
+   * @throws Exception
+   */
+  public void startMaster(HostAndPort hp) throws Exception {
+    miniCluster.startMasterServer(hp);
   }
 
   /**

[kudu] 02/02: KUDU-2612: an extra test for txn keepalive failover in Java client

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit ee79cdfa9906d14a63d4ec4b487c6eece77cc50f
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Wed Apr 14 19:52:38 2021 -0700

    KUDU-2612: an extra test for txn keepalive failover in Java client
    
    This is a follow-up to 096f1ddf09047ea11d78a661010dd549ffa9af51.
    
    This patch adds an extra test scenario similar to the one added
    in the prior changelist, but with the additional twist of "rolling"
    unavailability of leader masters.  In addition, it verifies that
    RPC error responses from TxnManager due to the unavailability
    of TxnStatusManager are properly handled by the Java client.
    
    Change-Id: Ib278d402bee85fb0442cbce98b2b4ab022229eb4
    Reviewed-on: http://gerrit.cloudera.org:8080/17321
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
    Tested-by: Kudu Jenkins
---
 .../apache/kudu/client/TestKuduTransaction.java    | 122 +++++++++++++++++++--
 1 file changed, 113 insertions(+), 9 deletions(-)

diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTransaction.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTransaction.java
index 1114d85..60ca48d 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTransaction.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestKuduTransaction.java
@@ -17,6 +17,7 @@
 
 package org.apache.kudu.client;
 
+import static org.apache.kudu.test.ClientTestUtil.countRowsInScan;
 import static org.apache.kudu.test.ClientTestUtil.createBasicSchemaInsert;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -769,9 +770,10 @@ public class TestKuduTransaction {
       // It should be possible to commit the transaction.
       txn.commit(true /*wait*/);
 
-      // An extra sanity check: read back the row written into the table in the
+      // An extra sanity check: read back the rows written into the table in the
       // context of the transaction.
       KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table)
+          .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES)
           .replicaSelection(ReplicaSelection.LEADER_ONLY)
           .build();
 
@@ -798,9 +800,10 @@ public class TestKuduTransaction {
       // txn-related operations routed through TxnManager should succeed.
       txn.commit(true /*wait*/);
 
-      // An extra sanity check: read back the row written into the table in the
+      // An extra sanity check: read back the rows written into the table in the
       // context of the transaction.
       KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table)
+          .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES)
           .replicaSelection(ReplicaSelection.LEADER_ONLY)
           .build();
 
@@ -864,13 +867,14 @@ public class TestKuduTransaction {
     // the call to KuduTransaction.commit() above could not succeed.
     t.join(250);
 
-    // An extra sanity check: read back the row written into the table in the
+    // An extra sanity check: read back the rows written into the table in the
     // context of the transaction.
     KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table)
+        .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES)
         .replicaSelection(ReplicaSelection.LEADER_ONLY)
         .build();
 
-    assertEquals(1, scanner.nextRows().getNumRows());
+    assertEquals(1, countRowsInScan(scanner));
   }
 
   /**
@@ -895,8 +899,8 @@ public class TestKuduTransaction {
   @TabletServerConfig(flags = {
       // The txn keepalive interval should be long enough to accommodate Raft
       // leader failure detection and election.
-      "--txn_keepalive_interval_ms=3000",
-      "--txn_staleness_tracker_interval_ms=500"
+      "--txn_keepalive_interval_ms=1000",
+      "--txn_staleness_tracker_interval_ms=250"
   })
   public void testTxnKeepaliveSwitchesToOtherTxnManager() throws Exception {
     final String TABLE_NAME = "txn_manager_fallback";
@@ -919,7 +923,7 @@ public class TestKuduTransaction {
     // Wait for two keepalive intervals to make sure the backend got a chance
     // to automatically abort the transaction if not receiving txn keepalive
     // messages.
-    Thread.sleep(2 * 3000);
+    Thread.sleep(2 * 1000);
 
     // It should be possible to commit the transaction. This is to verify that
     //
@@ -932,13 +936,113 @@ public class TestKuduTransaction {
     //     operations as well
     txn.commit(true /*wait*/);
 
-    // An extra sanity check: read back the row written into the table in the
+    // An extra sanity check: read back the rows written into the table in the
     // context of the transaction.
     KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table)
+        .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES)
         .replicaSelection(ReplicaSelection.LEADER_ONLY)
         .build();
+    assertEquals(1, countRowsInScan(scanner));
+  }
+
+  /**
+   * Similar to the {@link #testTxnKeepaliveSwitchesToOtherTxnManager()} above,
+   * but with additional twist of "rolling" unavailability of leader masters.
+   * In addition, make sure the errors sent from TxnManager are processed
+   * accordingly when TxnStatusManager is not around.
+   */
+  @Test(timeout = 100000)
+  @MasterServerConfig(flags = {
+      // TxnManager functionality is necessary for this scenario.
+      "--txn_manager_enabled",
+
+      // Set Raft heartbeat interval short for faster test runtime: speed up
+      // leader failure detection and new leader election.
+      "--raft_heartbeat_interval_ms=100",
+  })
+  @TabletServerConfig(flags = {
+      // The txn keepalive interval should be long enough to accommodate Raft
+      // leader failure detection and election.
+      "--txn_keepalive_interval_ms=1000",
+      "--txn_staleness_tracker_interval_ms=250"
+  })
+  public void testTxnKeepaliveRollingSwitchToOtherTxnManager() throws Exception {
+    final String TABLE_NAME = "txn_manager_fallback_rolling";
+    client.createTable(
+        TABLE_NAME,
+        ClientTestUtil.getBasicSchema(),
+        new CreateTableOptions().addHashPartitions(ImmutableList.of("key"), 2));
+
+    KuduTransaction txn = client.newTransaction();
+    KuduSession session = txn.newKuduSession();
+
+    KuduTable table = client.openTable(TABLE_NAME);
+
+    // Cycle the leadership among masters, making sure the client successfully
+    // switches to every newly elected leader master to send keepalive messages.
+    final int numMasters = harness.getMasterServers().size();
+    for (int i = 0; i < numMasters; ++i) {
+      // Shutdown the leader master.
+      final HostAndPort hp = harness.killLeaderMasterServer();
+
+      // Wait for two keepalive intervals to give the backend a chance
+      // to automatically abort the transaction if not receiving txn keepalive
+      // messages.
+      Thread.sleep(2 * 1000);
+
+      // The transaction should be still alive.
+      try {
+        txn.isCommitComplete();
+        fail("KuduTransaction.isCommitComplete should have thrown");
+      } catch (NonRecoverableException e) {
+        assertTrue(e.getStatus().toString(), e.getStatus().isIllegalState());
+        assertEquals("transaction is still open", e.getMessage());
+      }
+
+      // In addition, it should be possible to insert rows in the context
+      // of the transaction.
+      session.apply(createBasicSchemaInsert(table, i));
+      session.flush();
+
+      // Start the master back.
+      harness.startMaster(hp);
+    }
+
+    // Make sure Java client properly processes error responses sent back by
+    // TxnManager when the TxnStatusManager isn't available. So, shutdown all
+    // tablet servers: this is to make sure TxnStatusManager isn't there.
+    harness.killAllTabletServers();
+
+    Thread t = new Thread(new Runnable() {
+      @Override
+      public void run() {
+        try {
+          // Sleep for some time to allow the KuduTransaction.commit() call
+          // below issue RPCs when TxnStatusManager is not yet around.
+          Thread.sleep(2 * 1000);
+
+          // Start all the tablet servers back so the TxnStatusManager is back.
+          harness.startAllTabletServers();
+        } catch (Exception e) {
+          fail("failed to start all tablet servers back: " + e);
+        }
+      }
+    });
+    t.start();
 
-    assertEquals(1, scanner.nextRows().getNumRows());
+    // The transaction should be still alive, and it should be possible to
+    // commit it.
+    txn.commit(true /*wait*/);
+
+    t.join();
+
+    // An extra sanity check: read back the rows written into the table in the
+    // context of the transaction.
+    KuduScanner scanner = new KuduScanner.KuduScannerBuilder(asyncClient, table)
+        .readMode(AsyncKuduScanner.ReadMode.READ_YOUR_WRITES)
+        .replicaSelection(ReplicaSelection.LEADER_ONLY)
+        .build();
+    assertEquals(numMasters, countRowsInScan(scanner));
   }
 
   // TODO(aserbin): when test harness allows for sending Kudu servers particular