You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/02/26 03:10:16 UTC
[kudu] branch master updated: [client-test] added
WriteWhileRestartingMultipleTabletServers
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 1ba4a4b [client-test] added WriteWhileRestartingMultipleTabletServers
1ba4a4b is described below
commit 1ba4a4b767ef62d1182d7091043a42cd0a1ac970
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Wed Feb 24 20:09:26 2021 -0800
[client-test] added WriteWhileRestartingMultipleTabletServers
While working on tests for multi-row transactions, I added a similar
scenario and found it a bit flaky. So, I wanted to see if the same
is true for the non-transactional write operations. It turns out
the latter is pretty stable, so I need to dig in to find the root
cause of the former. Anyway, I think this is a good scenario to
add to client-test.cc: it extends the already existing scenario
ClientTest.TestWriteWhileRestarting to the multi-replica case and goes
through multiple restarts. Another important detail is that the newly
added test scenario verifies the number of persisted rows at the end.
I also did a small touch-up of the code related to the utility method
ClientTest::CountRowsFromClient().
Change-Id: I95d1456dea2e6e2bb7d8b0c5d05e95798098710d
Reviewed-on: http://gerrit.cloudera.org:8080/17120
Tested-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
src/kudu/client/client-test.cc | 147 +++++++++++++++++++++++++++++------------
1 file changed, 105 insertions(+), 42 deletions(-)
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index d210765..ff4ca1f 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -275,10 +275,6 @@ class ClientTest : public KuduTest {
NO_FATALS(CreateTable(kTableName, 1, GenerateSplitRows(), {}, &client_table_));
}
- void TearDown() override {
- KuduTest::TearDown();
- }
-
// Looks up the remote tablet entry for a given partition key in the meta cache.
scoped_refptr<internal::RemoteTablet> MetaCacheLookup(KuduTable* table,
const string& partition_key) {
@@ -361,9 +357,8 @@ class ClientTest : public KuduTest {
}
protected:
-
- static const char *kTableName;
- static const int32_t kNoBound;
+ static constexpr const char* const kTableName = "client-testtb";
+ static constexpr int32_t kNoBound = kint32max;
// Set the location mapping command for the test's masters. Overridden by
// derived classes to test client location assignment.
@@ -723,18 +718,22 @@ class ClientTest : public KuduTest {
}
int CountRowsFromClient(KuduTable* table) {
- return CountRowsFromClient(table, KuduScanner::READ_LATEST, kNoBound, kNoBound);
+ return CountRowsFromClient(table, KuduScanner::READ_LATEST);
}
- int CountRowsFromClient(KuduTable* table, KuduScanner::ReadMode scan_mode,
- int32_t lower_bound, int32_t upper_bound) {
+ int CountRowsFromClient(KuduTable* table,
+ KuduScanner::ReadMode scan_mode,
+ int32_t lower_bound = kNoBound,
+ int32_t upper_bound = kNoBound) {
return CountRowsFromClient(table, KuduClient::LEADER_ONLY, scan_mode,
lower_bound, upper_bound);
}
- int CountRowsFromClient(KuduTable* table, KuduClient::ReplicaSelection selection,
+ int CountRowsFromClient(KuduTable* table,
+ KuduClient::ReplicaSelection selection,
KuduScanner::ReadMode scan_mode,
- int32_t lower_bound, int32_t upper_bound) {
+ int32_t lower_bound = kNoBound,
+ int32_t upper_bound = kNoBound) {
KuduScanner scanner(table);
CHECK_OK(scanner.SetSelection(selection));
CHECK_OK(scanner.SetProjectedColumnNames({}));
@@ -909,9 +908,6 @@ class ClientTest : public KuduTest {
shared_ptr<KuduTable> client_table_;
};
-const char *ClientTest::kTableName = "client-testtb";
-const int32_t ClientTest::kNoBound = kint32max;
-
TEST_F(ClientTest, TestClusterId) {
int leader_idx;
ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
@@ -1319,18 +1315,17 @@ TEST_P(ScanMultiTabletParamTest, Test) {
}
FlushSessionOrDie(session);
- ASSERT_EQ(4 * (kTabletsNum - 1),
- CountRowsFromClient(table.get(), read_mode, kNoBound, kNoBound));
+ ASSERT_EQ(4 * (kTabletsNum - 1), CountRowsFromClient(table.get(), read_mode));
ASSERT_EQ(3, CountRowsFromClient(table.get(), read_mode, kNoBound, 15));
- ASSERT_EQ(9, CountRowsFromClient(table.get(), read_mode, 27, kNoBound));
+ ASSERT_EQ(9, CountRowsFromClient(table.get(), read_mode, 27));
ASSERT_EQ(3, CountRowsFromClient(table.get(), read_mode, 0, 15));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 0, 10));
ASSERT_EQ(4, CountRowsFromClient(table.get(), read_mode, 0, 20));
ASSERT_EQ(8, CountRowsFromClient(table.get(), read_mode, 0, 30));
ASSERT_EQ(6, CountRowsFromClient(table.get(), read_mode, 14, 30));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 30, 30));
- ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, kTabletsNum * kRowsPerTablet,
- kNoBound));
+ ASSERT_EQ(0, CountRowsFromClient(
+ table.get(), read_mode, kTabletsNum * kRowsPerTablet));
// Update every other row
for (int i = 1; i < kTabletsNum; ++i) {
@@ -1343,18 +1338,17 @@ TEST_P(ScanMultiTabletParamTest, Test) {
FlushSessionOrDie(session);
// Check all counts the same (make sure updates don't change # of rows)
- ASSERT_EQ(4 * (kTabletsNum - 1),
- CountRowsFromClient(table.get(), read_mode, kNoBound, kNoBound));
+ ASSERT_EQ(4 * (kTabletsNum - 1), CountRowsFromClient(table.get(), read_mode));
ASSERT_EQ(3, CountRowsFromClient(table.get(), read_mode, kNoBound, 15));
- ASSERT_EQ(9, CountRowsFromClient(table.get(), read_mode, 27, kNoBound));
+ ASSERT_EQ(9, CountRowsFromClient(table.get(), read_mode, 27));
ASSERT_EQ(3, CountRowsFromClient(table.get(), read_mode, 0, 15));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 0, 10));
ASSERT_EQ(4, CountRowsFromClient(table.get(), read_mode, 0, 20));
ASSERT_EQ(8, CountRowsFromClient(table.get(), read_mode, 0, 30));
ASSERT_EQ(6, CountRowsFromClient(table.get(), read_mode, 14, 30));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 30, 30));
- ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, kTabletsNum * kRowsPerTablet,
- kNoBound));
+ ASSERT_EQ(0, CountRowsFromClient(
+ table.get(), read_mode, kTabletsNum * kRowsPerTablet));
// Delete half the rows
for (int i = 1; i < kTabletsNum; ++i) {
@@ -1368,17 +1362,17 @@ TEST_P(ScanMultiTabletParamTest, Test) {
// Check counts changed accordingly
ASSERT_EQ(2 * (kTabletsNum - 1),
- CountRowsFromClient(table.get(), read_mode, kNoBound, kNoBound));
+ CountRowsFromClient(table.get(), read_mode));
ASSERT_EQ(2, CountRowsFromClient(table.get(), read_mode, kNoBound, 15));
- ASSERT_EQ(4, CountRowsFromClient(table.get(), read_mode, 27, kNoBound));
+ ASSERT_EQ(4, CountRowsFromClient(table.get(), read_mode, 27));
ASSERT_EQ(2, CountRowsFromClient(table.get(), read_mode, 0, 15));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 0, 10));
ASSERT_EQ(2, CountRowsFromClient(table.get(), read_mode, 0, 20));
ASSERT_EQ(4, CountRowsFromClient(table.get(), read_mode, 0, 30));
ASSERT_EQ(2, CountRowsFromClient(table.get(), read_mode, 14, 30));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 30, 30));
- ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, kTabletsNum * kRowsPerTablet,
- kNoBound));
+ ASSERT_EQ(0, CountRowsFromClient(
+ table.get(), read_mode, kTabletsNum * kRowsPerTablet));
// Delete rest of rows
for (int i = 1; i < kTabletsNum; ++i) {
@@ -1391,17 +1385,17 @@ TEST_P(ScanMultiTabletParamTest, Test) {
FlushSessionOrDie(session);
// Check counts changed accordingly
- ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, kNoBound, kNoBound));
+ ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, kNoBound, 15));
- ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 27, kNoBound));
+ ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 27));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 0, 15));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 0, 10));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 0, 20));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 0, 30));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 14, 30));
ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, 30, 30));
- ASSERT_EQ(0, CountRowsFromClient(table.get(), read_mode, kTabletsNum * kRowsPerTablet,
- kNoBound));
+ ASSERT_EQ(0, CountRowsFromClient(
+ table.get(), read_mode, kTabletsNum * kRowsPerTablet));
}
INSTANTIATE_TEST_SUITE_P(Params, ScanMultiTabletParamTest,
testing::ValuesIn(read_modes));
@@ -1622,14 +1616,12 @@ TEST_F(ClientTest, TestScanYourWrites) {
// achieve read-your-writes/read-your-reads.
uint64_t count = CountRowsFromClient(client_table_.get(),
KuduClient::LEADER_ONLY,
- KuduScanner::READ_YOUR_WRITES,
- kNoBound, kNoBound);
+ KuduScanner::READ_YOUR_WRITES);
ASSERT_EQ(FLAGS_test_scan_num_rows, count);
count = CountRowsFromClient(client_table_.get(),
KuduClient::CLOSEST_REPLICA,
- KuduScanner::READ_YOUR_WRITES,
- kNoBound, kNoBound);
+ KuduScanner::READ_YOUR_WRITES);
ASSERT_EQ(FLAGS_test_scan_num_rows, count);
}
@@ -4929,8 +4921,7 @@ TEST_F(ClientTest, TestReplicatedMultiTabletTableFailover) {
tries++;
int num_rows = CountRowsFromClient(table.get(),
KuduClient::LEADER_ONLY,
- KuduScanner::READ_LATEST,
- kNoBound, kNoBound);
+ KuduScanner::READ_LATEST);
int master_rpcs = CountMasterLookupRPCs() - master_rpcs_before;
// Regression test for KUDU-1387: we should not have any tight loops
@@ -4991,8 +4982,7 @@ TEST_F(ClientTest, TestReplicatedTabletWritesWithLeaderElection) {
LOG(INFO) << "Counting rows...";
ASSERT_EQ(2 * kNumRowsToWrite, CountRowsFromClient(table.get(),
KuduClient::LEADER_ONLY,
- KuduScanner::READ_LATEST,
- kNoBound, kNoBound));
+ KuduScanner::READ_LATEST));
}
namespace {
@@ -7162,7 +7152,7 @@ TEST_F(ClientTest, TxnBasicOperations) {
NO_FATALS(InsertTestRows(client_table_.get(), session.get(), kRowsNum));
ASSERT_OK(txn->Commit());
ASSERT_EQ(kRowsNum, CountRowsFromClient(
- client_table_.get(), KuduScanner::READ_YOUR_WRITES, kNoBound, kNoBound));
+ client_table_.get(), KuduScanner::READ_YOUR_WRITES));
ASSERT_EQ(0, session->CountPendingErrors());
}
#endif
@@ -7855,5 +7845,78 @@ TEST_F(ClientTestUnixSocket, TestConnectViaUnixSocket) {
ASSERT_EQ(1, total_unix_conns);
}
+// Test fixture derived from ClientTest whose SetUp() builds its own
+// environment: a mini-cluster with 3 tablet servers and a test table created
+// with 3 replicas and hash partitioning on the "key" column.
+class WriteRestartTest : public ClientTest {
+ public:
+ // Invokes KuduTest::SetUp() directly, then starts the cluster, creates the
+ // client, and creates and opens the table named kTableName.
+ // NOTE(review): presumably ClientTest's own setup is bypassed on purpose so
+ // that the cluster and the table are built here instead -- confirm.
+ void SetUp() override {
+ KuduTest::SetUp();
+
+ // Start minicluster and wait for tablet servers to connect to master.
+ InternalMiniClusterOptions options;
+ options.num_tablet_servers = 3;
+ cluster_.reset(new InternalMiniCluster(env_, std::move(options)));
+ ASSERT_OK(cluster_->StartSync());
+
+ // Scenarios of this test might require multiple retries from the client if
+ // running on a slow or overloaded machine. The timeout for RPC operations
+ // is set higher than the default to avoid false positives. Both the admin
+ // operation timeout and the RPC timeout are set to 60 seconds.
+ KuduClientBuilder builder;
+ builder.default_admin_operation_timeout(MonoDelta::FromSeconds(60));
+ builder.default_rpc_timeout(MonoDelta::FromSeconds(60));
+ ASSERT_OK(cluster_->CreateClient(&builder, &client_));
+
+ // Create the table with 3 replicas, hash-partitioned on "key" (second
+ // argument 2), then open it into client_table_ for the test scenarios.
+ unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+ ASSERT_OK(table_creator->table_name(kTableName)
+ .schema(&schema_)
+ .add_hash_partitions({ "key" }, 2)
+ .num_replicas(3)
+ .Create());
+ ASSERT_OK(client_->OpenTable(kTableName, &client_table_));
+ }
+};
+
+// Restart tablet servers in a round-robin fashion, one per row written, without
+// waiting for the restarted tablet server to be up and running before trying
+// to write the next row. Count the number of rows once done. There should be
+// no errors: the client should retry any operations that failed due to a
+// tablet server restarting. The resulting row count should match the total
+// number of rows written by the client.
+//
+// The scenario is registered on the WriteRestartTest fixture so that it runs
+// against the setup built in WriteRestartTest::SetUp() (3 tablet servers,
+// table with 3 replicas) rather than the base ClientTest setup.
+TEST_F(WriteRestartTest, WriteWhileRestartingMultipleTabletServers) {
+  SKIP_IF_SLOW_NOT_ALLOWED();
+
+  // Maps a scan read mode to its name; used only for the SCOPED_TRACE message
+  // in the verification loop below.
+  constexpr const auto read_mode_to_string =
+      [](KuduScanner::ReadMode mode) constexpr {
+        switch (mode) {
+          case KuduScanner::READ_LATEST:
+            return "READ_LATEST";
+          case KuduScanner::READ_AT_SNAPSHOT:
+            return "READ_AT_SNAPSHOT";
+          case KuduScanner::READ_YOUR_WRITES:
+            return "READ_YOUR_WRITES";
+          default:
+            return "UNKNOWN";
+        }
+      };
+
+  shared_ptr<KuduSession> session = client_->NewSession();
+  ASSERT_OK(session->SetFlushMode(KuduSession::AUTO_FLUSH_SYNC));
+
+  static constexpr auto kNumRows = 32;
+  const auto num_servers = cluster_->num_tablet_servers();
+  int64_t key = 0;
+  for (auto row_idx = 0; row_idx < kNumRows; ++row_idx) {
+    // Write a single row, then bounce the next tablet server in round-robin
+    // order. The next iteration's write is issued right after Restart()
+    // returns, without any additional wait.
+    NO_FATALS(InsertTestRows(client_table_.get(), session.get(), 1, key++));
+    auto* ts = cluster_->mini_tablet_server(row_idx % num_servers);
+    ts->Shutdown();
+    ASSERT_OK(ts->Restart());
+  }
+  // Verify the number of rows under each of the two read modes: every row
+  // written above must be counted.
+  for (auto mode : {KuduScanner::READ_LATEST, KuduScanner::READ_YOUR_WRITES}) {
+    SCOPED_TRACE(Substitute("read mode $0", read_mode_to_string(mode)));
+    auto row_count = CountRowsFromClient(client_table_.get(),
+                                         KuduClient::LEADER_ONLY,
+                                         mode);
+    ASSERT_EQ(kNumRows, row_count);
+  }
+}
+
} // namespace client
} // namespace kudu