You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2020/01/29 19:07:04 UTC

[kudu] 02/02: [tests] fix flakiness in MasterReplicationAndRpcSizeLimitTest

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit ce73377a42c92edd1677a842af9598ccdfbd39ae
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Tue Jan 28 22:41:38 2020 -0800

    [tests] fix flakiness in MasterReplicationAndRpcSizeLimitTest
    
    Fixed rare flakiness in the MasterReplicationAndRpcSizeLimitTest.AlterTable
    scenario.  In essence, the master leadership might change after the
    ALTER TABLE request but before the metric value is collected.
    
    Change-Id: Ib9c9207271826381995da3840c1e6bf9e0a6cbcd
    Reviewed-on: http://gerrit.cloudera.org:8080/15126
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
---
 .../integration-tests/master_replication-itest.cc  | 35 +++++++++++++++-------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/kudu/integration-tests/master_replication-itest.cc b/src/kudu/integration-tests/master_replication-itest.cc
index 1493f5a..997f52b 100644
--- a/src/kudu/integration-tests/master_replication-itest.cc
+++ b/src/kudu/integration-tests/master_replication-itest.cc
@@ -76,6 +76,7 @@ using kudu::cluster::ExternalMiniClusterOptions;
 using kudu::cluster::InternalMiniCluster;
 using kudu::cluster::InternalMiniClusterOptions;
 using kudu::consensus::ReplicaManagementInfoPB;
+using kudu::itest::GetInt64Metric;
 using std::string;
 using std::unique_ptr;
 using std::vector;
@@ -453,13 +454,12 @@ class MasterReplicationAndRpcSizeLimitTest : public KuduTest {
   Status GetMetricValue(const MetricPrototype& metric_proto, int64_t* value) {
     int leader_idx;
     RETURN_NOT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
-    return itest::GetInt64Metric(
-          cluster_->master(leader_idx)->bound_http_hostport(),
-          &METRIC_ENTITY_server,
-          nullptr,
-          &metric_proto,
-          "value",
-          value);
+    return GetInt64Metric(cluster_->master(leader_idx)->bound_http_hostport(),
+                          &METRIC_ENTITY_server,
+                          nullptr,
+                          &metric_proto,
+                          "value",
+                          value);
   }
 
   unique_ptr<cluster::ExternalMiniCluster> cluster_;
@@ -493,14 +493,29 @@ TEST_F(MasterReplicationAndRpcSizeLimitTest, AlterTable) {
   }
   auto s = alterer->timeout(MonoDelta::FromSeconds(30))->Alter();
 
-  // The DDL attempt above (i.e. the Alter() call) produces an oversided write
+  // The DDL attempt above (i.e. the Alter() call) produces an oversized write
   // request to the system catalog tablet. The request should have been rejected
   // and the corresponding metric incremented.
   ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
   ASSERT_STR_CONTAINS(s.ToString(), "too large for current setting of the "
                                     "--rpc_max_message_size flag");
-  ASSERT_OK(GetMetricValue(METRIC_sys_catalog_oversized_write_requests, &val));
-  ASSERT_EQ(1, val);
+
+  // The leader master can change between the ALTER TABLE request above and
+  // the collection of the metric value below. To avoid flakiness, get the
+  // readings for the metric from each master and verify the result.
+  int64_t sum = 0;
+  for (auto idx = 0; idx < kNumMasters; ++idx) {
+    int64_t val;
+    ASSERT_OK(GetInt64Metric(cluster_->master(idx)->bound_http_hostport(),
+                             &METRIC_ENTITY_server,
+                             nullptr,
+                             &METRIC_sys_catalog_oversized_write_requests,
+                             "value",
+                             &val));
+    ASSERT_LE(val, 1);
+    sum += val;
+  }
+  ASSERT_EQ(1, sum);
 
   NO_FATALS(cluster_->AssertNoCrashes());
 }