You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2016/08/30 06:25:34 UTC

[1/3] kudu git commit: raft_consensus-itest: inserter thread should FATAL instead of FAIL

Repository: kudu
Updated Branches:
  refs/heads/master ed216bcdf -> 0d18a0b02


raft_consensus-itest: inserter thread should FATAL instead of FAIL

This test has a thread which inserts rows and is supposed to fail the
test case if it sees any row errors. Failing the test using FAIL() from
a non-main thread is not thread-safe in gtest. Furthermore, FAIL() acts
as a 'return' and thus the latch used to communicate the thread
completion never gets fired. So, when this assertion failed, the test
would hang forever.

This was a regression caused by d0cff255f84e75b70c0c39ccd34a35f348e3c722
which changed the code from a CHECK(...) to a FAIL().

In order to correct this behavior, this patch switches to using the
utility method FlushSessionOrDie() which has an identical
implementation.

Change-Id: I9dbe1e551b7f1b8b81ce0156627deb096db5112b
Reviewed-on: http://gerrit.cloudera.org:8080/4140
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f78d1c84
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f78d1c84
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f78d1c84

Branch: refs/heads/master
Commit: f78d1c840a703d0f36c2bcf299b87d44f2a9fb0e
Parents: ed216bc
Author: Todd Lipcon <to...@apache.org>
Authored: Fri Aug 26 17:02:54 2016 -0700
Committer: Mike Percy <mp...@apache.org>
Committed: Tue Aug 30 06:24:53 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/raft_consensus-itest.cc   | 22 +-------------------
 1 file changed, 1 insertion(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/f78d1c84/src/kudu/integration-tests/raft_consensus-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc
index 8d6553d..9fc74ef 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -226,29 +226,9 @@ class RaftConsensusITest : public TabletServerIntegrationTestBase {
         CHECK_OK(session->Apply(insert.release()));
       }
 
-      // We don't handle write idempotency yet. (i.e making sure that when a leader fails
-      // writes to it that were eventually committed by the new leader but un-ackd to the
-      // client are not retried), so some errors are expected.
-      // It's OK as long as the errors are Status::AlreadyPresent();
+      FlushSessionOrDie(session);
 
       int inserted = last_row_in_batch - first_row_in_batch;
-
-      Status s = session->Flush();
-      if (PREDICT_FALSE(!s.ok())) {
-        std::vector<client::KuduError*> errors;
-        ElementDeleter d(&errors);
-        bool overflow;
-        session->GetPendingErrors(&errors, &overflow);
-        CHECK(!overflow);
-        if (!errors.empty()) {
-          for (const client::KuduError* e : errors) {
-            LOG(ERROR) << "Unexpected error: " << e->status().ToString();
-          }
-          FAIL() << "Found errors while inserting.";
-        }
-        inserted -= errors.size();
-      }
-
       for (CountDownLatch* latch : latches) {
         latch->CountDown(inserted);
       }


[2/3] kudu git commit: client-test: remove an unnecessary manual leader election

Posted by mp...@apache.org.
client-test: remove an unnecessary manual leader election

One test case predates the addition of automatic leader election to
Kudu, and was manually triggering a leader election after killing a
node. This is no longer necessary, so the test can be simplified.

Change-Id: I943d331d4d8afed9d7d929df1df05aa2f3cf454e
Reviewed-on: http://gerrit.cloudera.org:8080/4141
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/5e368488
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/5e368488
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/5e368488

Branch: refs/heads/master
Commit: 5e368488b10b28444e03517283beb8ecb958573d
Parents: f78d1c8
Author: Todd Lipcon <to...@apache.org>
Authored: Fri Aug 26 17:33:05 2016 -0700
Committer: Mike Percy <mp...@apache.org>
Committed: Tue Aug 30 06:24:58 2016 +0000

----------------------------------------------------------------------
 src/kudu/client/client-test.cc | 40 -------------------------------------
 1 file changed, 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/5e368488/src/kudu/client/client-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index 26ba4c3..1af7c59 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -38,7 +38,6 @@
 #include "kudu/client/write_op.h"
 #include "kudu/common/partial_row.h"
 #include "kudu/common/wire_protocol.h"
-#include "kudu/consensus/consensus.proxy.h"
 #include "kudu/gutil/atomicops.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/stl_util.h"
@@ -2560,8 +2559,6 @@ TEST_F(ClientTest, TestReplicatedMultiTabletTableFailover) {
 // tablet dies.
 // This currently forces leader promotion through RPC and creates
 // a new client afterwards.
-// TODO Remove the leader promotion part when we have automated
-// leader election.
 TEST_F(ClientTest, TestReplicatedTabletWritesWithLeaderElection) {
   const string kReplicatedTable = "replicated_failover_on_writes";
   const int kNumRowsToWrite = 100;
@@ -2595,43 +2592,6 @@ TEST_F(ClientTest, TestReplicatedTabletWritesWithLeaderElection) {
   // Kill the tserver that is serving the leader tablet.
   ASSERT_OK(KillTServer(killed_uuid));
 
-  // Since we waited before, hopefully all replicas will be up to date
-  // and we can just promote another replica.
-  std::shared_ptr<rpc::Messenger> client_messenger;
-  rpc::MessengerBuilder bld("client");
-  ASSERT_OK(bld.Build(&client_messenger));
-  gscoped_ptr<consensus::ConsensusServiceProxy> new_leader_proxy;
-
-  int new_leader_idx = -1;
-  for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
-    MiniTabletServer* ts = cluster_->mini_tablet_server(i);
-    if (ts->is_started()) {
-      const string& uuid = ts->server()->instance_pb().permanent_uuid();
-      if (uuid != killed_uuid) {
-        new_leader_idx = i;
-        break;
-      }
-    }
-  }
-  ASSERT_NE(-1, new_leader_idx);
-
-  MiniTabletServer* new_leader = cluster_->mini_tablet_server(new_leader_idx);
-  ASSERT_TRUE(new_leader != nullptr);
-  new_leader_proxy.reset(
-      new consensus::ConsensusServiceProxy(client_messenger,
-                                           new_leader->bound_rpc_addr()));
-
-  consensus::RunLeaderElectionRequestPB req;
-  consensus::RunLeaderElectionResponsePB resp;
-  rpc::RpcController controller;
-
-  LOG(INFO) << "Promoting server at index " << new_leader_idx << " listening at "
-            << new_leader->bound_rpc_addr().ToString() << " ...";
-  req.set_dest_uuid(new_leader->server()->fs_manager()->uuid());
-  req.set_tablet_id(rt->tablet_id());
-  ASSERT_OK(new_leader_proxy->RunLeaderElection(req, &resp, &controller));
-  ASSERT_FALSE(resp.has_error()) << "Got error. Response: " << resp.ShortDebugString();
-
   LOG(INFO) << "Inserting additional rows...";
   ASSERT_NO_FATAL_FAILURE(InsertTestRows(client_.get(),
                                          table.get(),


[3/3] kudu git commit: raft_consensus-itest: workaround flakiness due to KUDU-1580

Posted by mp...@apache.org.
raft_consensus-itest: workaround flakiness due to KUDU-1580

KUDU-1580 is a bug in which the client doesn't properly fail over in the
case that the RPC connection negotiation times out. In the
MultiThreadedInsertWithFailovers test case, the client is frequently
timing out with such errors causing flakiness.

This workaround just bumps the connection negotiation timeout.

Change-Id: If8f375654b954701ab8d23c07bd133ba393e222d
Reviewed-on: http://gerrit.cloudera.org:8080/4142
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/0d18a0b0
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/0d18a0b0
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/0d18a0b0

Branch: refs/heads/master
Commit: 0d18a0b026f850ded6647814e63183d897616535
Parents: 5e36848
Author: Todd Lipcon <to...@apache.org>
Authored: Fri Aug 26 17:37:42 2016 -0700
Committer: Mike Percy <mp...@apache.org>
Committed: Tue Aug 30 06:25:06 2016 +0000

----------------------------------------------------------------------
 src/kudu/integration-tests/raft_consensus-itest.cc | 5 +++++
 1 file changed, 5 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/0d18a0b0/src/kudu/integration-tests/raft_consensus-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc
index 9fc74ef..387c048 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -50,6 +50,7 @@ DEFINE_int64(client_inserts_per_thread, 50,
 DEFINE_int64(client_num_batches_per_thread, 5,
              "In how many batches to group the rows, for each client");
 DECLARE_int32(consensus_rpc_timeout_ms);
+DECLARE_int64(rpc_negotiation_timeout_ms);
 
 METRIC_DECLARE_entity(tablet);
 METRIC_DECLARE_counter(transaction_memory_pressure_rejections);
@@ -932,6 +933,10 @@ TEST_F(RaftConsensusITest, MultiThreadedInsertWithFailovers) {
   // Reset consensus rpc timeout to the default value or the election might fail often.
   FLAGS_consensus_rpc_timeout_ms = 1000;
 
+  // TODO(KUDU-1580): this test seems to frequently trigger RPC negotiation timeouts,
+  // and the client doesn't properly fail over in this case.
+  FLAGS_rpc_negotiation_timeout_ms = 10000;
+
   // Start a 7 node configuration cluster (since we can't bring leaders back we start with a
   // higher replica count so that we kill more leaders).