You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2020/01/17 01:29:59 UTC

[kudu] branch master updated: KUDU-3042: Fix invalid DCHECK when rpc is cancelled then times out

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 0bd417b  KUDU-3042: Fix invalid DCHECK when rpc is cancelled then times out
0bd417b is described below

commit 0bd417b40e4dfea9bb454414c836ce589a16ca5d
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
AuthorDate: Tue Jan 14 12:57:30 2020 -0800

    KUDU-3042: Fix invalid DCHECK when rpc is cancelled then times out
    
    The timeout handling code in Connection::HandleOutboundCallTimeout
    assumes that when a timeout is triggered the OutboundCall must still
    be valid. In the case of an asynchronous rpc that was cancelled before
    the timeout was hit, this will not be the case.
    
    The fix is to check if the OutboundCall is still valid and return
    without processing the timeout if it is not.
    
    Testing:
    - Added timeout tests to TestCancellation to inject timeouts at
      OutboundCall states from READY to SENT.
    - Tested in Impala in the context of implementing IMPALA-8712.
    
    Change-Id: I5b97ccc4787809fca8fbc991bb1151940477efca
    Reviewed-on: http://gerrit.cloudera.org:8080/15032
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/rpc/connection.cc   |  5 ++++-
 src/kudu/rpc/rpc-test-base.h | 20 +++++++++++++-------
 src/kudu/rpc/rpc-test.cc     |  6 +++++-
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/src/kudu/rpc/connection.cc b/src/kudu/rpc/connection.cc
index a15d482..0b78d46 100644
--- a/src/kudu/rpc/connection.cc
+++ b/src/kudu/rpc/connection.cc
@@ -376,7 +376,10 @@ void Connection::CallAwaitingResponse::HandleTimeout(ev::timer &watcher, int rev
 
 void Connection::HandleOutboundCallTimeout(CallAwaitingResponse *car) {
   DCHECK(reactor_thread_->IsCurrentThread());
-  DCHECK(car->call);
+  if (!car->call) {
+    // The RPC may have been cancelled before the timeout was hit.
+    return;
+  }
   // The timeout timer is stopped by the car destructor exiting Connection::HandleCallResponse()
   DCHECK(!car->call->IsFinished());
 
diff --git a/src/kudu/rpc/rpc-test-base.h b/src/kudu/rpc/rpc-test-base.h
index 99e36ff..0b6f047 100644
--- a/src/kudu/rpc/rpc-test-base.h
+++ b/src/kudu/rpc/rpc-test-base.h
@@ -533,9 +533,10 @@ class RpcTestBase : public KuduTest {
     CHECK_OK(DoTestOutgoingSidecar(p, size1, size2));
   }
 
-  void DoTestExpectTimeout(const Proxy& p,
-                           const MonoDelta& timeout,
-                           bool* is_negotiaton_error = nullptr) {
+  static void DoTestExpectTimeout(const Proxy& p,
+                                  const MonoDelta& timeout,
+                                  bool will_be_cancelled = false,
+                                  bool* is_negotiaton_error = nullptr) {
     SleepRequestPB req;
     SleepResponsePB resp;
     // Sleep for 500ms longer than the call timeout.
@@ -554,13 +555,18 @@ class RpcTestBase : public KuduTest {
     }
 
     int expected_millis = timeout.ToMilliseconds();
-    int elapsed_millis = sw.elapsed().wall_millis();
+    int elapsed_millis = static_cast<int>(sw.elapsed().wall_millis());
 
-    // We shouldn't timeout significantly faster than our configured timeout.
-    EXPECT_GE(elapsed_millis, expected_millis - 10);
+    // We shouldn't timeout significantly faster than our configured timeout, unless the
+    // rpc is cancelled.
+    if (!will_be_cancelled) EXPECT_GE(elapsed_millis, expected_millis - 10);
     // And we also shouldn't take the full time that we asked for
     EXPECT_LT(elapsed_millis * 1000, sleep_micros);
-    EXPECT_TRUE(s.IsTimedOut());
+    if (will_be_cancelled) {
+      EXPECT_TRUE(s.IsAborted());
+    } else {
+      EXPECT_TRUE(s.IsTimedOut());
+    }
     LOG(INFO) << "status: " << s.ToString() << ", seconds elapsed: " << sw.elapsed().wall_seconds();
   }
 
diff --git a/src/kudu/rpc/rpc-test.cc b/src/kudu/rpc/rpc-test.cc
index 839198c..4258de2 100644
--- a/src/kudu/rpc/rpc-test.cc
+++ b/src/kudu/rpc/rpc-test.cc
@@ -1016,7 +1016,7 @@ TEST_F(TestRpc, TestNegotiationTimeout) {
 
   bool is_negotiation_error = false;
   NO_FATALS(DoTestExpectTimeout(
-      p, MonoDelta::FromMilliseconds(100), &is_negotiation_error));
+      p, MonoDelta::FromMilliseconds(100), false, &is_negotiation_error));
   EXPECT_TRUE(is_negotiation_error);
 
   acceptor_thread->Join();
@@ -1305,6 +1305,7 @@ TEST_P(TestRpc, TestCancellation) {
   Proxy p(client_messenger, server_addr, server_addr.host(),
           GenericCalculatorService::static_service_name());
 
+  int timeout_ms = 10;
   for (int i = OutboundCall::READY; i <= OutboundCall::FINISHED_SUCCESS; ++i) {
     FLAGS_rpc_inject_cancellation_state = i;
     switch (i) {
@@ -1315,6 +1316,7 @@ TEST_P(TestRpc, TestCancellation) {
         ASSERT_TRUE(DoTestOutgoingSidecar(p, 0, 0).IsAborted());
         ASSERT_TRUE(DoTestOutgoingSidecar(p, 123, 456).IsAborted());
         ASSERT_TRUE(DoTestOutgoingSidecar(p, 3000 * 1024, 2000 * 1024).IsAborted());
+        DoTestExpectTimeout(p, MonoDelta::FromMilliseconds(timeout_ms), true);
         break;
       case OutboundCall::NEGOTIATION_TIMED_OUT:
       case OutboundCall::TIMED_OUT:
@@ -1342,6 +1344,8 @@ TEST_P(TestRpc, TestCancellation) {
         break;
     }
   }
+  // Sleep briefly to ensure the timeout tests have a chance for the timeouts to trigger.
+  SleepFor(MonoDelta::FromMilliseconds(timeout_ms * 2));
   client_messenger->Shutdown();
 }