You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jy...@apache.org on 2016/09/15 23:58:54 UTC

[2/2] incubator-impala git commit: IMPALA-3912: test_random_rpc_timeout is flaky.

IMPALA-3912: test_random_rpc_timeout is flaky.

The datastream sender's default timeout is 2 minutes, which
can prevent some fragments from completing until the timeout
expires, so the metric "num-fragments-in-flight" may not be
back to 0 after 60 seconds.
Decrease the sender timeout to 30 seconds and add some
logging.

Change-Id: I19f8b3fea66c5a0398e3476a46f060be9f951983
Reviewed-on: http://gerrit.cloudera.org:8080/4080
Reviewed-by: Juan Yu <jy...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/9b3f43b9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/9b3f43b9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/9b3f43b9

Branch: refs/heads/master
Commit: 9b3f43b9fd006fe5d72281b0862abe42e0a0a061
Parents: a42d18d
Author: Juan Yu <jy...@cloudera.com>
Authored: Mon Aug 22 11:49:18 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Thu Sep 15 21:46:45 2016 +0000

----------------------------------------------------------------------
 be/src/service/fragment-mgr.cc           |  2 ++
 tests/custom_cluster/test_rpc_timeout.py | 11 +++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9b3f43b9/be/src/service/fragment-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/service/fragment-mgr.cc b/be/src/service/fragment-mgr.cc
index 5ae5845..a98bbf9 100644
--- a/be/src/service/fragment-mgr.cc
+++ b/be/src/service/fragment-mgr.cc
@@ -100,6 +100,8 @@ void FragmentMgr::FragmentThread(TUniqueId fragment_instance_id) {
   // the fragment exec state.
   ImpaladMetrics::IMPALA_SERVER_NUM_FRAGMENTS_IN_FLIGHT->Increment(-1L);
 
+  VLOG_QUERY << "PlanFragment completed. instance_id=" << fragment_instance_id;
+
 #ifndef ADDRESS_SANITIZER
   // tcmalloc and address sanitizer can not be used together
   if (FLAGS_log_mem_usage_interval > 0) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9b3f43b9/tests/custom_cluster/test_rpc_timeout.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_rpc_timeout.py b/tests/custom_cluster/test_rpc_timeout.py
index 76eb3d6..4ea5351 100644
--- a/tests/custom_cluster/test_rpc_timeout.py
+++ b/tests/custom_cluster/test_rpc_timeout.py
@@ -78,7 +78,8 @@ class TestRPCTimeout(CustomClusterTestSuite):
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args("--backend_client_rpc_timeout_ms=1000"
-      " --fault_injection_rpc_delay_ms=3000 --fault_injection_rpc_type=1")
+      " --fault_injection_rpc_delay_ms=3000 --fault_injection_rpc_type=1"
+      " --datastream_sender_timeout_ms=30000")
   def test_execplanfragment_timeout(self, vector):
     for i in range(3):
       ex= self.execute_query_expect_failure(self.client, self.TEST_QUERY)
@@ -91,7 +92,8 @@ class TestRPCTimeout(CustomClusterTestSuite):
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args("--backend_client_rpc_timeout_ms=1000"
-      " --fault_injection_rpc_delay_ms=3000 --fault_injection_rpc_type=2")
+      " --fault_injection_rpc_delay_ms=3000 --fault_injection_rpc_type=2"
+      " --datastream_sender_timeout_ms=30000")
   def test_cancelplanfragment_timeout(self, vector):
     query = "select * from tpch.lineitem limit 5000"
     self.execute_query_then_cancel(query, vector)
@@ -117,12 +119,13 @@ class TestRPCTimeout(CustomClusterTestSuite):
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args("--backend_client_rpc_timeout_ms=1000"
       " --fault_injection_rpc_delay_ms=3000 --fault_injection_rpc_type=6"
-      " --status_report_interval=1 ")
+      " --status_report_interval=1")
   def test_reportexecstatus_timeout(self, vector):
     self.execute_query_verify_metrics(self.TEST_QUERY)
 
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args("--backend_client_rpc_timeout_ms=1000"
-      " --fault_injection_rpc_delay_ms=3000 --fault_injection_rpc_type=7")
+      " --fault_injection_rpc_delay_ms=3000 --fault_injection_rpc_type=7"
+      " --datastream_sender_timeout_ms=30000")
   def test_random_rpc_timeout(self, vector):
     self.execute_query_verify_metrics(self.TEST_QUERY, 10)