You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/22 17:34:48 UTC

[impala] 05/13: IMPALA-5202: Disallow PREPARE:WAIT debug action

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch 2.x
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 98e0126e95c7901665eec36dbbf5e1e5b3a6ee42
Author: Dan Hecht <dh...@cloudera.com>
AuthorDate: Wed Jun 20 14:49:57 2018 -0700

    IMPALA-5202: Disallow PREPARE:WAIT debug action
    
    In order to simplify FIS startup, we don't allow cancellation until all
    FIS have finished Prepare(), so we shouldn't allow PREPARE:WAIT since
    there will be no way to cancel out of the loop.  Make this explicit.
    
    Change-Id: I1caa4f8e6ce7f32a8a3722648e08e24f34dba35d
    Reviewed-on: http://gerrit.cloudera.org:8080/10776
    Reviewed-by: Dan Hecht <dh...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/exec-node.cc         |  2 ++
 be/src/runtime/debug-options.cc  | 11 ++++++++---
 tests/failure/test_failpoints.py |  8 +++++---
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/be/src/exec/exec-node.cc b/be/src/exec/exec-node.cc
index 504a419..7cdd054 100644
--- a/be/src/exec/exec-node.cc
+++ b/be/src/exec/exec-node.cc
@@ -408,6 +408,8 @@ Status ExecNode::ExecDebugActionImpl(TExecNodePhase::type phase, RuntimeState* s
   if (debug_options_.action == TDebugAction::FAIL) {
     return Status(TErrorCode::INTERNAL_ERROR, "Debug Action: FAIL");
   } else if (debug_options_.action == TDebugAction::WAIT) {
+    // See DebugOptions::DebugOptions().
+    DCHECK(phase != TExecNodePhase::PREPARE && phase != TExecNodePhase::CLOSE);
     while (!state->is_cancelled()) {
       sleep(1);
     }
diff --git a/be/src/runtime/debug-options.cc b/be/src/runtime/debug-options.cc
index 6e5062d..b8b6b35 100644
--- a/be/src/runtime/debug-options.cc
+++ b/be/src/runtime/debug-options.cc
@@ -65,9 +65,14 @@ DebugOptions::DebugOptions(const TQueryOptions& query_options)
       phase_ = TExecNodePhase::INVALID;
       return;
     }
-    if (phase_ == TExecNodePhase::CLOSE && action_ == TDebugAction::WAIT) {
-      LOG(WARNING) << "Do not use CLOSE:WAIT debug actions because nodes cannot be "
-          "cancelled in Close()";
+    if ((phase_ == TExecNodePhase::PREPARE || phase_ == TExecNodePhase::CLOSE) &&
+        action_ == TDebugAction::WAIT) {
+      // Cancellation is not allowed before PREPARE phase completes (to keep FIS
+      // startup simpler - it will be checked immediately after when execution begins).
+      // Close() must always run to completion to free resources, so cancellation does
+      // not make sense for the CLOSE phase and is not checked. So we disallow these.
+      LOG(WARNING) << Substitute("Do not use $0:$1 debug actions because nodes cannot "
+          "be cancelled in that phase.", *phase_str, *action_str);
       phase_ = TExecNodePhase::INVALID;
       return;
     }
diff --git a/tests/failure/test_failpoints.py b/tests/failure/test_failpoints.py
index 9f54257..7366900 100644
--- a/tests/failure/test_failpoints.py
+++ b/tests/failure/test_failpoints.py
@@ -75,6 +75,11 @@ class TestFailpoints(ImpalaTestSuite):
     cls.ImpalaTestMatrix.add_dimension(
         create_exec_option_dimension([0], [False], [0]))
 
+    # Don't create PREPARE:WAIT debug actions because cancellation is not checked until
+    # after the prepare phase once execution is started.
+    cls.ImpalaTestMatrix.add_constraint(
+        lambda v: not (v.get_value('action') == 'CANCEL'
+                     and v.get_value('location') == 'PREPARE'))
     # Don't create CLOSE:WAIT debug actions to avoid leaking plan fragments (there's no
     # way to cancel a plan fragment once Close() has been called)
     cls.ImpalaTestMatrix.add_constraint(
@@ -90,9 +95,6 @@ class TestFailpoints(ImpalaTestSuite):
     location = vector.get_value('location')
     vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
 
-    if action == "CANCEL" and location == "PREPARE":
-      pytest.xfail(reason="IMPALA-5202 leads to a hang.")
-
     try:
       plan_node_ids = self.__parse_plan_nodes_from_explain(query, vector)
     except ImpalaBeeswaxException as e: