You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/22 17:34:48 UTC
[impala] 05/13: IMPALA-5202: Disallow PREPARE:WAIT debug action
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch 2.x
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 98e0126e95c7901665eec36dbbf5e1e5b3a6ee42
Author: Dan Hecht <dh...@cloudera.com>
AuthorDate: Wed Jun 20 14:49:57 2018 -0700
IMPALA-5202: Disallow PREPARE:WAIT debug action
In order to simplify FIS startup, we don't allow cancellation until all
FIS have finished Prepare(), so we shouldn't allow PREPARE:WAIT since
there will be no way to cancel out of the loop. Make this explicit.
Change-Id: I1caa4f8e6ce7f32a8a3722648e08e24f34dba35d
Reviewed-on: http://gerrit.cloudera.org:8080/10776
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exec/exec-node.cc | 2 ++
be/src/runtime/debug-options.cc | 11 ++++++++---
tests/failure/test_failpoints.py | 8 +++++---
3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/be/src/exec/exec-node.cc b/be/src/exec/exec-node.cc
index 504a419..7cdd054 100644
--- a/be/src/exec/exec-node.cc
+++ b/be/src/exec/exec-node.cc
@@ -408,6 +408,8 @@ Status ExecNode::ExecDebugActionImpl(TExecNodePhase::type phase, RuntimeState* s
if (debug_options_.action == TDebugAction::FAIL) {
return Status(TErrorCode::INTERNAL_ERROR, "Debug Action: FAIL");
} else if (debug_options_.action == TDebugAction::WAIT) {
+ // See DebugOptions::DebugOptions().
+ DCHECK(phase != TExecNodePhase::PREPARE && phase != TExecNodePhase::CLOSE);
while (!state->is_cancelled()) {
sleep(1);
}
diff --git a/be/src/runtime/debug-options.cc b/be/src/runtime/debug-options.cc
index 6e5062d..b8b6b35 100644
--- a/be/src/runtime/debug-options.cc
+++ b/be/src/runtime/debug-options.cc
@@ -65,9 +65,14 @@ DebugOptions::DebugOptions(const TQueryOptions& query_options)
phase_ = TExecNodePhase::INVALID;
return;
}
- if (phase_ == TExecNodePhase::CLOSE && action_ == TDebugAction::WAIT) {
- LOG(WARNING) << "Do not use CLOSE:WAIT debug actions because nodes cannot be "
- "cancelled in Close()";
+ if ((phase_ == TExecNodePhase::PREPARE || phase_ == TExecNodePhase::CLOSE) &&
+ action_ == TDebugAction::WAIT) {
+ // Cancellation is not allowed before PREPARE phase completes (to keep FIS
+ // startup simpler - it will be checked immediately after when execution begins).
+ // Close() must always run to completion to free resources, so cancellation does
+ // not make sense for the CLOSE phase and is not checked. So we disallow these.
+ LOG(WARNING) << Substitute("Do not use $0:$1 debug actions because nodes cannot "
+ "be cancelled in that phase.", *phase_str, *action_str);
phase_ = TExecNodePhase::INVALID;
return;
}
diff --git a/tests/failure/test_failpoints.py b/tests/failure/test_failpoints.py
index 9f54257..7366900 100644
--- a/tests/failure/test_failpoints.py
+++ b/tests/failure/test_failpoints.py
@@ -75,6 +75,11 @@ class TestFailpoints(ImpalaTestSuite):
cls.ImpalaTestMatrix.add_dimension(
create_exec_option_dimension([0], [False], [0]))
+ # Don't create PREPARE:WAIT debug actions because cancellation is not checked until
+ # after the prepare phase once execution is started.
+ cls.ImpalaTestMatrix.add_constraint(
+ lambda v: not (v.get_value('action') == 'CANCEL'
+ and v.get_value('location') == 'PREPARE'))
# Don't create CLOSE:WAIT debug actions to avoid leaking plan fragments (there's no
# way to cancel a plan fragment once Close() has been called)
cls.ImpalaTestMatrix.add_constraint(
@@ -90,9 +95,6 @@ class TestFailpoints(ImpalaTestSuite):
location = vector.get_value('location')
vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
- if action == "CANCEL" and location == "PREPARE":
- pytest.xfail(reason="IMPALA-5202 leads to a hang.")
-
try:
plan_node_ids = self.__parse_plan_nodes_from_explain(query, vector)
except ImpalaBeeswaxException as e: