You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2020/08/28 16:54:01 UTC
[impala] branch master updated (568b339 -> 3733c4c)
This is an automated email from the ASF dual-hosted git repository.
stakiar pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.
from 568b339 IMPALA-10080: Skip loading HDFS cache pools for non-HDFS file systems
new 34668fa IMPALA-10092: Do not skip test vectors of Kudu tests in a custom cluster
new 3733c4c IMPALA-10050: Fixed DCHECK error for backend in terminal state.
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
be/src/runtime/coordinator.cc | 7 ++++++-
be/src/runtime/query-exec-mgr.cc | 20 +++++++++++---------
be/src/runtime/query-state.cc | 3 +++
tests/common/custom_cluster_test_suite.py | 1 -
tests/custom_cluster/test_kudu.py | 26 +++++++++++++++++++++++---
5 files changed, 43 insertions(+), 14 deletions(-)
[impala] 01/02: IMPALA-10092: Do not skip test vectors of Kudu
tests in a custom cluster
Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
stakiar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 34668fab878c224632710670618b3f0176cbc78d
Author: Fang-Yu Rao <fa...@cloudera.com>
AuthorDate: Thu Aug 20 14:51:15 2020 -0700
IMPALA-10092: Do not skip test vectors of Kudu tests in a custom cluster
We found that the following 4 tests do not run even we remove all the
decorators like "@SkipIfKudu.no_hybrid_clock" or
"@SkipIfHive3.kudu_hms_notifications_not_supported" to skip the tests.
This is due to the fact that those 3 classes inherit the class of
CustomClusterTestSuite, which adds a constraint that only allows test
vectors with 'file_format' and 'compression_codec' being "text" and
"none", respectively, to be run.
1. TestKuduOperations::test_local_tz_conversion_ops
2. TestKuduClientTimeout::test_impalad_timeout
3. TestKuduHMSIntegration::test_create_managed_kudu_tables
4. TestKuduHMSIntegration::test_kudu_alter_table
To address this issue, in this patch we create a parent class for those
3 classes above and override the method of
add_custom_cluster_constraints() for this newly created parent class so
that we do not skip test vectors with 'file_format' and
'compression_codec' being "kudu" and "none", respectively.
On the other hand, this patch also removes a redundant method call to
super(CustomClusterTestSuite, cls).add_test_dimensions() in
CustomClusterTestSuite.add_custom_cluster_constraints() since
super(CustomClusterTestSuite, cls).add_test_dimensions() had
already been called immediately before the call to
add_custom_cluster_constraints() in
CustomClusterTestSuite.add_test_dimensions().
Testing:
- Manually verified that after removing the decorators to skip those
tests, those tests could be run.
Change-Id: I60a4bd4ac5a9026629fb840ab9cc7b5f9948290c
Reviewed-on: http://gerrit.cloudera.org:8080/16348
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
tests/common/custom_cluster_test_suite.py | 1 -
tests/custom_cluster/test_kudu.py | 26 +++++++++++++++++++++++---
2 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/tests/common/custom_cluster_test_suite.py b/tests/common/custom_cluster_test_suite.py
index a3f2fd6..6e83046 100644
--- a/tests/common/custom_cluster_test_suite.py
+++ b/tests/common/custom_cluster_test_suite.py
@@ -74,7 +74,6 @@ class CustomClusterTestSuite(ImpalaTestSuite):
# Defines constraints for custom cluster tests, called by add_test_dimensions.
# By default, custom cluster tests only run on text/none and with a limited set of
# exec options. Subclasses may override this to relax these default constraints.
- super(CustomClusterTestSuite, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_constraint(lambda v:
v.get_value('table_format').file_format == 'text' and
v.get_value('table_format').compression_codec == 'none')
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index 96c8717..fb38114 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -28,7 +28,26 @@ from tests.common.test_dimensions import add_exec_option_dimension
KUDU_MASTER_HOSTS = pytest.config.option.kudu_master_hosts
LOG = logging.getLogger(__name__)
-class TestKuduOperations(CustomClusterTestSuite, KuduTestSuite):
+
+class CustomKuduTest(CustomClusterTestSuite, KuduTestSuite):
+
+ @classmethod
+ def get_workload(cls):
+ return 'functional-query'
+
+ @classmethod
+ def add_custom_cluster_constraints(cls):
+ # Override this method to relax the set of constraints added in
+ # CustomClusterTestSuite.add_custom_cluster_constraints() so that a test vector with
+ # 'file_format' and 'compression_codec' being "kudu" and "none" respectively will not
+ # be skipped.
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
+ v.get_value('exec_option')['batch_size'] == 0 and
+ v.get_value('exec_option')['disable_codegen'] is False and
+ v.get_value('exec_option')['num_nodes'] == 0)
+
+
+class TestKuduOperations(CustomKuduTest):
@classmethod
def get_workload(cls):
@@ -95,7 +114,8 @@ class TestKuduOperations(CustomClusterTestSuite, KuduTestSuite):
except Exception as e:
assert "Error overflow in Kudu session." in str(e)
-class TestKuduClientTimeout(CustomClusterTestSuite, KuduTestSuite):
+
+class TestKuduClientTimeout(CustomKuduTest):
"""Kudu tests that set the Kudu client operation timeout to 1ms and expect
specific timeout exceptions. While we expect all exercised operations to take at
least 1ms, it is possible that some may not and thus the test could be flaky. If
@@ -114,7 +134,7 @@ class TestKuduClientTimeout(CustomClusterTestSuite, KuduTestSuite):
self.run_test_case('QueryTest/kudu-timeouts-impalad', vector)
-class TestKuduHMSIntegration(CustomClusterTestSuite, KuduTestSuite):
+class TestKuduHMSIntegration(CustomKuduTest):
# TODO(IMPALA-8614): parameterize the common tests in query_test/test_kudu.py
# to run with HMS integration enabled. Also avoid restarting Impala to reduce
# tests time.
[impala] 02/02: IMPALA-10050: Fixed DCHECK error for backend in
terminal state.
Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
stakiar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3733c4cc2cfb78d7f13463fb1ee9e1c4560d4a3d
Author: wzhou-code <wz...@cloudera.com>
AuthorDate: Thu Aug 6 15:52:47 2020 -0700
IMPALA-10050: Fixed DCHECK error for backend in terminal state.
Recent patch for IMPALA-6788 makes coordinator to cancel inflight
query fragment instances when it receives failure report from one
backend. It's possible the BackendState::Cancel() is called for
one fragment instance before the first execution status report
from its backend is received and processed by the coordinator.
Since the status of BackendState is set as Cancelled after Cancel()
is called, the execution of the fragment instance is treated as
Done in such case so that the status report will NOT be processed.
Hence the backend receives response OK from coordinator even it
sent a report with execution error. This make backend hit DCHECK
error if backend in the terminal state with error.
This patch fixs the issue by making coordinator send CANCELLED
status in the response of status report if the backend status is not
ok and the execution status report is not applied.
Testing:
- The issue could be reproduced by running test_failpoints for about
20 iterations. Verified the fixing by running test_failpoints over
200 iterations without DCHECK failure.
- Passed TestProcessFailures::test_kill_coordinator.
- Psssed TestRPCException::test_state_report_error.
- Passed exhaustive tests.
Change-Id: Iba6a72f98c0f9299c22c58830ec5a643335b966a
Reviewed-on: http://gerrit.cloudera.org:8080/16303
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/runtime/coordinator.cc | 7 ++++++-
be/src/runtime/query-exec-mgr.cc | 20 +++++++++++---------
be/src/runtime/query-state.cc | 3 +++
3 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index b4f5178..119e40b 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -1005,7 +1005,12 @@ Status Coordinator::UpdateBackendExecStatus(const ReportExecStatusRequestPB& req
// If query execution has terminated, return a cancelled status to force the fragment
// instance to stop executing.
- return IsExecuting() ? Status::OK() : Status::CANCELLED;
+ // After cancelling backend_state, it's possible that current exec_state is still
+ // EXECUTING but the backend status is not OK since execution status report is not
+ // applied to update the overall status. In such case, we should return a cancelled
+ // status to backend.
+ return (IsExecuting() && backend_state->GetStatus().ok()) ? Status::OK() :
+ Status::CANCELLED;
}
Status Coordinator::UpdateBlacklistWithAuxErrorInfo(
diff --git a/be/src/runtime/query-exec-mgr.cc b/be/src/runtime/query-exec-mgr.cc
index ac07085..6874ca4 100644
--- a/be/src/runtime/query-exec-mgr.cc
+++ b/be/src/runtime/query-exec-mgr.cc
@@ -226,15 +226,17 @@ void QueryExecMgr::CancelQueriesForFailedCoordinators(
// Build a list of queries that are scheduled by failed coordinators (as
// evidenced by their absence from the cluster membership list).
std::vector<QueryCancellationTask> to_cancel;
- qs_map_.DoFuncForAllEntries([&](QueryState* qs) {
- if (qs != nullptr && !qs->IsCancelled()) {
- if (current_membership.find(qs->coord_backend_id()) == current_membership.end()) {
- // decremented by ReleaseQueryState()
- AcquireQueryStateLocked(qs);
- to_cancel.push_back(QueryCancellationTask(qs));
- }
- }
- });
+ ExecEnv::GetInstance()->query_exec_mgr()->qs_map_.DoFuncForAllEntries(
+ [&](QueryState* qs) {
+ if (qs != nullptr && !qs->IsCancelled()) {
+ if (current_membership.find(qs->coord_backend_id())
+ == current_membership.end()) {
+ // decremented by ReleaseQueryState()
+ AcquireQueryStateLocked(qs);
+ to_cancel.push_back(QueryCancellationTask(qs));
+ }
+ }
+ });
// Since we are the only producer for the cancellation thread pool, we can find the
// remaining capacity of the pool and submit the new cancellation requests without
diff --git a/be/src/runtime/query-state.cc b/be/src/runtime/query-state.cc
index ce61f99..8e071f9 100644
--- a/be/src/runtime/query-state.cc
+++ b/be/src/runtime/query-state.cc
@@ -806,6 +806,9 @@ done:
DCHECK(entry.second->IsDone());
}
} else {
+ // If the query execution hit an error, when the final status report is sent, the
+ // coordinator's response will instruct the QueryState to cancel itself, so Cancel()
+ // should have always been called by this point.
DCHECK_EQ(is_cancelled_.Load(), 1);
}
}