You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2022/08/27 06:57:58 UTC
[impala] branch master updated: IMPALA-11505: Fix flaky test test_kudu_txn_abort_partition_lock
This is an automated email from the ASF dual-hosted git repository.
wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new a26b98612 IMPALA-11505: Fix flaky test test_kudu_txn_abort_partition_lock
a26b98612 is described below
commit a26b98612ababa74dceb6469a79f475d0be50d9c
Author: wzhou-code <wz...@cloudera.com>
AuthorDate: Thu Aug 25 02:37:45 2022 -0700
IMPALA-11505: Fix flaky test test_kudu_txn_abort_partition_lock
TestKuduTransaction.test_kudu_txn_abort_partition_lock fails intermittently
because a timing issue causes an unexpected exception message.
This patch adds a new debug action so that test code can explicitly
control whether Commit() is called for a Kudu transaction. The test now
runs the first query synchronously with the new debug action.
Testing:
- Ran test_kudu_txn_abort_partition_lock 500 times in Jenkins
without failure.
- Passed core tests.
Change-Id: If5a79bca6bfdac49192b76b1a31379d872db59e0
Reviewed-on: http://gerrit.cloudera.org:8080/18911
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/service/client-request-state.cc | 12 +++++++++++-
tests/custom_cluster/test_kudu.py | 18 +++++++++---------
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc
index d552a5012..00d6a1908 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -1812,7 +1812,17 @@ void ClientRequestState::AbortKuduTransaction() {
Status ClientRequestState::CommitKuduTransaction() {
DCHECK(InKuduTransaction());
- Status status = frontend_->CommitKuduTransaction(query_ctx_.query_id);
+ // Skip calling Commit() for Kudu Transaction with a debug action so that test code
+ // could explicitly control over calling Commit().
+ Status status = DebugAction(exec_request_->query_options, "CRS_NOT_COMMIT_KUDU_TXN");
+ if (UNLIKELY(!status.ok())) {
+ VLOG(1) << Substitute("Skip to commit Kudu transaction with query-id: $0",
+ PrintId(query_ctx_.query_id));
+ transaction_closed_ = true;
+ return Status::OK();
+ }
+
+ status = frontend_->CommitKuduTransaction(query_ctx_.query_id);
if (status.ok()) {
query_events_->MarkEvent("Kudu transaction committed");
transaction_closed_ = true;
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index 5caa50dd4..9759c331f 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -570,15 +570,17 @@ class TestKuduTransactionBase(CustomClusterTestSuite):
table_name = "%s.test_kudu_txn_abort_partition_lock" % unique_database
self.execute_query(self._create_kudu_table_query.format(table_name))
- # Enable Kudu transactions and run "insert" query with injected sleeping time for
- # 3 seconds. The query is started asynchronously.
+ # Enable Kudu transactions and run "insert" query with debug action to skip calling
+ # Commit for Kudu transaction so that partition locking is not released. The Kudu
+ # transaction object is held by KuduTransactionManager and the transaction is not
+ # cleaned up after this test. But the Impala daemon will be restarted after this
+ # class of custom cluster test so that the transaction will be cleaned up on Kudu
+ # server after Impala daemon is restarted since there is no heart beat for the
+ # uncommitted transaction.
self.execute_query("set ENABLE_KUDU_TRANSACTION=true")
- query_options = {'debug_action': 'FIS_KUDU_TABLE_SINK_WRITE_BATCH:SLEEP@3000'}
+ query_options = {'debug_action': 'CRS_NOT_COMMIT_KUDU_TXN:FAIL'}
query = "insert into %s values (0, 'a')" % table_name
- handle = self.execute_query_async(query, query_options)
- # Wait for it to start running. Kudu lock the partition for more than 3 seconds.
- self.wait_for_state(handle, self.client.QUERY_STATES['RUNNING'], 60)
- sleep(1)
+ self.execute_query(query, query_options)
# Launch the same query again. The query should fail with error message "aborted
# since it tries to acquire the partition lock that is held by another transaction".
try:
@@ -587,8 +589,6 @@ class TestKuduTransactionBase(CustomClusterTestSuite):
except ImpalaBeeswaxException as e:
assert "aborted since it tries to acquire the partition lock that is held by " \
"another transaction" in str(e)
- # Close the first query.
- self.client.close_query(handle)
class TestKuduTransaction(TestKuduTransactionBase):