You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2022/08/27 06:57:58 UTC

[impala] branch master updated: IMPALA-11505: Fix flaky test test_kudu_txn_abort_partition_lock

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new a26b98612 IMPALA-11505: Fix flaky test test_kudu_txn_abort_partition_lock
a26b98612 is described below

commit a26b98612ababa74dceb6469a79f475d0be50d9c
Author: wzhou-code <wz...@cloudera.com>
AuthorDate: Thu Aug 25 02:37:45 2022 -0700

    IMPALA-11505: Fix flaky test test_kudu_txn_abort_partition_lock
    
    TestKuduTransaction.test_kudu_txn_abort_partition_lock is flaky: a timing
    issue causes the exception message to differ from the expected one.
    
    This patch adds a new debug action so that test code can explicitly
    control whether Commit() is called for a Kudu transaction. The test now
    runs the first query synchronously with the new debug action.
    
    Testing:
     - Ran test_kudu_txn_abort_partition_lock 500 times in Jenkins
       without failure.
     - Passed core tests.
    
    Change-Id: If5a79bca6bfdac49192b76b1a31379d872db59e0
    Reviewed-on: http://gerrit.cloudera.org:8080/18911
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/service/client-request-state.cc | 12 +++++++++++-
 tests/custom_cluster/test_kudu.py      | 18 +++++++++---------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc
index d552a5012..00d6a1908 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -1812,7 +1812,17 @@ void ClientRequestState::AbortKuduTransaction() {
 
 Status ClientRequestState::CommitKuduTransaction() {
   DCHECK(InKuduTransaction());
-  Status status = frontend_->CommitKuduTransaction(query_ctx_.query_id);
+  // Skip calling Commit() for the Kudu transaction when the debug action is set so that
+  // test code can explicitly control whether Commit() is called.
+  Status status = DebugAction(exec_request_->query_options, "CRS_NOT_COMMIT_KUDU_TXN");
+  if (UNLIKELY(!status.ok())) {
+    VLOG(1) << Substitute("Skip to commit Kudu transaction with query-id: $0",
+        PrintId(query_ctx_.query_id));
+    transaction_closed_ = true;
+    return Status::OK();
+  }
+
+  status = frontend_->CommitKuduTransaction(query_ctx_.query_id);
   if (status.ok()) {
     query_events_->MarkEvent("Kudu transaction committed");
     transaction_closed_ = true;
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index 5caa50dd4..9759c331f 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -570,15 +570,17 @@ class TestKuduTransactionBase(CustomClusterTestSuite):
     table_name = "%s.test_kudu_txn_abort_partition_lock" % unique_database
     self.execute_query(self._create_kudu_table_query.format(table_name))
 
-    # Enable Kudu transactions and run "insert" query with injected sleeping time for
-    # 3 seconds. The query is started asynchronously.
+    # Enable Kudu transactions and run an "insert" query with a debug action that skips
+    # calling Commit() for the Kudu transaction so that the partition lock is not
+    # released. The Kudu transaction object is held by KuduTransactionManager and the
+    # transaction is not cleaned up after this test. However, the Impala daemon is
+    # restarted after this class of custom cluster tests, so the transaction will be
+    # cleaned up on the Kudu server after the restart because no heartbeat is sent for
+    # the uncommitted transaction.
     self.execute_query("set ENABLE_KUDU_TRANSACTION=true")
-    query_options = {'debug_action': 'FIS_KUDU_TABLE_SINK_WRITE_BATCH:SLEEP@3000'}
+    query_options = {'debug_action': 'CRS_NOT_COMMIT_KUDU_TXN:FAIL'}
     query = "insert into %s values (0, 'a')" % table_name
-    handle = self.execute_query_async(query, query_options)
-    # Wait for it to start running. Kudu lock the partition for more than 3 seconds.
-    self.wait_for_state(handle, self.client.QUERY_STATES['RUNNING'], 60)
-    sleep(1)
+    self.execute_query(query, query_options)
     # Launch the same query again. The query should fail with error message "aborted
     # since it tries to acquire the partition lock that is held by another transaction".
     try:
@@ -587,8 +589,6 @@ class TestKuduTransactionBase(CustomClusterTestSuite):
     except ImpalaBeeswaxException as e:
       assert "aborted since it tries to acquire the partition lock that is held by " \
           "another transaction" in str(e)
-    # Close the first query.
-    self.client.close_query(handle)
 
 
 class TestKuduTransaction(TestKuduTransactionBase):