You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2022/04/02 20:32:38 UTC

[impala] 01/02: IMPALA-11216: Fix flaky test_describe_history_params

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 72817181212372999b7920d7eadaca93dda8731a
Author: Tamas Mate <tm...@apache.org>
AuthorDate: Fri Apr 1 15:45:27 2022 +0200

    IMPALA-11216: Fix flaky test_describe_history_params
    
    The test_describe_history_params test was flaky because the client can
    receive timestamps in two formats, with or without fractional seconds.
    This commit updates the test to handle both patterns.
    
    Testing:
     - Executed the test locally.
    
    Change-Id: I6aaae9b9fc14dd20390c16f9555d6f43357814a5
    Reviewed-on: http://gerrit.cloudera.org:8080/18375
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/query_test/test_iceberg.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 322b7c9..725671a 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -126,7 +126,6 @@ class TestIcebergTable(ImpalaTestSuite):
 
   def test_describe_history_params(self, vector, unique_database):
     tbl_name = unique_database + ".describe_history"
-    time_format = '%Y-%m-%d %H:%M:%S.%f'
 
     def execute_query_ts(query):
       impalad_client.execute(query)
@@ -137,9 +136,8 @@ class TestIcebergTable(ImpalaTestSuite):
       data = impalad_client.execute(query)
       assert len(data.data) == expected_result_size
       for i in range(len(data.data)):
-        result_ts = data.data[i].split('\t')[0][:- 3]
-        result_ts_dt = datetime.datetime.strptime(result_ts, time_format)
-        assert result_ts_dt > ts
+        result_ts_dt = parse_timestamp(data.data[i].split('\t')[0])
+        assert result_ts_dt >= ts
 
     def expect_results_between(ts_start, ts_end, expected_result_size):
       query = "DESCRIBE HISTORY {0} BETWEEN {1} AND {2};".format(
@@ -147,9 +145,18 @@ class TestIcebergTable(ImpalaTestSuite):
       data = impalad_client.execute(query)
       assert len(data.data) == expected_result_size
       for i in range(len(data.data)):
-        result_ts = data.data[i].split('\t')[0][:- 3]
-        result_ts_dt = datetime.datetime.strptime(result_ts, time_format)
-        assert result_ts_dt > ts_start and result_ts_dt < ts_end
+        result_ts_dt = parse_timestamp(data.data[i].split('\t')[0])
+        assert result_ts_dt >= ts_start and result_ts_dt <= ts_end
+
+    def parse_timestamp(ts_string):
+      """The client can receive the timestamp in two formats, if the timestamp has
+      fractional seconds "yyyy-MM-dd HH:mm:ss.SSSSSSSSS" pattern is used, otherwise
+      "yyyy-MM-dd HH:mm:ss". Additionally, Python's datetime library cannot handle
+      nanoseconds, therefore in that case the timestamp has to be trimmed."""
+      if len(ts_string.split('.')) > 1:
+        return datetime.datetime.strptime(ts_string[:-3], '%Y-%m-%d %H:%M:%S.%f')
+      else:
+        return datetime.datetime.strptime(ts_string, '%Y-%m-%d %H:%M:%S')
 
     def quote(s):
       return "'{0}'".format(s)
@@ -159,8 +166,7 @@ class TestIcebergTable(ImpalaTestSuite):
 
     def impala_now():
       now_data = impalad_client.execute("select now()")
-      now_data_ts = now_data.data[0][:- 3]
-      now_data_ts_dt = datetime.datetime.strptime(now_data_ts, time_format)
+      now_data_ts_dt = parse_timestamp(now_data.data[0])
       return now_data_ts_dt
 
     # We are setting the TIMEZONE query option in this test, so let's create a local