You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2021/09/10 16:27:03 UTC
[impala] 04/04: IMPALA-10905: test_iceberg.py test_time_travel fails in exhaustive builds
This is an automated email from the ASF dual-hosted git repository.
boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8086c4457e02a5bc542d183b82b733891df4eddf
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Wed Sep 8 15:36:57 2021 +0200
IMPALA-10905: test_iceberg.py test_time_travel fails in exhaustive builds
test_iceberg.py::test_time_travel changes the TIMEZONE query option
during execution and leaves it set to 'Asia/Tokyo' when it finishes.
The problem is that it sets the option on the client of the
ImpalaTestSuite rather than on a client private to the test. As a
result, a subsequent test runs with timezone 'Asia/Tokyo' instead of
the local one.
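This patch fixes the issue by creating a local Impala client for
the test, so the TIMEZONE changes stay on that client.
ImpalaConnection gains __enter__/__exit__ so the client can be used
in a 'with' block, which closes it on exit.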
Change-Id: I5c172df865bc35dd1792146f4045fbc16e04d116
Reviewed-on: http://gerrit.cloudera.org:8080/17835
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
tests/common/impala_connection.py | 7 ++
tests/query_test/test_iceberg.py | 171 +++++++++++++++++++-------------------
2 files changed, 94 insertions(+), 84 deletions(-)
diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py
index f51a85e..07e172f 100644
--- a/tests/common/impala_connection.py
+++ b/tests/common/impala_connection.py
@@ -77,6 +77,13 @@ class OperationHandle(object):
# Represents an Impala connection.
class ImpalaConnection(object):
__metaclass__ = abc.ABCMeta
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.close()
+
@abc.abstractmethod
def set_configuration_option(self, name, value):
"""Sets a configuraiton option name to the given value"""
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 5bbd376..ca75ba0 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -122,11 +122,11 @@ class TestIcebergTable(ImpalaTestSuite):
tbl_name = unique_database + ".time_travel"
def execute_query_ts(query):
- self.execute_query(query)
+ impalad_client.execute(query)
return str(datetime.datetime.now())
def expect_results(query, expected_results):
- data = self.execute_query(query)
+ data = impalad_client.execute(query)
assert len(data.data) == len(expected_results)
for r in expected_results:
assert r in data.data
@@ -148,7 +148,7 @@ class TestIcebergTable(ImpalaTestSuite):
return "CAST({0} as timestamp)".format(quote(ts))
def get_snapshots():
- data = self.execute_query("describe history {0}".format(tbl_name))
+ data = impalad_client.execute("describe history {0}".format(tbl_name))
ret = list()
for row in data.data:
fields = row.split('\t')
@@ -156,89 +156,92 @@ class TestIcebergTable(ImpalaTestSuite):
return ret
def impala_now():
- now_data = self.execute_query("select now()")
+ now_data = impalad_client.execute("select now()")
return now_data.data[0]
- # Iceberg doesn't create a snapshot entry for the initial empty table
- self.execute_query("create table {0} (i int) stored as iceberg".format(tbl_name))
- ts_1 = execute_query_ts("insert into {0} values (1)".format(tbl_name))
- ts_2 = execute_query_ts("insert into {0} values (2)".format(tbl_name))
- ts_3 = execute_query_ts("truncate table {0}".format(tbl_name))
- time.sleep(1)
- ts_4 = execute_query_ts("insert into {0} values (100)".format(tbl_name))
- # Query table as of timestamps.
- expect_results_t("now()", ['100'])
- expect_results_t(quote(ts_1), ['1'])
- expect_results_t(quote(ts_2), ['1', '2'])
- expect_results_t(quote(ts_3), [])
- expect_results_t(quote(ts_4), ['100'])
- expect_results_t(cast_ts(ts_4) + " - interval 1 seconds", [])
- # Future queries return the current snapshot.
- expect_results_t(cast_ts(ts_4) + " + interval 1 hours", ['100'])
- # Query table as of snapshot IDs.
- snapshots = get_snapshots()
- expect_results_v(snapshots[0], ['1'])
- expect_results_v(snapshots[1], ['1', '2'])
- expect_results_v(snapshots[2], [])
- expect_results_v(snapshots[3], ['100'])
-
- # SELECT diff
- expect_results("""SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_new}'
- MINUS
- SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_old}'""".format(
- tbl=tbl_name, ts_new=ts_2, ts_old=ts_1),
- ['2'])
- expect_results("""SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_new}
- MINUS
- SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_old}""".format(
- tbl=tbl_name, v_new=snapshots[1], v_old=snapshots[0]),
- ['2'])
- # Mix SYSTEM_TIME ans SYSTEM_VERSION
- expect_results("""SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_new}
- MINUS
- SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_old}'""".format(
- tbl=tbl_name, v_new=snapshots[1], ts_old=ts_1),
- ['2'])
- expect_results("""SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_new}'
- MINUS
- SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_old}""".format(
- tbl=tbl_name, ts_new=ts_2, v_old=snapshots[0]),
- ['2'])
-
- # Query old snapshot
- try:
- self.execute_query("SELECT * FROM {0} FOR SYSTEM_TIME AS OF {1}".format(
- tbl_name, "now() - interval 2 years"))
- assert False # Exception must be thrown
- except Exception as e:
- assert "Cannot find a snapshot older than" in str(e)
- # Query invalid snapshot
- try:
- self.execute_query("SELECT * FROM {0} FOR SYSTEM_VERSION AS OF 42".format(tbl_name))
- assert False # Exception must be thrown
- except Exception as e:
- assert "Cannot find snapshot with ID 42" in str(e)
-
- # Check that timezone is interpreted in local timezone controlled by query option
- # TIMEZONE
- self.execute_query("truncate table {0}".format(tbl_name))
- self.execute_query("insert into {0} values (1111)".format(tbl_name))
- self.execute_query("SET TIMEZONE='Europe/Budapest'")
- now_budapest = impala_now()
- expect_results_t(quote(now_budapest), ['1111'])
-
- # Let's switch to Tokyo time. Tokyo time is always greater than Budapest time.
- self.execute_query("SET TIMEZONE='Asia/Tokyo'")
- now_tokyo = impala_now()
- expect_results_t(quote(now_tokyo), ['1111'])
- try:
- # Interpreting Budapest time in Tokyo time points to the past when the table
- # didn't exist.
- expect_results_t(quote(now_budapest), [])
- assert False
- except Exception as e:
- assert "Cannot find a snapshot older than" in str(e)
-
+ # We are setting the TIMEZONE query option in this test, so let's create a local
+ # impala client.
+ with self.create_impala_client() as impalad_client:
+ # Iceberg doesn't create a snapshot entry for the initial empty table
+ impalad_client.execute("create table {0} (i int) stored as iceberg".format(tbl_name))
+ ts_1 = execute_query_ts("insert into {0} values (1)".format(tbl_name))
+ ts_2 = execute_query_ts("insert into {0} values (2)".format(tbl_name))
+ ts_3 = execute_query_ts("truncate table {0}".format(tbl_name))
+ time.sleep(1)
+ ts_4 = execute_query_ts("insert into {0} values (100)".format(tbl_name))
+ # Query table as of timestamps.
+ expect_results_t("now()", ['100'])
+ expect_results_t(quote(ts_1), ['1'])
+ expect_results_t(quote(ts_2), ['1', '2'])
+ expect_results_t(quote(ts_3), [])
+ expect_results_t(quote(ts_4), ['100'])
+ expect_results_t(cast_ts(ts_4) + " - interval 1 seconds", [])
+ # Future queries return the current snapshot.
+ expect_results_t(cast_ts(ts_4) + " + interval 1 hours", ['100'])
+ # Query table as of snapshot IDs.
+ snapshots = get_snapshots()
+ expect_results_v(snapshots[0], ['1'])
+ expect_results_v(snapshots[1], ['1', '2'])
+ expect_results_v(snapshots[2], [])
+ expect_results_v(snapshots[3], ['100'])
+
+ # SELECT diff
+ expect_results("""SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_new}'
+ MINUS
+ SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_old}'""".format(
+ tbl=tbl_name, ts_new=ts_2, ts_old=ts_1),
+ ['2'])
+ expect_results("""SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_new}
+ MINUS
+ SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_old}""".format(
+ tbl=tbl_name, v_new=snapshots[1], v_old=snapshots[0]),
+ ['2'])
+ # Mix SYSTEM_TIME ans SYSTEM_VERSION
+ expect_results("""SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_new}
+ MINUS
+ SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_old}'""".format(
+ tbl=tbl_name, v_new=snapshots[1], ts_old=ts_1),
+ ['2'])
+ expect_results("""SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_new}'
+ MINUS
+ SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_old}""".format(
+ tbl=tbl_name, ts_new=ts_2, v_old=snapshots[0]),
+ ['2'])
+
+ # Query old snapshot
+ try:
+ impalad_client.execute("SELECT * FROM {0} FOR SYSTEM_TIME AS OF {1}".format(
+ tbl_name, "now() - interval 2 years"))
+ assert False # Exception must be thrown
+ except Exception as e:
+ assert "Cannot find a snapshot older than" in str(e)
+ # Query invalid snapshot
+ try:
+ impalad_client.execute("SELECT * FROM {0} FOR SYSTEM_VERSION AS OF 42".format(
+ tbl_name))
+ assert False # Exception must be thrown
+ except Exception as e:
+ assert "Cannot find snapshot with ID 42" in str(e)
+
+ # Check that timezone is interpreted in local timezone controlled by query option
+ # TIMEZONE
+ impalad_client.execute("truncate table {0}".format(tbl_name))
+ impalad_client.execute("insert into {0} values (1111)".format(tbl_name))
+ impalad_client.execute("SET TIMEZONE='Europe/Budapest'")
+ now_budapest = impala_now()
+ expect_results_t(quote(now_budapest), ['1111'])
+
+ # Let's switch to Tokyo time. Tokyo time is always greater than Budapest time.
+ impalad_client.execute("SET TIMEZONE='Asia/Tokyo'")
+ now_tokyo = impala_now()
+ expect_results_t(quote(now_tokyo), ['1111'])
+ try:
+ # Interpreting Budapest time in Tokyo time points to the past when the table
+ # didn't exist.
+ expect_results_t(quote(now_budapest), [])
+ assert False
+ except Exception as e:
+ assert "Cannot find a snapshot older than" in str(e)
@SkipIf.not_hdfs
def test_strings_utf8(self, vector, unique_database):