You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2021/09/10 16:27:03 UTC

[impala] 04/04: IMPALA-10905: test_iceberg.py test_time_travel fails in exhaustive builds

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8086c4457e02a5bc542d183b82b733891df4eddf
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Wed Sep 8 15:36:57 2021 +0200

    IMPALA-10905: test_iceberg.py test_time_travel fails in exhaustive builds
    
    test_iceberg.py::test_time_travel sets the TIMEZONE query option
    during execution and leaves it set to 'Asia/Tokyo' when it finishes.
    
    The problem is that it sets the TIMEZONE on the ImpalaTestSuite's
    own client, so a subsequent test that uses the same client runs with
    timezone 'Asia/Tokyo' instead of the local one.
    
    This patch fixes this issue by creating a local Impala client for
    the test.
    
    Change-Id: I5c172df865bc35dd1792146f4045fbc16e04d116
    Reviewed-on: http://gerrit.cloudera.org:8080/17835
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/common/impala_connection.py |   7 ++
 tests/query_test/test_iceberg.py  | 171 +++++++++++++++++++-------------------
 2 files changed, 94 insertions(+), 84 deletions(-)

diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py
index f51a85e..07e172f 100644
--- a/tests/common/impala_connection.py
+++ b/tests/common/impala_connection.py
@@ -77,6 +77,13 @@ class OperationHandle(object):
 # Represents an Impala connection.
 class ImpalaConnection(object):
   __metaclass__ = abc.ABCMeta
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, exc_type, exc_value, traceback):
+    self.close()
+
   @abc.abstractmethod
   def set_configuration_option(self, name, value):
     """Sets a configuraiton option name to the given value"""
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 5bbd376..ca75ba0 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -122,11 +122,11 @@ class TestIcebergTable(ImpalaTestSuite):
     tbl_name = unique_database + ".time_travel"
 
     def execute_query_ts(query):
-      self.execute_query(query)
+      impalad_client.execute(query)
       return str(datetime.datetime.now())
 
     def expect_results(query, expected_results):
-      data = self.execute_query(query)
+      data = impalad_client.execute(query)
       assert len(data.data) == len(expected_results)
       for r in expected_results:
         assert r in data.data
@@ -148,7 +148,7 @@ class TestIcebergTable(ImpalaTestSuite):
         return "CAST({0} as timestamp)".format(quote(ts))
 
     def get_snapshots():
-      data = self.execute_query("describe history {0}".format(tbl_name))
+      data = impalad_client.execute("describe history {0}".format(tbl_name))
       ret = list()
       for row in data.data:
         fields = row.split('\t')
@@ -156,89 +156,92 @@ class TestIcebergTable(ImpalaTestSuite):
       return ret
 
     def impala_now():
-      now_data = self.execute_query("select now()")
+      now_data = impalad_client.execute("select now()")
       return now_data.data[0]
 
-    # Iceberg doesn't create a snapshot entry for the initial empty table
-    self.execute_query("create table {0} (i int) stored as iceberg".format(tbl_name))
-    ts_1 = execute_query_ts("insert into {0} values (1)".format(tbl_name))
-    ts_2 = execute_query_ts("insert into {0} values (2)".format(tbl_name))
-    ts_3 = execute_query_ts("truncate table {0}".format(tbl_name))
-    time.sleep(1)
-    ts_4 = execute_query_ts("insert into {0} values (100)".format(tbl_name))
-    # Query table as of timestamps.
-    expect_results_t("now()", ['100'])
-    expect_results_t(quote(ts_1), ['1'])
-    expect_results_t(quote(ts_2), ['1', '2'])
-    expect_results_t(quote(ts_3), [])
-    expect_results_t(quote(ts_4), ['100'])
-    expect_results_t(cast_ts(ts_4) + " - interval 1 seconds", [])
-    # Future queries return the current snapshot.
-    expect_results_t(cast_ts(ts_4) + " + interval 1 hours", ['100'])
-    # Query table as of snapshot IDs.
-    snapshots = get_snapshots()
-    expect_results_v(snapshots[0], ['1'])
-    expect_results_v(snapshots[1], ['1', '2'])
-    expect_results_v(snapshots[2], [])
-    expect_results_v(snapshots[3], ['100'])
-
-    # SELECT diff
-    expect_results("""SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_new}'
-                      MINUS
-                      SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_old}'""".format(
-                   tbl=tbl_name, ts_new=ts_2, ts_old=ts_1),
-                   ['2'])
-    expect_results("""SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_new}
-                      MINUS
-                      SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_old}""".format(
-                   tbl=tbl_name, v_new=snapshots[1], v_old=snapshots[0]),
-                   ['2'])
-    # Mix SYSTEM_TIME ans SYSTEM_VERSION
-    expect_results("""SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_new}
-                      MINUS
-                      SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_old}'""".format(
-                   tbl=tbl_name, v_new=snapshots[1], ts_old=ts_1),
-                   ['2'])
-    expect_results("""SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_new}'
-                      MINUS
-                      SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_old}""".format(
-                   tbl=tbl_name, ts_new=ts_2, v_old=snapshots[0]),
-                   ['2'])
-
-    # Query old snapshot
-    try:
-      self.execute_query("SELECT * FROM {0} FOR SYSTEM_TIME AS OF {1}".format(
-          tbl_name, "now() - interval 2 years"))
-      assert False  # Exception must be thrown
-    except Exception as e:
-      assert "Cannot find a snapshot older than" in str(e)
-    # Query invalid snapshot
-    try:
-      self.execute_query("SELECT * FROM {0} FOR SYSTEM_VERSION AS OF 42".format(tbl_name))
-      assert False  # Exception must be thrown
-    except Exception as e:
-      assert "Cannot find snapshot with ID 42" in str(e)
-
-    # Check that timezone is interpreted in local timezone controlled by query option
-    # TIMEZONE
-    self.execute_query("truncate table {0}".format(tbl_name))
-    self.execute_query("insert into {0} values (1111)".format(tbl_name))
-    self.execute_query("SET TIMEZONE='Europe/Budapest'")
-    now_budapest = impala_now()
-    expect_results_t(quote(now_budapest), ['1111'])
-
-    # Let's switch to Tokyo time. Tokyo time is always greater than Budapest time.
-    self.execute_query("SET TIMEZONE='Asia/Tokyo'")
-    now_tokyo = impala_now()
-    expect_results_t(quote(now_tokyo), ['1111'])
-    try:
-      # Interpreting Budapest time in Tokyo time points to the past when the table
-      # didn't exist.
-      expect_results_t(quote(now_budapest), [])
-      assert False
-    except Exception as e:
-      assert "Cannot find a snapshot older than" in str(e)
-
+    # We are setting the TIMEZONE query option in this test, so let's create a local
+    # impala client.
+    with self.create_impala_client() as impalad_client:
+      # Iceberg doesn't create a snapshot entry for the initial empty table
+      impalad_client.execute("create table {0} (i int) stored as iceberg".format(tbl_name))
+      ts_1 = execute_query_ts("insert into {0} values (1)".format(tbl_name))
+      ts_2 = execute_query_ts("insert into {0} values (2)".format(tbl_name))
+      ts_3 = execute_query_ts("truncate table {0}".format(tbl_name))
+      time.sleep(1)
+      ts_4 = execute_query_ts("insert into {0} values (100)".format(tbl_name))
+      # Query table as of timestamps.
+      expect_results_t("now()", ['100'])
+      expect_results_t(quote(ts_1), ['1'])
+      expect_results_t(quote(ts_2), ['1', '2'])
+      expect_results_t(quote(ts_3), [])
+      expect_results_t(quote(ts_4), ['100'])
+      expect_results_t(cast_ts(ts_4) + " - interval 1 seconds", [])
+      # Future queries return the current snapshot.
+      expect_results_t(cast_ts(ts_4) + " + interval 1 hours", ['100'])
+      # Query table as of snapshot IDs.
+      snapshots = get_snapshots()
+      expect_results_v(snapshots[0], ['1'])
+      expect_results_v(snapshots[1], ['1', '2'])
+      expect_results_v(snapshots[2], [])
+      expect_results_v(snapshots[3], ['100'])
+
+      # SELECT diff
+      expect_results("""SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_new}'
+                        MINUS
+                        SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_old}'""".format(
+                     tbl=tbl_name, ts_new=ts_2, ts_old=ts_1),
+                     ['2'])
+      expect_results("""SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_new}
+                        MINUS
+                        SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_old}""".format(
+                     tbl=tbl_name, v_new=snapshots[1], v_old=snapshots[0]),
+                     ['2'])
+      # Mix SYSTEM_TIME ans SYSTEM_VERSION
+      expect_results("""SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_new}
+                        MINUS
+                        SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_old}'""".format(
+                     tbl=tbl_name, v_new=snapshots[1], ts_old=ts_1),
+                     ['2'])
+      expect_results("""SELECT * FROM {tbl} FOR SYSTEM_TIME AS OF '{ts_new}'
+                        MINUS
+                        SELECT * FROM {tbl} FOR SYSTEM_VERSION AS OF {v_old}""".format(
+                     tbl=tbl_name, ts_new=ts_2, v_old=snapshots[0]),
+                     ['2'])
+
+      # Query old snapshot
+      try:
+        impalad_client.execute("SELECT * FROM {0} FOR SYSTEM_TIME AS OF {1}".format(
+            tbl_name, "now() - interval 2 years"))
+        assert False  # Exception must be thrown
+      except Exception as e:
+        assert "Cannot find a snapshot older than" in str(e)
+      # Query invalid snapshot
+      try:
+        impalad_client.execute("SELECT * FROM {0} FOR SYSTEM_VERSION AS OF 42".format(
+            tbl_name))
+        assert False  # Exception must be thrown
+      except Exception as e:
+        assert "Cannot find snapshot with ID 42" in str(e)
+
+      # Check that timezone is interpreted in local timezone controlled by query option
+      # TIMEZONE
+      impalad_client.execute("truncate table {0}".format(tbl_name))
+      impalad_client.execute("insert into {0} values (1111)".format(tbl_name))
+      impalad_client.execute("SET TIMEZONE='Europe/Budapest'")
+      now_budapest = impala_now()
+      expect_results_t(quote(now_budapest), ['1111'])
+
+      # Let's switch to Tokyo time. Tokyo time is always greater than Budapest time.
+      impalad_client.execute("SET TIMEZONE='Asia/Tokyo'")
+      now_tokyo = impala_now()
+      expect_results_t(quote(now_tokyo), ['1111'])
+      try:
+        # Interpreting Budapest time in Tokyo time points to the past when the table
+        # didn't exist.
+        expect_results_t(quote(now_budapest), [])
+        assert False
+      except Exception as e:
+        assert "Cannot find a snapshot older than" in str(e)
 
   @SkipIf.not_hdfs
   def test_strings_utf8(self, vector, unique_database):