You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@impala.apache.org by wz...@apache.org on 2022/04/02 20:32:37 UTC

[impala] branch master updated (0fb1496 -> c1f99d1)

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 0fb1496  IMPALA-11039: Fix incorrect page jumping in late materialization of Parquet
     new 7281718  IMPALA-11216: Fix flaky test_describe_history_params
     new c1f99d1  IMPALA-11213: Fixed impala-shell strict hs2 mode for large fetches

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 shell/impala_client.py                |  9 ++++++++-
 tests/query_test/test_iceberg.py      | 24 +++++++++++++++---------
 tests/shell/test_shell_commandline.py | 11 +++++++++++
 3 files changed, 34 insertions(+), 10 deletions(-)

[impala] 01/02: IMPALA-11216: Fix flaky test_describe_history_params

Posted by wz...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 72817181212372999b7920d7eadaca93dda8731a
Author: Tamas Mate <tm...@apache.org>
AuthorDate: Fri Apr 1 15:45:27 2022 +0200

    IMPALA-11216: Fix flaky test_describe_history_params
    
    The test_describe_history_params was flaky because the client can
    receive the timestamps in two formats, with or without nanoseconds.
    This commit updates the test to handle both patterns.
    
    Testing:
     - Executed the test locally.
    
    Change-Id: I6aaae9b9fc14dd20390c16f9555d6f43357814a5
    Reviewed-on: http://gerrit.cloudera.org:8080/18375
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/query_test/test_iceberg.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 322b7c9..725671a 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -126,7 +126,6 @@ class TestIcebergTable(ImpalaTestSuite):
 
   def test_describe_history_params(self, vector, unique_database):
     tbl_name = unique_database + ".describe_history"
-    time_format = '%Y-%m-%d %H:%M:%S.%f'
 
     def execute_query_ts(query):
       impalad_client.execute(query)
@@ -137,9 +136,8 @@ class TestIcebergTable(ImpalaTestSuite):
       data = impalad_client.execute(query)
       assert len(data.data) == expected_result_size
       for i in range(len(data.data)):
-        result_ts = data.data[i].split('\t')[0][:- 3]
-        result_ts_dt = datetime.datetime.strptime(result_ts, time_format)
-        assert result_ts_dt > ts
+        result_ts_dt = parse_timestamp(data.data[i].split('\t')[0])
+        assert result_ts_dt >= ts
 
     def expect_results_between(ts_start, ts_end, expected_result_size):
       query = "DESCRIBE HISTORY {0} BETWEEN {1} AND {2};".format(
@@ -147,9 +145,18 @@ class TestIcebergTable(ImpalaTestSuite):
       data = impalad_client.execute(query)
       assert len(data.data) == expected_result_size
       for i in range(len(data.data)):
-        result_ts = data.data[i].split('\t')[0][:- 3]
-        result_ts_dt = datetime.datetime.strptime(result_ts, time_format)
-        assert result_ts_dt > ts_start and result_ts_dt < ts_end
+        result_ts_dt = parse_timestamp(data.data[i].split('\t')[0])
+        assert result_ts_dt >= ts_start and result_ts_dt <= ts_end
+
+    def parse_timestamp(ts_string):
+      """The client can receive the timestamp in two formats, if the timestamp has
+      fractional seconds "yyyy-MM-dd HH:mm:ss.SSSSSSSSS" pattern is used, otherwise
+      "yyyy-MM-dd HH:mm:ss". Additionally, Python's datetime library cannot handle
+      nanoseconds, therefore in that case the timestamp has to be trimmed."""
+      if len(ts_string.split('.')) > 1:
+        return datetime.datetime.strptime(ts_string[:-3], '%Y-%m-%d %H:%M:%S.%f')
+      else:
+        return datetime.datetime.strptime(ts_string, '%Y-%m-%d %H:%M:%S')
 
     def quote(s):
       return "'{0}'".format(s)
@@ -159,8 +166,7 @@ class TestIcebergTable(ImpalaTestSuite):
 
     def impala_now():
       now_data = impalad_client.execute("select now()")
-      now_data_ts = now_data.data[0][:- 3]
-      now_data_ts_dt = datetime.datetime.strptime(now_data_ts, time_format)
+      now_data_ts_dt = parse_timestamp(now_data.data[0])
       return now_data_ts_dt
 
     # We are setting the TIMEZONE query option in this test, so let's create a local

[impala] 02/02: IMPALA-11213: Fixed impala-shell strict hs2 mode for large fetches

Posted by wz...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c1f99d13699d1c63e7d74308a25c46798ea410d7
Author: Steve Carlin <sc...@cloudera.com>
AuthorDate: Wed Mar 30 15:55:38 2022 -0700

    IMPALA-11213: Fixed impala-shell strict hs2 mode for large fetches
    
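    The strict hs2 protocol mode is broken when fetching large results
    because Hive always returns the FetchResults.hasMoreRows field as
    false, so impala-shell stops fetching after the first batch. When there
    are no more results, Hive returns an empty batch with no rows, so the
    strict hs2 client now keeps fetching until it receives an empty batch.
    HIVE-26108 has been filed to support the hasMoreRows field.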
    
    Added a framework test that retrieves 50,000 rows from tpcds. The default
    number of rows returned from Hive is 10K so this should be more than
    enough to ensure that multiple fetches are done. Also added a test that
    fetches a single NULL row.
    
    Change-Id: Ife436d91e7fe0c30bf020024e20a5d8ad89faa24
    Reviewed-on: http://gerrit.cloudera.org:8080/18370
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
---
 shell/impala_client.py                |  9 ++++++++-
 tests/shell/test_shell_commandline.py | 11 +++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/shell/impala_client.py b/shell/impala_client.py
index 3a2dcf3..0790b1b 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -840,9 +840,12 @@ class ImpalaHS2Client(ImpalaClient):
       # for the display code. This is somewhat inefficient, but performance is comparable
       # to the old Beeswax code.
       yield self._transpose(col_value_converters, resp.results.columns)
-      if not resp.hasMoreRows:
+      if not self._hasMoreRows(resp, col_value_converters):
         return
 
+  def _hasMoreRows(self, resp, col_value_converters):
+    return resp.hasMoreRows
+
   def _transpose(self, col_value_converters, columns):
     """Transpose the columns from a TFetchResultsResp into the row format returned
     by fetch() with all the values converted into their string representations for
@@ -1129,6 +1132,10 @@ class StrictHS2Client(ImpalaHS2Client):
   def _populate_query_options(self):
     return
 
+  def _hasMoreRows(self, resp, col_value_converters):
+    tcol = col_value_converters[0][0](resp.results.columns[0])
+    return len(tcol.values)
+
 
 class ImpalaBeeswaxClient(ImpalaClient):
   """Legacy Beeswax client. Uses the Beeswax protocol plus Impala-specific extensions.
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 6bcb13e..14a6fdc 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1099,6 +1099,17 @@ class TestImpalaShell(ImpalaTestSuite):
 
     assert "4\t4\t40.4" in result.stdout, result.stdout
 
+  def test_large_fetch(self, vector):
+    query = "select ss_sold_time_sk from tpcds.store_sales limit 50000"
+    output = run_impala_shell_cmd(vector, ['-q', query, '-B', '--output_delimiter=;'])
+    assert "Fetched 50000 row(s)" in output.stderr
+
+  def test_single_null_fetch(self, vector):
+    query = "select null"
+    output = run_impala_shell_cmd(vector, ['-q', query, '-B', '--output_delimiter=;'])
+    assert "NULL" in output.stdout
+    assert "Fetched 1 row(s)" in output.stderr
+
   def test_fetch_size(self, vector):
     """Test the --fetch_size option with and without result spooling enabled."""
     if vector.get_value('strict_hs2_protocol'):