You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2022/04/02 20:32:39 UTC

[impala] 02/02: IMPALA-11213: Fixed impala-shell strict hs2 mode for large fetches

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c1f99d13699d1c63e7d74308a25c46798ea410d7
Author: Steve Carlin <sc...@cloudera.com>
AuthorDate: Wed Mar 30 15:55:38 2022 -0700

    IMPALA-11213: Fixed impala-shell strict hs2 mode for large fetches
    
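    The strict hs2 protocol mode is broken when fetching large results:
    Hive always returns the FetchResults.hasMoreRows field as false, so
    impala-shell stopped fetching after the first batch. When there are no
    more results, Hive returns an empty batch with no rows, so in strict
    hs2 mode the shell now keeps fetching until it receives an empty batch.
    HIVE-26108 has been filed to support the hasMoreRows field.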
    
    Added a framework test that retrieves 50K rows from tpcds. The default
    number of rows returned from Hive is 10K so this should be more than
    enough to ensure that multiple fetches are done.
    
    Change-Id: Ife436d91e7fe0c30bf020024e20a5d8ad89faa24
    Reviewed-on: http://gerrit.cloudera.org:8080/18370
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
---
 shell/impala_client.py                |  9 ++++++++-
 tests/shell/test_shell_commandline.py | 11 +++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/shell/impala_client.py b/shell/impala_client.py
index 3a2dcf3..0790b1b 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -840,9 +840,12 @@ class ImpalaHS2Client(ImpalaClient):
       # for the display code. This is somewhat inefficient, but performance is comparable
       # to the old Beeswax code.
       yield self._transpose(col_value_converters, resp.results.columns)
-      if not resp.hasMoreRows:
+      if not self._hasMoreRows(resp, col_value_converters):
         return
 
+  def _hasMoreRows(self, resp, col_value_converters):
+    return resp.hasMoreRows
+
   def _transpose(self, col_value_converters, columns):
     """Transpose the columns from a TFetchResultsResp into the row format returned
     by fetch() with all the values converted into their string representations for
@@ -1129,6 +1132,10 @@ class StrictHS2Client(ImpalaHS2Client):
   def _populate_query_options(self):
     return
 
+  def _hasMoreRows(self, resp, col_value_converters):
+    tcol = col_value_converters[0][0](resp.results.columns[0])
+    return len(tcol.values)
+
 
 class ImpalaBeeswaxClient(ImpalaClient):
   """Legacy Beeswax client. Uses the Beeswax protocol plus Impala-specific extensions.
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 6bcb13e..14a6fdc 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1099,6 +1099,17 @@ class TestImpalaShell(ImpalaTestSuite):
 
     assert "4\t4\t40.4" in result.stdout, result.stdout
 
+  def test_large_fetch(self, vector):
+    query = "select ss_sold_time_sk from tpcds.store_sales limit 50000"
+    output = run_impala_shell_cmd(vector, ['-q', query, '-B', '--output_delimiter=;'])
+    assert "Fetched 50000 row(s)" in output.stderr
+
+  def test_single_null_fetch(self, vector):
+    query = "select null"
+    output = run_impala_shell_cmd(vector, ['-q', query, '-B', '--output_delimiter=;'])
+    assert "NULL" in output.stdout
+    assert "Fetched 1 row(s)" in output.stderr
+
   def test_fetch_size(self, vector):
     """Test the --fetch_size option with and without result spooling enabled."""
     if vector.get_value('strict_hs2_protocol'):