You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2020/07/22 23:30:11 UTC
[impala] 01/02: IMPALA-9953: Shell should continue fetching even when 0 rows are returned

This is an automated email from the ASF dual-hosted git repository.

stakiar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ea95691b775ef0edd032a2590a119e6841cc2129
Author: Sahil Takiar <ta...@gmail.com>
AuthorDate: Mon Jul 20 12:35:26 2020 -0700

    IMPALA-9953: Shell should continue fetching even when 0 rows are returned
    
    The Impala shell stops fetching rows if it receives a batch that
    contains 0 rows. This is incorrect because a batch with 0 rows can be
    returned if the fetch request hits a timeout. Instead, the shell should
    rely on the value of has_rows / hasMoreRows to determine when to stop
    issuing fetch requests.
    
    Tests:
    * Added a regression test to test_shell_commandline.py
    * Ran all shell tests
    
    Change-Id: I5f8527aea9e433f8cf426435c0ba41355bbf9d88
    Reviewed-on: http://gerrit.cloudera.org:8080/16222
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 shell/impala_shell.py                 |  2 +-
 tests/shell/test_shell_commandline.py | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index e0d8026..8c18408 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1183,7 +1183,7 @@ class ImpalaShell(cmd.Cmd, object):
         for rows in rows_fetched:
           # IMPALA-4418: Break out of the loop to prevent printing an unnecessary empty line.
           if len(rows) == 0:
-            break
+            continue
           self.output_stream.write(rows)
           num_rows += len(rows)
 
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 1782990..673c7c2 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1025,3 +1025,19 @@ class TestImpalaShell(ImpalaTestSuite):
       result = run_impala_shell_cmd(vector, ['-q', query, '-B', '--fetch_size', '512'])
       result_rows = result.stdout.strip().split('\n')
       assert len(result_rows) == 1024
+
+  def test_result_spooling_timeout(self, vector):
+    """Regression test for IMPALA-9953. Validates that if a fetch timeout occurs in the
+    middle of reading rows from Impala, all rows are still printed by the Impala
+    shell."""
+    # This query was stolen from __test_fetch_timeout in test_fetch_timeout.py. The query
+    # has a large delay between RowBatch production. So a fetch timeout will occur while
+    # fetching rows.
+    query_options = "set num_nodes=1; \
+                     set fetch_rows_timeout_ms=1; \
+                     set batch_size=1; \
+                     set spool_query_results=true;"
+    query = "select bool_col, avg(id) from functional.alltypes group by bool_col"
+    result = run_impala_shell_cmd(vector, ['-q', query_options + query, '-B'])
+    result_rows = result.stdout.strip().split('\n')
+    assert len(result_rows) == 2