You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2020/07/22 23:30:11 UTC
[impala] 01/02: IMPALA-9953: Shell should continue fetching even when 0 rows are returned
This is an automated email from the ASF dual-hosted git repository.
stakiar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit ea95691b775ef0edd032a2590a119e6841cc2129
Author: Sahil Takiar <ta...@gmail.com>
AuthorDate: Mon Jul 20 12:35:26 2020 -0700
IMPALA-9953: Shell should continue fetching even when 0 rows are returned
The Impala shell stops fetching rows if it receives a batch that
contains 0 rows. This is incorrect because a batch with 0 rows can be
returned if the fetch request hits a timeout. Instead, the shell should
rely on the value of has_rows / hasMoreRows to determine when to stop
issuing fetch requests.
Tests:
* Added a regression test to test_shell_commandline.py
* Ran all shell tests
Change-Id: I5f8527aea9e433f8cf426435c0ba41355bbf9d88
Reviewed-on: http://gerrit.cloudera.org:8080/16222
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
shell/impala_shell.py | 2 +-
tests/shell/test_shell_commandline.py | 16 ++++++++++++++++
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index e0d8026..8c18408 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1183,7 +1183,7 @@ class ImpalaShell(cmd.Cmd, object):
for rows in rows_fetched:
# IMPALA-4418: Break out of the loop to prevent printing an unnecessary empty line.
if len(rows) == 0:
- break
+ continue
self.output_stream.write(rows)
num_rows += len(rows)
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 1782990..673c7c2 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1025,3 +1025,19 @@ class TestImpalaShell(ImpalaTestSuite):
result = run_impala_shell_cmd(vector, ['-q', query, '-B', '--fetch_size', '512'])
result_rows = result.stdout.strip().split('\n')
assert len(result_rows) == 1024
+
+ def test_result_spooling_timeout(self, vector):
+ """Regression test for IMPALA-9953. Validates that if a fetch timeout occurs in the
+ middle of reading rows from Impala, all rows are still printed by the Impala
+ shell."""
+ # This query was stolen from __test_fetch_timeout in test_fetch_timeout.py. The query
+ # has a large delay between RowBatch production. So a fetch timeout will occur while
+ # fetching rows.
+ query_options = "set num_nodes=1; \
+ set fetch_rows_timeout_ms=1; \
+ set batch_size=1; \
+ set spool_query_results=true;"
+ query = "select bool_col, avg(id) from functional.alltypes group by bool_col"
+ result = run_impala_shell_cmd(vector, ['-q', query_options + query, '-B'])
+ result_rows = result.stdout.strip().split('\n')
+ assert len(result_rows) == 2