You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/06/02 09:37:23 UTC

[impala] branch master updated: IMPALA-11332: Fix trailing whitespace for CSV output

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new c41e6941c IMPALA-11332: Fix trailing whitespace for CSV output
c41e6941c is described below

commit c41e6941cafad453819b57e78b2083a3e64496e0
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Wed Jun 1 13:34:09 2022 -0700

    IMPALA-11332: Fix trailing whitespace for CSV output
    
    The CSV output formatter calls rstrip() with no
    arguments on the text it generates. That call was
    intended to remove only the extra trailing newline,
    but it also strips any other trailing whitespace
    from the last line of the output. This is a problem
    for a SQL query like:
    select 'Trailing whitespace          ';
    
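    This changes the rstrip() to rstrip('\n') (or
    rstrip(b'\n') on Python 2, where the buffer holds
    encoded bytes) so that only the trailing newline is
    removed and other trailing whitespace is preserved.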
    
    Testing:
     - Current shell tests pass
     - Added a shell test that verifies trailing whitespace
       is not being stripped.
    
    Change-Id: I69d032ca2f581587b0938d0878fdf402fee0d57e
    Reviewed-on: http://gerrit.cloudera.org:8080/18580
    Reviewed-by: Quanlong Huang <hu...@gmail.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 shell/shell_output.py                 |  9 ++++++++-
 tests/shell/test_shell_commandline.py | 10 ++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/shell/shell_output.py b/shell/shell_output.py
index becc4dd06..608a8ca92 100644
--- a/shell/shell_output.py
+++ b/shell/shell_output.py
@@ -90,7 +90,14 @@ class DelimitedOutputFormatter(object):
         row = [val.encode('utf-8', 'replace') if isinstance(val, unicode) else val
             for val in row]
       writer.writerow(row)
-    rows = temp_buffer.getvalue().rstrip()
+    # The CSV writer produces an extra newline. Strip that extra newline (and
+    # only that extra newline). csv wraps newlines for data values in quotes,
+    # so rstrip will be limited to the extra newline.
+    if sys.version_info.major == 2:
+      # Python 2 is in encoded Unicode bytes, so this needs to be a bytes \n.
+      rows = temp_buffer.getvalue().rstrip(b'\n')
+    else:
+      rows = temp_buffer.getvalue().rstrip('\n')
     temp_buffer.close()
     return rows
 
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 7b410d333..6fda37a32 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1254,3 +1254,13 @@ class TestImpalaShell(ImpalaTestSuite):
     result = run_impala_shell_cmd(vector, args + ['--http_socket_timeout_s=None'])
     assert result.stderr == ""
     assert result.stdout == "0\n"
+
+  def test_trailing_whitespace(self, vector):
+    """Test CSV output with trailing whitespace"""
+
+    # Ten trailing spaces
+    query = "select 'Trailing Whitespace          '"
+    # Only one column, no need for output_delimiter
+    output = run_impala_shell_cmd(vector, ['-q', query, '-B'])
+    assert "Fetched 1 row(s)" in output.stderr
+    assert "Trailing Whitespace          \n" in output.stdout