You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/08/08 23:37:05 UTC

[impala] 08/27: IMPALA-11332: Fix trailing whitespace for CSV output

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f7f1909120f5b962b7ed14646d0d9d7dd5b5eb91
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Wed Jun 1 13:34:09 2022 -0700

    IMPALA-11332: Fix trailing whitespace for CSV output
    
    The current CSV output is stripping trailing
    whitespace from the last line of CSV output. The
    rstrip() call was intended to remove an extra newline,
    but it also matches other whitespace. This is a
    problem for a SQL query like:
    select 'Trailing whitespace          ';
    
    This changes the rstrip() to rstrip('\n') to
    avoid removing the other whitespace.
    
    Testing:
     - Current shell tests pass
     - Added a shell test that verifies trailing whitespace
       is not being stripped.
    
    Change-Id: I69d032ca2f581587b0938d0878fdf402fee0d57e
    Reviewed-on: http://gerrit.cloudera.org:8080/18580
    Reviewed-by: Quanlong Huang <hu...@gmail.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 shell/shell_output.py                 |  9 ++++++++-
 tests/shell/test_shell_commandline.py | 10 ++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/shell/shell_output.py b/shell/shell_output.py
index becc4dd06..608a8ca92 100644
--- a/shell/shell_output.py
+++ b/shell/shell_output.py
@@ -90,7 +90,14 @@ class DelimitedOutputFormatter(object):
         row = [val.encode('utf-8', 'replace') if isinstance(val, unicode) else val
             for val in row]
       writer.writerow(row)
-    rows = temp_buffer.getvalue().rstrip()
+    # The CSV writer produces an extra newline. Strip that extra newline (and
+    # only that extra newline). csv wraps newlines for data values in quotes,
+    # so rstrip will be limited to the extra newline.
+    if sys.version_info.major == 2:
+      # In Python 2 the buffer holds UTF-8 encoded bytes, so strip a bytes newline.
+      rows = temp_buffer.getvalue().rstrip(b'\n')
+    else:
+      rows = temp_buffer.getvalue().rstrip('\n')
     temp_buffer.close()
     return rows
 
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index c8393ecc4..1cfd90746 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1240,3 +1240,13 @@ class TestImpalaShell(ImpalaTestSuite):
     result = run_impala_shell_cmd(vector, args + ['--http_socket_timeout_s=None'])
     assert result.stderr == ""
     assert result.stdout == "0\n"
+
+  def test_trailing_whitespace(self, vector):
+    """Test CSV output with trailing whitespace"""
+
+    # Ten trailing spaces
+    query = "select 'Trailing Whitespace          '"
+    # Only one column, no need for output_delimiter
+    output = run_impala_shell_cmd(vector, ['-q', query, '-B'])
+    assert "Fetched 1 row(s)" in output.stderr
+    assert "Trailing Whitespace          \n" in output.stdout