You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2022/06/13 17:15:25 UTC

[impala] 02/05: IMPALA-1682: Support printing the output of a query (rows) vertically.

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c7784bde55d86a25388f5a61e64cda47a3ccaeee
Author: yx91490 <yx...@126.com>
AuthorDate: Tue Mar 29 14:19:21 2022 +0800

    IMPALA-1682: Support printing the output of a query (rows) vertically.
    
    In vertical mode, impala-shell prints each row in the following format:
    first a line containing the row number, then the row's columns line by
    line, each column line starting with the column's name and a colon.
    
    To enable it, use the shell option '-E' or '--vertical', or run 'set
    VERTICAL=true' in interactive mode. To disable it in interactive mode,
    run 'set VERTICAL=false'. NOTE: vertical mode has no effect if the '-B'
    option or 'set WRITE_DELIMITED=true' is specified.
    
    Tests:
    Added test coverage in test_shell_interactive.py and test_shell_commandline.py.
    
    Change-Id: I5cee48d5a239d6b7c0f51331275524a25130fadf
    Reviewed-on: http://gerrit.cloudera.org:8080/18549
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 docs/topics/impala_shell_options.xml  | 16 ++++++++++++++++
 shell/impala_shell.py                 |  7 ++++++-
 shell/impala_shell_config_defaults.py |  1 +
 shell/option_parser.py                |  4 ++++
 shell/shell_output.py                 | 36 ++++++++++++++++++++++++++++-------
 tests/shell/test_shell_commandline.py | 18 +++++++++++++-----
 tests/shell/test_shell_interactive.py | 13 +++++++++++++
 7 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/docs/topics/impala_shell_options.xml b/docs/topics/impala_shell_options.xml
index 87496f465..d6660832c 100644
--- a/docs/topics/impala_shell_options.xml
+++ b/docs/topics/impala_shell_options.xml
@@ -188,6 +188,22 @@ under the License.
                 </p>
               </entry>
             </row>
+            <row>
+              <entry>
+                <p> -E or </p>
+                <p>--vertical </p>
+              </entry>
+              <entry rev="2.0.0">
+                <p>
+                  vertical=true
+                </p>
+              </entry>
+              <entry>
+                <p>
+                  Print the output of a query (rows) vertically. Has no effect if <codeph>-B</codeph> is specified. Added in Impala 4.2.
+                </p>
+              </entry>
+            </row>
             <row>
               <entry>
                 <p> -p or </p>
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 6d7b1e59d..735f2354a 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -44,7 +44,7 @@ from impala_client import ImpalaHS2Client, StrictHS2Client, \
 from impala_shell_config_defaults import impala_shell_defaults
 from option_parser import get_option_parser, get_config_from_file
 from shell_output import (DelimitedOutputFormatter, OutputStream, PrettyOutputFormatter,
-                          OverwritingStdErrOutputStream)
+                          OverwritingStdErrOutputStream, VerticalOutputFormatter)
 from subprocess import call
 from shell_exceptions import (RPCException, DisconnectedException, QueryStateException,
     QueryCancelledByShellException, MissingThriftMethodException)
@@ -167,6 +167,7 @@ class ImpalaShell(cmd.Cmd, object):
     'VERBOSE' : (lambda x: x in ImpalaShell.TRUE_STRINGS, "verbose"),
     'DELIMITER' : (lambda x: " " if x == '\\s' else x, "output_delimiter"),
     'OUTPUT_FILE' : (lambda x: None if x == '' else x, "output_file"),
+    'VERTICAL': (lambda x: x in ImpalaShell.TRUE_STRINGS, "vertical"),
   }
 
   # Minimum time in seconds between two calls to get the exec summary.
@@ -219,6 +220,7 @@ class ImpalaShell(cmd.Cmd, object):
         else options.output_delimiter
     self.write_delimited = options.write_delimited
     self.print_header = options.print_header
+    self.vertical = options.vertical
 
     self.progress_stream = OverwritingStdErrOutputStream()
 
@@ -1200,6 +1202,9 @@ class ImpalaShell(cmd.Cmd, object):
       # print the column names
       if self.print_header:
         self.output_stream.write([column_names])
+    elif self.vertical:
+      formatter = VerticalOutputFormatter(column_names)
+      self.output_stream = OutputStream(formatter, filename=self.output_file)
     else:
       prettytable = self.construct_table_with_header(column_names)
       formatter = PrettyOutputFormatter(prettytable)
diff --git a/shell/impala_shell_config_defaults.py b/shell/impala_shell_config_defaults.py
index dbaa2b62b..513a16af7 100644
--- a/shell/impala_shell_config_defaults.py
+++ b/shell/impala_shell_config_defaults.py
@@ -41,6 +41,7 @@ impala_shell_defaults = {
             'output_delimiter': '\\t',
             'output_file': None,
             'print_header': False,
+            'vertical': False,
             'live_progress': True,  # The option only applies to interactive shell session
             'live_summary': False,
             'query': None,
diff --git a/shell/option_parser.py b/shell/option_parser.py
index d5274d3b8..beee7f412 100755
--- a/shell/option_parser.py
+++ b/shell/option_parser.py
@@ -191,6 +191,10 @@ def get_option_parser(defaults):
                     action="store_true",
                     help="Print column names in delimited mode"
                          " when pretty-printed.")
+  parser.add_option("-E", "--vertical",
+                    dest="vertical",
+                    action="store_true",
+                    help="Print the output of a query (rows) vertically.")
   parser.add_option("--output_delimiter", dest="output_delimiter",
                     help="Field delimiter to use for output in delimited mode")
   parser.add_option("-s", "--kerberos_service_name",
diff --git a/shell/shell_output.py b/shell/shell_output.py
index 608a8ca92..371cb911d 100644
--- a/shell/shell_output.py
+++ b/shell/shell_output.py
@@ -70,7 +70,8 @@ class DelimitedOutputFormatter(object):
         field_delim_bytes = bytearray(field_delim, 'utf-8')
         self.field_delim = field_delim_bytes.decode('unicode_escape')
       else:
-        self.field_delim = field_delim.decode('unicode_escape')
+        # csv.writer in python2 requires an ascii string delimiter
+        self.field_delim = field_delim.decode('unicode_escape').encode('ascii', 'ignore')
       # IMPALA-8652, the delimiter should be a 1-character string and verified already
       assert len(self.field_delim) == 1
 
@@ -78,12 +79,7 @@ class DelimitedOutputFormatter(object):
     """Returns string containing UTF-8-encoded representation of the table data."""
     # csv.writer expects a file handle to the input.
     temp_buffer = StringIO()
-    if sys.version_info.major == 2:
-      # csv.writer in python2 requires an ascii string delimiter
-      delim = self.field_delim.encode('ascii', 'ignore')
-    else:
-      delim = self.field_delim
-    writer = csv.writer(temp_buffer, delimiter=delim,
+    writer = csv.writer(temp_buffer, delimiter=self.field_delim,
                         lineterminator='\n', quoting=csv.QUOTE_MINIMAL)
     for row in rows:
       if sys.version_info.major == 2:
@@ -102,6 +98,32 @@ class DelimitedOutputFormatter(object):
     return rows
 
 
+class VerticalOutputFormatter(DelimitedOutputFormatter):
+  def __init__(self, column_names):
+    DelimitedOutputFormatter.__init__(self, field_delim="\n")
+    self.column_names = column_names
+    self.column_name_max_len = max([len(s) for s in column_names])
+
+  def format(self, rows):
+    """Returns string containing UTF-8-encoded representation of the table data."""
+    # csv.writer expects a file handle to the input.
+    temp_buffer = StringIO()
+    writer = csv.writer(temp_buffer, delimiter=self.field_delim,
+                        lineterminator='\n', quoting=csv.QUOTE_MINIMAL)
+    for r, row in enumerate(rows):
+      if sys.version_info.major == 2:
+        row = [val.encode('utf-8', 'replace') if isinstance(val, unicode) else val
+            for val in row]
+      writer.writerow(["************************************** " +
+        str(r + 1) + ".row **************************************"])
+      for c, val in enumerate(row):
+        row[c] = self.column_names[c].rjust(self.column_name_max_len) + ": " + val
+      writer.writerow(row)
+    rows = temp_buffer.getvalue().rstrip()
+    temp_buffer.close()
+    return rows
+
+
 class OutputStream(object):
   def __init__(self, formatter, filename=None):
     """Helper class for writing query output.
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 6fda37a32..7f146855e 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -297,17 +297,25 @@ class TestImpalaShell(ImpalaTestSuite):
         result.stderr
 
   def test_output_format(self, vector):
-    expected_output = ['1'] * 3
-    args = ['-q', 'select 1,1,1', '-B', '--quiet']
-    result = run_impala_shell_cmd(vector, args)
+    expected_output = ['1', '2', '3']
+    args = ['-q', 'select 1 as col_00001, 2 as col_2, 3 as col_03', '--quiet']
+    result = run_impala_shell_cmd(vector, args + ['-B'])
     actual_output = [r.strip() for r in result.stdout.split('\t')]
     assert actual_output == expected_output
-    result = run_impala_shell_cmd(vector, args + ['--output_delim=|'])
+    result = run_impala_shell_cmd(vector, args + ['-B', '--output_delim=|'])
     actual_output = [r.strip() for r in result.stdout.split('|')]
     assert actual_output == expected_output
-    result = run_impala_shell_cmd(vector, args + ['--output_delim=||'],
+    result = run_impala_shell_cmd(vector, args + ['-B', '--output_delim=||'],
                                   expect_success=False)
     assert "Illegal delimiter" in result.stderr
+    result = run_impala_shell_cmd(vector, args + ['-E'])
+    result_rows = result.stdout.strip().split('\n')
+    assert len(result_rows) == 4
+    assert "************************************** " \
+      "1.row **************************************" == result_rows[0]
+    assert "col_00001: 1" == result_rows[1]
+    assert "    col_2: 2" == result_rows[2]
+    assert "   col_03: 3" == result_rows[3]
 
   def test_do_methods(self, vector, empty_table):
     """Ensure that the do_ methods in the shell work.
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index cd2db996b..300b042e6 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -254,6 +254,19 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
     result = p.get_result()
     assert "21,VIETNAM,2" in result.stdout
 
+  def test_vertical(self, vector):
+    """Test output rows in vertical mode"""
+    p = ImpalaShell(vector)
+    p.send_cmd("use tpch")
+    p.send_cmd("set vertical=true")
+    p.send_cmd("select N_NATIONKEY, N_NAME from nation limit 1")
+    result = p.get_result()
+    assert "+----------------+" not in result.stdout
+    assert "************************************** " \
+      "1.row **************************************" in result.stdout, result.stdout
+    assert "n_nationkey: " in result.stdout, result.stdout
+    assert "n_name: " in result.stdout, result.stdout
+
   @pytest.mark.execute_serially
   def test_print_to_file(self, vector):
     """Test print to output file and unset"""