You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2017/11/03 15:15:42 UTC

[2/5] incubator-impala git commit: IMPALA-5736: Add impala-shell argument to set default query options

IMPALA-5736: Add impala-shell argument to set default query options

Query options can be set from command line and impala rc as
key=value pairs, where key is case insensitive.

Examples:
command line:
impala-shell.sh -Q MT_DOP=1 --query_option=MAX_ERRORS=200

.impalarc:
[impala.query_options]
EXPLAIN_LEVEL=2
MT_DOP=2

The options set in command line will update the ones
in impalarc one by one, so the result of the example
above will be:
EXPLAIN_LEVEL=2
MT_DOP=1
MAX_ERRORS=200

Additional changes:
- 0 and 1 are accepted as bools in section [impala] to
  make it more consistent with [impala.query_options]
- options that are expected to be bool but are not
  0/1/true/false lead to error instead of warning

Change-Id: I26a3b67230c80a99bd246b6af205d558fec9a986
Reviewed-on: http://gerrit.cloudera.org:8080/8038
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0a0affb6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0a0affb6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0a0affb6

Branch: refs/heads/master
Commit: 0a0affb692fa4dfe27bdcc233312c8665f51b55b
Parents: eeb3242
Author: Csaba Ringhofer <cs...@cloudera.com>
Authored: Tue Sep 12 17:42:43 2017 +0200
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Nov 3 00:11:31 2017 +0000

----------------------------------------------------------------------
 bin/rat_exclude_files.txt               |   3 +
 shell/impala_shell.py                   |  44 +++++++---
 shell/option_parser.py                  | 125 +++++++++++++++++++--------
 tests/shell/impalarc_with_error         |   2 +
 tests/shell/impalarc_with_query_options |   6 ++
 tests/shell/impalarc_with_warnings      |   3 +
 tests/shell/test_shell_commandline.py   |  14 +++
 tests/shell/test_shell_interactive.py   |  12 +++
 8 files changed, 161 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/bin/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index e356a9e..73cd073 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -110,6 +110,9 @@ testdata/hive_benchmark/grepTiny/part-00000
 tests/pytest.ini
 tests/shell/bad_impalarc
 tests/shell/good_impalarc
+tests/shell/impalarc_with_error
+tests/shell/impalarc_with_query_options
+tests/shell/impalarc_with_warnings
 tests/shell/shell.cmds
 tests/shell/shell2.cmds
 tests/shell/shell_error.cmds

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 502a1cf..641060f 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -120,7 +120,7 @@ class ImpalaShell(object, cmd.Cmd):
   # Minimum time in seconds between two calls to get the exec summary.
   PROGRESS_UPDATE_INTERVAL = 1.0
 
-  def __init__(self, options):
+  def __init__(self, options, query_options):
     cmd.Cmd.__init__(self)
     self.is_alive = True
 
@@ -156,7 +156,7 @@ class ImpalaShell(object, cmd.Cmd):
 
     self.progress_stream = OverwritingStdErrOutputStream()
 
-    self.set_query_options = {}
+    self.set_query_options = query_options
     self.set_variables = options.variables
 
     self._populate_command_list()
@@ -683,11 +683,16 @@ class ImpalaShell(object, cmd.Cmd):
     self.partial_cmd = str()
     # Check if any of query options set by the user are inconsistent
     # with the impalad being connected to
-    for set_option in self.set_query_options:
+
+    # Use a temporary to avoid changing set_query_options during iteration.
+    new_query_options = {}
+    for set_option, value in self.set_query_options.iteritems():
       if set_option not in set(self.imp_client.default_query_options):
         print ('%s is not supported for the impalad being '
                'connected to, ignoring.' % set_option)
-        del self.set_query_options[set_option]
+      else:
+        new_query_options[set_option] = value
+    self.set_query_options = new_query_options
 
   def _connect(self):
     try:
@@ -1308,7 +1313,7 @@ def parse_variables(keyvals):
         vars[match.groups()[0].upper()] = match.groups()[1]
   return vars
 
-def execute_queries_non_interactive_mode(options):
+def execute_queries_non_interactive_mode(options, query_options):
   """Run queries in non-interactive mode."""
   if options.query_file:
     try:
@@ -1328,12 +1333,19 @@ def execute_queries_non_interactive_mode(options):
     return
 
   queries = parse_query_text(query_text)
-  shell = ImpalaShell(options)
+  shell = ImpalaShell(options, query_options)
   if not (shell.execute_query_list(shell.cmdqueue) and
           shell.execute_query_list(queries)):
     sys.exit(1)
 
 if __name__ == "__main__":
+  """
+  There are two types of options: shell options and query_options. Both can be set in the
+  command line, which override the options set in config file (.impalarc). The default
+  shell options come from impala_shell_config_defaults.py. Query options have no defaults
+  within the impala-shell, but they do have defaults on the server. Query options can be
+  also changed in impala-shell with the 'set' command.
+  """
   # pass defaults into option parser
   parser = get_option_parser(impala_shell_defaults)
   options, args = parser.parse_args()
@@ -1351,13 +1363,15 @@ if __name__ == "__main__":
     print_to_stderr('%s not found.\n' % user_config)
     sys.exit(1)
 
-  # default options loaded in from impala_shell_config_defaults.py
+  query_options = {}
+
+  # default shell options loaded in from impala_shell_config_defaults.py
   # options defaults overwritten by those in config file
   try:
-    impala_shell_defaults.update(get_config_from_file(config_to_load))
+    loaded_shell_options, query_options = get_config_from_file(config_to_load)
+    impala_shell_defaults.update(loaded_shell_options)
   except Exception, e:
-    msg = "Unable to read configuration file correctly. Check formatting: %s\n" % e
-    print_to_stderr(msg)
+    print_to_stderr(e)
     sys.exit(1)
 
   parser = get_option_parser(impala_shell_defaults)
@@ -1436,12 +1450,17 @@ if __name__ == "__main__":
       sys.exit(1)
 
   options.variables = parse_variables(options.keyval)
+
+  # Override query_options from config file with those specified on the command line.
+  query_options.update(
+     [(k.upper(), v) for k, v in parse_variables(options.query_options).items()])
+
   if options.query or options.query_file:
     if options.print_progress or options.print_summary:
       print_to_stderr("Error: Live reporting is available for interactive mode only.")
       sys.exit(1)
 
-    execute_queries_non_interactive_mode(options)
+    execute_queries_non_interactive_mode(options, query_options)
     sys.exit(0)
 
   intro = WELCOME_STRING
@@ -1451,7 +1470,8 @@ if __name__ == "__main__":
 
   if options.refresh_after_connect:
     intro += REFRESH_AFTER_CONNECT_DEPRECATION_WARNING
-  shell = ImpalaShell(options)
+
+  shell = ImpalaShell(options, query_options)
   while shell.is_alive:
     try:
       try:

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/shell/option_parser.py
----------------------------------------------------------------------
diff --git a/shell/option_parser.py b/shell/option_parser.py
index f0f3573..a1c37d2 100755
--- a/shell/option_parser.py
+++ b/shell/option_parser.py
@@ -19,56 +19,101 @@
 
 # Example .impalarc file:
 #
-# [Options]
+# [impala]
 # impalad=localhost:21002
 # verbose=false
 # refresh_after_connect=true
 #
+# [impala.query_options]
+# EXPLAIN_LEVEL=2
+# MT_DOP=2
 
 import ConfigParser
 import sys
 from impala_shell_config_defaults import impala_shell_defaults
 from optparse import OptionParser
 
+class ConfigFileFormatError(Exception):
+  """Raised when the config file cannot be read by ConfigParser."""
+  pass
+
+class InvalidOptionValueError(Exception):
+  """Raised when an option contains an invalid value."""
+  pass
+
+def parse_bool_option(value):
+  """Returns True for '1' and 'True', and False for '0' and 'False'.
+     Throws ValueError for other values.
+  """
+  if value.lower() in ["true", "1"]:
+    return True
+  elif value.lower() in ["false", "0"]:
+    return False
+  else:
+    raise InvalidOptionValueError("Unexpected value in configuration file. '" + value \
+      + "' is not a valid value for a boolean option.")
+
+def parse_shell_options(options, defaults):
+  """Filters unknown options and converts some values from string to their corresponding
+     python types (booleans and None).
+
+     Returns a dictionary with option names as keys and option values as values.
+  """
+  result = {}
+  for option, value in options:
+    if option not in defaults:
+      print >> sys.stderr, "WARNING: Unable to read configuration file correctly. " \
+        "Ignoring unrecognized config option: '%s'\n" % option
+    elif isinstance(defaults[option], bool):
+      result[option] = parse_bool_option(value)
+    elif value.lower() == "none":
+      result[option] = None
+    else:
+      result[option] = value
+  return result
+
 def get_config_from_file(config_filename):
   """Reads contents of configuration file
 
-  Validates some values (False, True, None)
-  because ConfigParser reads values as strings
+  Two config sections are supported:
+  "[impala]":
+  Overrides the defaults of the shell arguments. Unknown options are filtered
+  and some values are converted from string to their corresponding python types
+  (booleans and None).
 
+  Setting 'config_filename' in the config file would have no effect,
+  so its original value is kept.
+
+  "[impala.query_options]"
+  Overrides the defaults of the query options. Not validated here,
+  because validation will take place after connecting to impalad.
+
+  Returns a pair of dictionaries (shell_options, query_options), with option names
+  as keys and option values as values.
   """
 
   config = ConfigParser.ConfigParser()
-  config.read(config_filename)
-  section_title = "impala"
-  if config.has_section(section_title):
-    loaded_options = config.items(section_title);
-
-    for i, (option, value) in enumerate(loaded_options):
-      if option not in impala_shell_defaults:
-        print >> sys.stderr, "WARNING: Unable to read configuration file correctly. " \
-          "Check formatting: '%s'\n" % option;
-        continue
-
-      if impala_shell_defaults[option] in [True, False]:
-        # validate the option if it can only be a boolean value
-        # the only choice for these options is true or false
-        if value.lower() == "true":
-          loaded_options[i] = (option, True)
-        elif value.lower() == 'false':
-          loaded_options[i] = (option, False)
-        else:
-          # if the option is not set to either true or false, use the default
-          loaded_options[i] = (option, impala_shell_defaults[option])
-      elif value.lower() == "none":
-        loaded_options[i] = (option, None)
-      elif option.lower() == "config_file":
-        loaded_options[i] = (option, config_filename)
-      else:
-        loaded_options[i] = (option, value)
-  else:
-    loaded_options = [];
-  return loaded_options
+  try:
+    config.read(config_filename)
+  except Exception, e:
+    raise ConfigFileFormatError( \
+      "Unable to read configuration file correctly. Check formatting: %s" % e)
+
+  shell_options = {}
+  if config.has_section("impala"):
+    shell_options = parse_shell_options(config.items("impala"), impala_shell_defaults)
+    if "config_file" in shell_options:
+      print >> sys.stderr, "WARNING: Option 'config_file' can be only set from shell."
+      shell_options["config_file"] = config_filename
+
+  query_options = {}
+  if config.has_section("impala.query_options"):
+    # Query option keys must be "normalized" to upper case before updating with
+    # options coming from command line.
+    query_options = dict( \
+      [ (k.upper(), v) for k, v in config.items("impala.query_options") ])
+
+  return shell_options, query_options
 
 def get_option_parser(defaults):
   """Creates OptionParser and adds shell options (flags)
@@ -142,9 +187,10 @@ def get_option_parser(defaults):
                     "certificate"))
   parser.add_option("--config_file", dest="config_file",
                     help=("Specify the configuration file to load options. "
-                          "File must have case-sensitive '[impala]' header. "
+                          "The following sections are used: [impala], "
+                          "[impala.query_options]. Section names are case sensitive. "
                           "Specifying this option within a config file will have "
-                          "no effect. Only specify this as a option in the commandline."
+                          "no effect. Only specify this as an option in the commandline."
                           ))
   parser.add_option("--live_summary", dest="print_summary", action="store_true",
                     help="Print a query summary every 1s while the query is running.")
@@ -158,10 +204,17 @@ def get_option_parser(defaults):
   parser.add_option("--ldap_password_cmd",
                     help="Shell command to run to retrieve the LDAP password")
   parser.add_option("--var", dest="keyval", action="append",
-                    help="Define variable(s) to be used within the Impala session."
+                    help="Defines a variable to be used within the Impala session."
+                         " Can be used multiple times to set different variables."
                          " It must follow the pattern \"KEY=VALUE\","
                          " KEY starts with an alphabetic character and"
                          " contains alphanumeric characters or underscores.")
+  parser.add_option("-Q", "--query_option", dest="query_options", action="append",
+                    help="Sets the default for a query option."
+                         " Can be used multiple times to set different query options."
+                         " It must follow the pattern \"KEY=VALUE\","
+                         " KEY must be a valid query option. Valid query options "
+                         " can be listed by command 'set'.")
 
   # add default values to the help text
   for option in parser.option_list:

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/impalarc_with_error
----------------------------------------------------------------------
diff --git a/tests/shell/impalarc_with_error b/tests/shell/impalarc_with_error
new file mode 100644
index 0000000..569827c
--- /dev/null
+++ b/tests/shell/impalarc_with_error
@@ -0,0 +1,2 @@
+[impala]
+ignore_query_failure=maybe

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/impalarc_with_query_options
----------------------------------------------------------------------
diff --git a/tests/shell/impalarc_with_query_options b/tests/shell/impalarc_with_query_options
new file mode 100644
index 0000000..2ae2129
--- /dev/null
+++ b/tests/shell/impalarc_with_query_options
@@ -0,0 +1,6 @@
+[impala.query_options]
+EXPLAIN_LEVEL=1
+explain_LEVEL=2
+MT_DOP=2
+invalid_query_option=1
+

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/impalarc_with_warnings
----------------------------------------------------------------------
diff --git a/tests/shell/impalarc_with_warnings b/tests/shell/impalarc_with_warnings
new file mode 100644
index 0000000..0c2f695
--- /dev/null
+++ b/tests/shell/impalarc_with_warnings
@@ -0,0 +1,3 @@
+[impala]
+config_file=a
+invalid_option=a

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/test_shell_commandline.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 0602e77..218224b 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -376,6 +376,20 @@ class TestImpalaShell(ImpalaTestSuite):
     args = '--config_file=%s/bad_impalarc' % QUERY_FILE_PATH
     run_impala_shell_cmd(args, expect_success=False)
 
+    # Testing config file related warning and error messages
+    args = '--config_file=%s/impalarc_with_warnings' % QUERY_FILE_PATH
+    result = run_impala_shell_cmd(args, expect_success=True)
+    assert "WARNING: Option 'config_file' can be only set from shell." in result.stderr
+    err_msg = ("WARNING: Unable to read configuration file correctly. "
+               "Ignoring unrecognized config option: 'invalid_option'\n")
+    assert  err_msg in result.stderr
+
+    args = '--config_file=%s/impalarc_with_error' % QUERY_FILE_PATH
+    result = run_impala_shell_cmd(args, expect_success=False)
+    err_msg = ("Unexpected value in configuration file. "
+               "'maybe' is not a valid value for a boolean option.")
+    assert  err_msg in result.stderr
+
   def test_execute_queries_from_stdin(self):
     """Test that queries get executed correctly when STDIN is given as the sql file."""
     args = '-f - --quiet -B'

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/test_shell_interactive.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index c37c2be..316e8c0 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -289,6 +289,18 @@ class TestImpalaShellInteractive(object):
     assert_var_substitution(result)
 
   @pytest.mark.execute_serially
+  def test_query_option_configuration(self):
+    rcfile_path = os.path.join(QUERY_FILE_PATH, 'impalarc_with_query_options')
+    args = '-Q MT_dop=1 --query_option=MAX_ERRORS=200 --config_file="%s"' % rcfile_path
+    cmds = "set;"
+    result = run_impala_shell_interactive(cmds, shell_args=args)
+    assert "\tMT_DOP: 1" in result.stdout
+    assert "\tMAX_ERRORS: 200" in result.stdout
+    assert "\tEXPLAIN_LEVEL: 2" in result.stdout
+    assert "INVALID_QUERY_OPTION is not supported for the impalad being "
+    "connected to, ignoring." in result.stdout
+
+  @pytest.mark.execute_serially
   def test_source_file(self):
     cwd = os.getcwd()
     try: