You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2017/11/03 15:15:42 UTC
[2/5] incubator-impala git commit: IMPALA-5736: Add impala-shell
argument to set default query options
IMPALA-5736: Add impala-shell argument to set default query options
Query options can be set from command line and impala rc as
key=value pairs, where key is case insensitive.
Examples:
command line:
impala-shell.sh -Q MT_DOP=1 --query_option=MAX_ERRORS=200
.impalarc:
[impala.query_options]
EXPLAIN_LEVEL=2
MT_DOP=2
The options set in command line will update the ones
in impalarc one by one, so the result of the example
above will be:
EXPLAIN_LEVEL=2
MT_DOP=1
MAX_ERRORS=200
Additional changes:
- 0 and 1 are accepted as bools in section [impala] to
make it more consistent with [impala.query_options]
- options that are expected to be bool but are not
0/1/true/false lead to error instead of warning
Change-Id: I26a3b67230c80a99bd246b6af205d558fec9a986
Reviewed-on: http://gerrit.cloudera.org:8080/8038
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0a0affb6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0a0affb6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0a0affb6
Branch: refs/heads/master
Commit: 0a0affb692fa4dfe27bdcc233312c8665f51b55b
Parents: eeb3242
Author: Csaba Ringhofer <cs...@cloudera.com>
Authored: Tue Sep 12 17:42:43 2017 +0200
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Nov 3 00:11:31 2017 +0000
----------------------------------------------------------------------
bin/rat_exclude_files.txt | 3 +
shell/impala_shell.py | 44 +++++++---
shell/option_parser.py | 125 +++++++++++++++++++--------
tests/shell/impalarc_with_error | 2 +
tests/shell/impalarc_with_query_options | 6 ++
tests/shell/impalarc_with_warnings | 3 +
tests/shell/test_shell_commandline.py | 14 +++
tests/shell/test_shell_interactive.py | 12 +++
8 files changed, 161 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/bin/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index e356a9e..73cd073 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -110,6 +110,9 @@ testdata/hive_benchmark/grepTiny/part-00000
tests/pytest.ini
tests/shell/bad_impalarc
tests/shell/good_impalarc
+tests/shell/impalarc_with_error
+tests/shell/impalarc_with_query_options
+tests/shell/impalarc_with_warnings
tests/shell/shell.cmds
tests/shell/shell2.cmds
tests/shell/shell_error.cmds
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 502a1cf..641060f 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -120,7 +120,7 @@ class ImpalaShell(object, cmd.Cmd):
# Minimum time in seconds between two calls to get the exec summary.
PROGRESS_UPDATE_INTERVAL = 1.0
- def __init__(self, options):
+ def __init__(self, options, query_options):
cmd.Cmd.__init__(self)
self.is_alive = True
@@ -156,7 +156,7 @@ class ImpalaShell(object, cmd.Cmd):
self.progress_stream = OverwritingStdErrOutputStream()
- self.set_query_options = {}
+ self.set_query_options = query_options
self.set_variables = options.variables
self._populate_command_list()
@@ -683,11 +683,16 @@ class ImpalaShell(object, cmd.Cmd):
self.partial_cmd = str()
# Check if any of query options set by the user are inconsistent
# with the impalad being connected to
- for set_option in self.set_query_options:
+
+ # Use a temporary to avoid changing set_query_options during iteration.
+ new_query_options = {}
+ for set_option, value in self.set_query_options.iteritems():
if set_option not in set(self.imp_client.default_query_options):
print ('%s is not supported for the impalad being '
'connected to, ignoring.' % set_option)
- del self.set_query_options[set_option]
+ else:
+ new_query_options[set_option] = value
+ self.set_query_options = new_query_options
def _connect(self):
try:
@@ -1308,7 +1313,7 @@ def parse_variables(keyvals):
vars[match.groups()[0].upper()] = match.groups()[1]
return vars
-def execute_queries_non_interactive_mode(options):
+def execute_queries_non_interactive_mode(options, query_options):
"""Run queries in non-interactive mode."""
if options.query_file:
try:
@@ -1328,12 +1333,19 @@ def execute_queries_non_interactive_mode(options):
return
queries = parse_query_text(query_text)
- shell = ImpalaShell(options)
+ shell = ImpalaShell(options, query_options)
if not (shell.execute_query_list(shell.cmdqueue) and
shell.execute_query_list(queries)):
sys.exit(1)
if __name__ == "__main__":
+ """
+ There are two types of options: shell options and query_options. Both can be set in the
+ command line, which override the options set in config file (.impalarc). The default
+ shell options come from impala_shell_config_defaults.py. Query options have no defaults
+ within the impala-shell, but they do have defaults on the server. Query options can be
+ also changed in impala-shell with the 'set' command.
+ """
# pass defaults into option parser
parser = get_option_parser(impala_shell_defaults)
options, args = parser.parse_args()
@@ -1351,13 +1363,15 @@ if __name__ == "__main__":
print_to_stderr('%s not found.\n' % user_config)
sys.exit(1)
- # default options loaded in from impala_shell_config_defaults.py
+ query_options = {}
+
+ # default shell options loaded in from impala_shell_config_defaults.py
# options defaults overwritten by those in config file
try:
- impala_shell_defaults.update(get_config_from_file(config_to_load))
+ loaded_shell_options, query_options = get_config_from_file(config_to_load)
+ impala_shell_defaults.update(loaded_shell_options)
except Exception, e:
- msg = "Unable to read configuration file correctly. Check formatting: %s\n" % e
- print_to_stderr(msg)
+ print_to_stderr(e)
sys.exit(1)
parser = get_option_parser(impala_shell_defaults)
@@ -1436,12 +1450,17 @@ if __name__ == "__main__":
sys.exit(1)
options.variables = parse_variables(options.keyval)
+
+ # Override query_options from config file with those specified on the command line.
+ query_options.update(
+ [(k.upper(), v) for k, v in parse_variables(options.query_options).items()])
+
if options.query or options.query_file:
if options.print_progress or options.print_summary:
print_to_stderr("Error: Live reporting is available for interactive mode only.")
sys.exit(1)
- execute_queries_non_interactive_mode(options)
+ execute_queries_non_interactive_mode(options, query_options)
sys.exit(0)
intro = WELCOME_STRING
@@ -1451,7 +1470,8 @@ if __name__ == "__main__":
if options.refresh_after_connect:
intro += REFRESH_AFTER_CONNECT_DEPRECATION_WARNING
- shell = ImpalaShell(options)
+
+ shell = ImpalaShell(options, query_options)
while shell.is_alive:
try:
try:
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/shell/option_parser.py
----------------------------------------------------------------------
diff --git a/shell/option_parser.py b/shell/option_parser.py
index f0f3573..a1c37d2 100755
--- a/shell/option_parser.py
+++ b/shell/option_parser.py
@@ -19,56 +19,101 @@
# Example .impalarc file:
#
-# [Options]
+# [impala]
# impalad=localhost:21002
# verbose=false
# refresh_after_connect=true
#
+# [impala.query_options]
+# EXPLAIN_LEVEL=2
+# MT_DOP=2
import ConfigParser
import sys
from impala_shell_config_defaults import impala_shell_defaults
from optparse import OptionParser
+class ConfigFileFormatError(Exception):
+ """Raised when the config file cannot be read by ConfigParser."""
+ pass
+
+class InvalidOptionValueError(Exception):
+ """Raised when an option contains an invalid value."""
+ pass
+
+def parse_bool_option(value):
+ """Returns True for '1' and 'True', and False for '0' and 'False'.
+ Throws ValueError for other values.
+ """
+ if value.lower() in ["true", "1"]:
+ return True
+ elif value.lower() in ["false", "0"]:
+ return False
+ else:
+ raise InvalidOptionValueError("Unexpected value in configuration file. '" + value \
+ + "' is not a valid value for a boolean option.")
+
+def parse_shell_options(options, defaults):
+ """Filters unknown options and converts some values from string to their corresponding
+ python types (booleans and None).
+
+ Returns a dictionary with option names as keys and option values as values.
+ """
+ result = {}
+ for option, value in options:
+ if option not in defaults:
+ print >> sys.stderr, "WARNING: Unable to read configuration file correctly. " \
+ "Ignoring unrecognized config option: '%s'\n" % option
+ elif isinstance(defaults[option], bool):
+ result[option] = parse_bool_option(value)
+ elif value.lower() == "none":
+ result[option] = None
+ else:
+ result[option] = value
+ return result
+
def get_config_from_file(config_filename):
"""Reads contents of configuration file
- Validates some values (False, True, None)
- because ConfigParser reads values as strings
+ Two config sections are supported:
+ "[impala]":
+ Overrides the defaults of the shell arguments. Unknown options are filtered
+ and some values are converted from string to their corresponding python types
+ (booleans and None).
+ Setting 'config_filename' in the config file would have no effect,
+ so its original value is kept.
+
+ "[impala.query_options]"
+ Overrides the defaults of the query options. Not validated here,
+ because validation will take place after connecting to impalad.
+
+ Returns a pair of dictionaries (shell_options, query_options), with option names
+ as keys and option values as values.
"""
config = ConfigParser.ConfigParser()
- config.read(config_filename)
- section_title = "impala"
- if config.has_section(section_title):
- loaded_options = config.items(section_title);
-
- for i, (option, value) in enumerate(loaded_options):
- if option not in impala_shell_defaults:
- print >> sys.stderr, "WARNING: Unable to read configuration file correctly. " \
- "Check formatting: '%s'\n" % option;
- continue
-
- if impala_shell_defaults[option] in [True, False]:
- # validate the option if it can only be a boolean value
- # the only choice for these options is true or false
- if value.lower() == "true":
- loaded_options[i] = (option, True)
- elif value.lower() == 'false':
- loaded_options[i] = (option, False)
- else:
- # if the option is not set to either true or false, use the default
- loaded_options[i] = (option, impala_shell_defaults[option])
- elif value.lower() == "none":
- loaded_options[i] = (option, None)
- elif option.lower() == "config_file":
- loaded_options[i] = (option, config_filename)
- else:
- loaded_options[i] = (option, value)
- else:
- loaded_options = [];
- return loaded_options
+ try:
+ config.read(config_filename)
+ except Exception, e:
+ raise ConfigFileFormatError( \
+ "Unable to read configuration file correctly. Check formatting: %s" % e)
+
+ shell_options = {}
+ if config.has_section("impala"):
+ shell_options = parse_shell_options(config.items("impala"), impala_shell_defaults)
+ if "config_file" in shell_options:
+ print >> sys.stderr, "WARNING: Option 'config_file' can be only set from shell."
+ shell_options["config_file"] = config_filename
+
+ query_options = {}
+ if config.has_section("impala.query_options"):
+ # Query option keys must be "normalized" to upper case before updating with
+ # options coming from command line.
+ query_options = dict( \
+ [ (k.upper(), v) for k, v in config.items("impala.query_options") ])
+
+ return shell_options, query_options
def get_option_parser(defaults):
"""Creates OptionParser and adds shell options (flags)
@@ -142,9 +187,10 @@ def get_option_parser(defaults):
"certificate"))
parser.add_option("--config_file", dest="config_file",
help=("Specify the configuration file to load options. "
- "File must have case-sensitive '[impala]' header. "
+ "The following sections are used: [impala], "
+ "[impala.query_options]. Section names are case sensitive. "
"Specifying this option within a config file will have "
- "no effect. Only specify this as a option in the commandline."
+ "no effect. Only specify this as an option in the commandline."
))
parser.add_option("--live_summary", dest="print_summary", action="store_true",
help="Print a query summary every 1s while the query is running.")
@@ -158,10 +204,17 @@ def get_option_parser(defaults):
parser.add_option("--ldap_password_cmd",
help="Shell command to run to retrieve the LDAP password")
parser.add_option("--var", dest="keyval", action="append",
- help="Define variable(s) to be used within the Impala session."
+ help="Defines a variable to be used within the Impala session."
+ " Can be used multiple times to set different variables."
" It must follow the pattern \"KEY=VALUE\","
" KEY starts with an alphabetic character and"
" contains alphanumeric characters or underscores.")
+ parser.add_option("-Q", "--query_option", dest="query_options", action="append",
+ help="Sets the default for a query option."
+ " Can be used multiple times to set different query options."
+ " It must follow the pattern \"KEY=VALUE\","
+ " KEY must be a valid query option. Valid query options "
+ " can be listed by command 'set'.")
# add default values to the help text
for option in parser.option_list:
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/impalarc_with_error
----------------------------------------------------------------------
diff --git a/tests/shell/impalarc_with_error b/tests/shell/impalarc_with_error
new file mode 100644
index 0000000..569827c
--- /dev/null
+++ b/tests/shell/impalarc_with_error
@@ -0,0 +1,2 @@
+[impala]
+ignore_query_failure=maybe
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/impalarc_with_query_options
----------------------------------------------------------------------
diff --git a/tests/shell/impalarc_with_query_options b/tests/shell/impalarc_with_query_options
new file mode 100644
index 0000000..2ae2129
--- /dev/null
+++ b/tests/shell/impalarc_with_query_options
@@ -0,0 +1,6 @@
+[impala.query_options]
+EXPLAIN_LEVEL=1
+explain_LEVEL=2
+MT_DOP=2
+invalid_query_option=1
+
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/impalarc_with_warnings
----------------------------------------------------------------------
diff --git a/tests/shell/impalarc_with_warnings b/tests/shell/impalarc_with_warnings
new file mode 100644
index 0000000..0c2f695
--- /dev/null
+++ b/tests/shell/impalarc_with_warnings
@@ -0,0 +1,3 @@
+[impala]
+config_file=a
+invalid_option=a
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/test_shell_commandline.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 0602e77..218224b 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -376,6 +376,20 @@ class TestImpalaShell(ImpalaTestSuite):
args = '--config_file=%s/bad_impalarc' % QUERY_FILE_PATH
run_impala_shell_cmd(args, expect_success=False)
+ # Testing config file related warning and error messages
+ args = '--config_file=%s/impalarc_with_warnings' % QUERY_FILE_PATH
+ result = run_impala_shell_cmd(args, expect_success=True)
+ assert "WARNING: Option 'config_file' can be only set from shell." in result.stderr
+ err_msg = ("WARNING: Unable to read configuration file correctly. "
+ "Ignoring unrecognized config option: 'invalid_option'\n")
+ assert err_msg in result.stderr
+
+ args = '--config_file=%s/impalarc_with_error' % QUERY_FILE_PATH
+ result = run_impala_shell_cmd(args, expect_success=False)
+ err_msg = ("Unexpected value in configuration file. "
+ "'maybe' is not a valid value for a boolean option.")
+ assert err_msg in result.stderr
+
def test_execute_queries_from_stdin(self):
"""Test that queries get executed correctly when STDIN is given as the sql file."""
args = '-f - --quiet -B'
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0a0affb6/tests/shell/test_shell_interactive.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index c37c2be..316e8c0 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -289,6 +289,18 @@ class TestImpalaShellInteractive(object):
assert_var_substitution(result)
@pytest.mark.execute_serially
+ def test_query_option_configuration(self):
+ rcfile_path = os.path.join(QUERY_FILE_PATH, 'impalarc_with_query_options')
+ args = '-Q MT_dop=1 --query_option=MAX_ERRORS=200 --config_file="%s"' % rcfile_path
+ cmds = "set;"
+ result = run_impala_shell_interactive(cmds, shell_args=args)
+ assert "\tMT_DOP: 1" in result.stdout
+ assert "\tMAX_ERRORS: 200" in result.stdout
+ assert "\tEXPLAIN_LEVEL: 2" in result.stdout
+ assert "INVALID_QUERY_OPTION is not supported for the impalad being "
+ "connected to, ignoring." in result.stdout
+
+ @pytest.mark.execute_serially
def test_source_file(self):
cwd = os.getcwd()
try: