You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/09/06 21:08:46 UTC

[2/3] incubator-impala git commit: IMPALA-5589: change "set" in impala-shell to show empty string for unset query options

IMPALA-5589: change "set" in impala-shell to show empty string for unset query options

When converting TQueryOptions to a map<string,string>, we now convert
unset options to the empty string. Within TQueryOptions we have optional
options (like mt_dop or compression_codec) with no default specified. In
this case, the user was seeing 0 for numeric types and the first enum
option for enumeration types (e.g., "NONE" in the compression case).
This was confusing as the implementation handles this "null" case
differently (e.g., using SNAPPY as the default codec in the case
reported in the JIRA).

When running "set" in impala-shell, the difference is as
follows:

    -        BUFFER_POOL_LIMIT: [0]
    +        BUFFER_POOL_LIMIT: []
    -        COMPRESSION_CODEC: [NONE]
    +        COMPRESSION_CODEC: []
    -        MT_DOP: [0]
    +        MT_DOP: []
    -        RESERVATION_REQUEST_TIMEOUT: [0]
    +        RESERVATION_REQUEST_TIMEOUT: []
    -        SEQ_COMPRESSION_MODE: [0]
    +        SEQ_COMPRESSION_MODE: []
    -        V_CPU_CORES: [0]
    +        V_CPU_CORES: []

Obviously, the empty string is a valid value for a string-typed option, where
it will be impossible to tell the difference between "unset" and "set to empty
string." Today, there are two string-typed options: debug_string defaults to ""
and request_pool has no default. An alternative would have been to use
a special token like "_unset" or to introduce a new field in the beeswax
Thrift ConfigVariable struct. I think the empty string approach
is clearest.

The other users of this state, which I believe are HiveServer2's OpenSession()
call and HiveServer2's response to a "SET" query are affected. They
benefit from the same fix, and a new test has been added to test_hs2.py.

I did a mild refactoring in the HS2 tests to write a helper method
for the very common pattern of excecuting a query.

Testing:
* Manual testing with impala-shell
* Modified impala-shell tests to check this explicitly for one case.
* Modified HS2 test to check this as well as the SET k=v statement,
  which looked otherwise untested.

Change-Id: I86bc06a58d67b099da911293202dae9e844c439b
Reviewed-on: http://gerrit.cloudera.org:8080/7886
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/387bde06
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/387bde06
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/387bde06

Branch: refs/heads/master
Commit: 387bde0639ffd8ef580ccbf727152954e62bacbe
Parents: f8e7c31
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Tue Aug 29 15:39:24 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Sep 6 19:43:57 2017 +0000

----------------------------------------------------------------------
 be/src/service/query-options-test.cc            |  12 +++
 be/src/service/query-options.cc                 |  10 +-
 be/src/service/query-options.h                  |   4 +-
 .../functional-query/queries/QueryTest/set.test |  24 ++---
 tests/hs2/hs2_test_suite.py                     |  10 +-
 tests/hs2/test_hs2.py                           | 104 +++++++++++--------
 tests/shell/test_shell_commandline.py           |   2 +
 7 files changed, 106 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/387bde06/be/src/service/query-options-test.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options-test.cc b/be/src/service/query-options-test.cc
index 397df5a..1bec770 100644
--- a/be/src/service/query-options-test.cc
+++ b/be/src/service/query-options-test.cc
@@ -135,4 +135,16 @@ TEST(QueryOptions, ParseQueryOptions) {
   }
 }
 
+TEST(QueryOptions, MapOptionalDefaultlessToEmptyString) {
+  TQueryOptions options;
+  map<string, string> map;
+
+  TQueryOptionsToMap(options, &map);
+  // No default set
+  EXPECT_EQ(map["COMPRESSION_CODEC"], "");
+  EXPECT_EQ(map["MT_DOP"], "");
+  // Has defaults
+  EXPECT_EQ(map["EXPLAIN_LEVEL"], "1");
+}
+
 IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/387bde06/be/src/service/query-options.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 21327f3..e950d31 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -67,9 +67,13 @@ void impala::TQueryOptionsToMap(const TQueryOptions& query_options,
     map<string, string>* configuration) {
 #define QUERY_OPT_FN(NAME, ENUM)\
   {\
-    stringstream val;\
-    val << query_options.NAME;\
-    (*configuration)[#ENUM] = val.str();\
+    if (query_options.__isset.NAME) { \
+      stringstream val;\
+      val << query_options.NAME;\
+      (*configuration)[#ENUM] = val.str();\
+    } else { \
+      (*configuration)[#ENUM] = ""; \
+    }\
   }
   QUERY_OPTS_TABLE
 #undef QUERY_OPT_FN

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/387bde06/be/src/service/query-options.h
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index bb8c301..3dada7d 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -99,7 +99,9 @@ class TQueryOptions;
   ;
 
 
-/// Converts a TQueryOptions struct into a map of key, value pairs.
+/// Converts a TQueryOptions struct into a map of key, value pairs.  Options that
+/// aren't set and lack defaults in common/thrift/ImpalaInternalService.thrift are
+/// mapped to the empty string.
 void TQueryOptionsToMap(const TQueryOptions& query_options,
     std::map<std::string, std::string>* configuration);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/387bde06/testdata/workloads/functional-query/queries/QueryTest/set.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/set.test b/testdata/workloads/functional-query/queries/QueryTest/set.test
index a7004e0..45c8343 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/set.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/set.test
@@ -20,13 +20,13 @@ set
 'MEM_LIMIT','0'
 'NUM_NODES','0'
 'NUM_SCANNER_THREADS','0'
-'COMPRESSION_CODEC','NONE'
+'COMPRESSION_CODEC',''
 'PARQUET_FILE_SIZE','0'
 'REQUEST_POOL',''
-'RESERVATION_REQUEST_TIMEOUT','0'
+'RESERVATION_REQUEST_TIMEOUT',''
 'RM_INITIAL_MEM','0'
 'SYNC_DDL','0'
-'V_CPU_CORES','0'
+'V_CPU_CORES',''
 ---- TYPES
 STRING, STRING
 ====
@@ -52,13 +52,13 @@ set;
 'MEM_LIMIT','0'
 'NUM_NODES','0'
 'NUM_SCANNER_THREADS','0'
-'COMPRESSION_CODEC','NONE'
+'COMPRESSION_CODEC',''
 'PARQUET_FILE_SIZE','0'
 'REQUEST_POOL',''
-'RESERVATION_REQUEST_TIMEOUT','0'
+'RESERVATION_REQUEST_TIMEOUT',''
 'RM_INITIAL_MEM','0'
 'SYNC_DDL','0'
-'V_CPU_CORES','0'
+'V_CPU_CORES',''
 ---- TYPES
 STRING, STRING
 ====
@@ -84,13 +84,13 @@ set;
 'MEM_LIMIT','0'
 'NUM_NODES','0'
 'NUM_SCANNER_THREADS','0'
-'COMPRESSION_CODEC','NONE'
+'COMPRESSION_CODEC',''
 'PARQUET_FILE_SIZE','0'
 'REQUEST_POOL',''
-'RESERVATION_REQUEST_TIMEOUT','0'
+'RESERVATION_REQUEST_TIMEOUT',''
 'RM_INITIAL_MEM','0'
 'SYNC_DDL','0'
-'V_CPU_CORES','0'
+'V_CPU_CORES',''
 ---- TYPES
 STRING, STRING
 ====
@@ -117,13 +117,13 @@ set;
 'MEM_LIMIT','0'
 'NUM_NODES','0'
 'NUM_SCANNER_THREADS','0'
-'COMPRESSION_CODEC','NONE'
+'COMPRESSION_CODEC',''
 'PARQUET_FILE_SIZE','1610612736'
 'REQUEST_POOL',''
-'RESERVATION_REQUEST_TIMEOUT','0'
+'RESERVATION_REQUEST_TIMEOUT',''
 'RM_INITIAL_MEM','0'
 'SYNC_DDL','0'
-'V_CPU_CORES','0'
+'V_CPU_CORES',''
 ---- TYPES
 STRING, STRING
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/387bde06/tests/hs2/hs2_test_suite.py
----------------------------------------------------------------------
diff --git a/tests/hs2/hs2_test_suite.py b/tests/hs2/hs2_test_suite.py
index 2c2cd51..1b2f89f 100644
--- a/tests/hs2/hs2_test_suite.py
+++ b/tests/hs2/hs2_test_suite.py
@@ -124,7 +124,6 @@ class HS2TestSuite(ImpalaTestSuite):
     fetch_results_req.maxRows = size
     fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
     HS2TestSuite.check_response(fetch_results_resp)
-    num_rows = size
     if expected_num_rows is not None:
       assert self.get_num_rows(fetch_results_resp.results) == expected_num_rows
     return fetch_results_resp
@@ -228,3 +227,12 @@ class HS2TestSuite(ImpalaTestSuite):
       sleep(interval)
     assert False, 'Did not reach expected operation state %s in time, actual state was ' \
         '%s' % (expected_state, get_operation_status_resp.operationState)
+
+  def execute_statement(self, statement):
+    """Executes statement and returns response, which is checked."""
+    execute_statement_req = TCLIService.TExecuteStatementReq()
+    execute_statement_req.sessionHandle = self.session_handle
+    execute_statement_req.statement = statement
+    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
+    HS2TestSuite.check_response(execute_statement_resp)
+    return execute_statement_resp

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/387bde06/tests/hs2/test_hs2.py
----------------------------------------------------------------------
diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py
index f42b29a..2f984b1 100644
--- a/tests/hs2/test_hs2.py
+++ b/tests/hs2/test_hs2.py
@@ -45,6 +45,48 @@ class TestHS2(HS2TestSuite):
     for k, v in open_session_req.configuration.items():
       assert open_session_resp.configuration[k] == v
 
+  @needs_session()
+  def test_session_options_via_set(self):
+    """
+    Tests that session options are returned by a SET
+    query and can be updated by a "SET k=v" query.
+    """
+    def get_session_options():
+      """Returns dictionary of query options."""
+      execute_statement_resp = self.execute_statement("SET")
+
+      fetch_results_req = TCLIService.TFetchResultsReq()
+      fetch_results_req.operationHandle = execute_statement_resp.operationHandle
+      fetch_results_req.maxRows = 1000
+      fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
+      TestHS2.check_response(fetch_results_resp)
+
+      # Close the query
+      close_operation_req = TCLIService.TCloseOperationReq()
+      close_operation_req.operationHandle = execute_statement_resp.operationHandle
+      TestHS2.check_response(self.hs2_client.CloseOperation(close_operation_req))
+
+      # Results are returned in a columnar way:
+      cols = fetch_results_resp.results.columns
+      assert len(cols) == 2
+      vals = dict(zip(cols[0].stringVal.values, cols[1].stringVal.values))
+      return vals
+
+    vals = get_session_options()
+
+    # No default; should be empty string.
+    assert vals["COMPRESSION_CODEC"] == ""
+    # Has default of 0
+    assert vals["SYNC_DDL"] == "0"
+
+    # Set an option using SET
+    self.execute_statement("SET COMPRESSION_CODEC=gzip")
+
+    vals2 = get_session_options()
+    assert vals2["COMPRESSION_CODEC"] == "GZIP"
+    # Should be unchanged
+    assert vals2["SYNC_DDL"] == "0"
+
   def test_open_session_http_addr(self):
     """Check that OpenSession returns the coordinator's http address."""
     open_session_req = TCLIService.TOpenSessionReq()
@@ -172,11 +214,8 @@ class TestHS2(HS2TestSuite):
   @needs_session()
   def test_get_operation_status(self):
     """Tests that GetOperationStatus returns a valid result for a running query"""
-    execute_statement_req = TCLIService.TExecuteStatementReq()
-    execute_statement_req.sessionHandle = self.session_handle
-    execute_statement_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
-    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
-    TestHS2.check_response(execute_statement_resp)
+    statement = "SELECT COUNT(*) FROM functional.alltypes"
+    execute_statement_resp = self.execute_statement(statement)
 
     get_operation_status_resp = \
         self.get_operation_status(execute_statement_resp.operationHandle)
@@ -213,11 +252,8 @@ class TestHS2(HS2TestSuite):
   def test_get_operation_status_error(self):
     """Tests that GetOperationStatus returns a valid result for a query that encountered
     an error"""
-    execute_statement_req = TCLIService.TExecuteStatementReq()
-    execute_statement_req.sessionHandle = self.session_handle
-    execute_statement_req.statement = "SELECT * FROM functional.alltypeserror"
-    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
-    TestHS2.check_response(execute_statement_resp)
+    statement = "SELECT * FROM functional.alltypeserror"
+    execute_statement_resp = self.execute_statement(statement)
 
     get_operation_status_resp = self.wait_for_operation_state( \
         execute_statement_resp.operationHandle, TCLIService.TOperationState.ERROR_STATE)
@@ -332,17 +368,15 @@ class TestHS2(HS2TestSuite):
     assert "Sql Statement: GET_SCHEMAS" in profile_page
     assert "Query Type: DDL" in profile_page
 
+
   @needs_session(conf_overlay={"idle_session_timeout": "5"})
   def test_get_operation_status_session_timeout(self):
     """Regression test for IMPALA-4488: GetOperationStatus() would not keep a session
     alive"""
-    execute_statement_req = TCLIService.TExecuteStatementReq()
-    execute_statement_req.sessionHandle = self.session_handle
     # Choose a long-running query so that it can't finish before the session timeout.
-    execute_statement_req.statement = """select * from functional.alltypes a
+    statement = """select * from functional.alltypes a
     join functional.alltypes b join functional.alltypes c"""
-    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
-    TestHS2.check_response(execute_statement_resp)
+    execute_statement_resp = self.execute_statement(statement)
 
     now = time.time()
     # Loop until the session would be timed-out if IMPALA-4488 had not been fixed.
@@ -354,11 +388,7 @@ class TestHS2(HS2TestSuite):
       time.sleep(0.1)
 
   def get_log(self, query_stmt):
-    execute_statement_req = TCLIService.TExecuteStatementReq()
-    execute_statement_req.sessionHandle = self.session_handle
-    execute_statement_req.statement = query_stmt
-    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
-    TestHS2.check_response(execute_statement_resp)
+    execute_statement_resp = self.execute_statement(query_stmt)
 
     # Fetch results to make sure errors are generated. Errors are only guaranteed to be
     # seen by the coordinator after FetchResults() returns eos.
@@ -389,11 +419,8 @@ class TestHS2(HS2TestSuite):
 
   @needs_session()
   def test_get_exec_summary(self):
-    execute_statement_req = TCLIService.TExecuteStatementReq()
-    execute_statement_req.sessionHandle = self.session_handle
-    execute_statement_req.statement = "SELECT COUNT(1) FROM functional.alltypes"
-    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
-    TestHS2.check_response(execute_statement_resp)
+    statement = "SELECT COUNT(1) FROM functional.alltypes"
+    execute_statement_resp = self.execute_statement(statement)
 
     exec_summary_req = ImpalaHiveServer2Service.TGetExecSummaryReq()
     exec_summary_req.operationHandle = execute_statement_resp.operationHandle
@@ -415,18 +442,15 @@ class TestHS2(HS2TestSuite):
 
   @needs_session()
   def test_get_profile(self):
-    execute_statement_req = TCLIService.TExecuteStatementReq()
-    execute_statement_req.sessionHandle = self.session_handle
-    execute_statement_req.statement = "SELECT COUNT(2) FROM functional.alltypes"
-    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
-    TestHS2.check_response(execute_statement_resp)
+    statement = "SELECT COUNT(2) FROM functional.alltypes"
+    execute_statement_resp = self.execute_statement(statement)
 
     get_profile_req = ImpalaHiveServer2Service.TGetRuntimeProfileReq()
     get_profile_req.operationHandle = execute_statement_resp.operationHandle
     get_profile_req.sessionHandle = self.session_handle
     get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req)
     TestHS2.check_response(get_profile_resp)
-    assert execute_statement_req.statement in get_profile_resp.profile
+    assert statement in get_profile_resp.profile
     # If ExecuteStatement() has completed but the results haven't been fetched yet, the
     # query must have at least reached RUNNING.
     assert "Query State: RUNNING" in get_profile_resp.profile or \
@@ -439,7 +463,7 @@ class TestHS2(HS2TestSuite):
 
     get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req)
     TestHS2.check_response(get_profile_resp)
-    assert execute_statement_req.statement in get_profile_resp.profile
+    assert statement in get_profile_resp.profile
     # After fetching the results, we must be in state FINISHED.
     assert "Query State: FINISHED" in get_profile_resp.profile, get_profile_resp.profile
 
@@ -449,26 +473,20 @@ class TestHS2(HS2TestSuite):
 
     get_profile_resp = self.hs2_client.GetRuntimeProfile(get_profile_req)
     TestHS2.check_response(get_profile_resp)
-    assert execute_statement_req.statement in get_profile_resp.profile
+    assert statement in get_profile_resp.profile
     assert "Query State: FINISHED" in get_profile_resp.profile, get_profile_resp.profile
 
   @needs_session(conf_overlay={"use:database": "functional"})
   def test_change_default_database(self):
-    execute_statement_req = TCLIService.TExecuteStatementReq()
-    execute_statement_req.sessionHandle = self.session_handle
-    execute_statement_req.statement = "SELECT 1 FROM alltypes LIMIT 1"
-    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
+    statement = "SELECT 1 FROM alltypes LIMIT 1"
     # Will fail if there's no table called 'alltypes' in the database
-    TestHS2.check_response(execute_statement_resp)
+    self.execute_statement(statement)
 
   @needs_session(conf_overlay={"use:database": "FUNCTIONAL"})
   def test_change_default_database_case_insensitive(self):
-    execute_statement_req = TCLIService.TExecuteStatementReq()
-    execute_statement_req.sessionHandle = self.session_handle
-    execute_statement_req.statement = "SELECT 1 FROM alltypes LIMIT 1"
-    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
+    statement = "SELECT 1 FROM alltypes LIMIT 1"
     # Will fail if there's no table called 'alltypes' in the database
-    TestHS2.check_response(execute_statement_resp)
+    self.execute_statement(statement)
 
   @needs_session(conf_overlay={"use:database": "doesnt-exist"})
   def test_bad_default_database(self):

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/387bde06/tests/shell/test_shell_commandline.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 488de49..0602e77 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -248,6 +248,8 @@ class TestImpalaShell(ImpalaTestSuite):
     assert 'MEM_LIMIT: [0]' in result_set.stdout
     # test to check that explain_level is 1
     assert 'EXPLAIN_LEVEL: [1]' in result_set.stdout
+    # test to check that configs without defaults show up as []
+    assert 'COMPRESSION_CODEC: []' in result_set.stdout
     # test values displayed after setting value
     args = '-q "set mem_limit=1g;set"'
     result_set = run_impala_shell_cmd(args)