You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2018/01/19 21:41:32 UTC

[3/8] impala git commit: IMPALA-6382: Cap spillable buffer size and max row size query options

IMPALA-6382: Cap spillable buffer size and max row size query options

Currently the default and min spillable buffer size and max row size
query options accept any valid int64 value. Since the planner depends
on these values for memory estimations, if a very large value close to
the limits of int64 is set, the variables representing or relying on
these estimates can overflow during different phases of query execution.

This patch puts a reasonable upper limit of 1TB to these query options
to prevent such a situation.

Testing:
Added backend query option tests.

Change-Id: I36d3915f7019b13c3eb06f08bfdb38c71ec864f1
Reviewed-on: http://gerrit.cloudera.org:8080/9023
Reviewed-by: Bikramjeet Vig <bi...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/028a83e6
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/028a83e6
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/028a83e6

Branch: refs/heads/2.x
Commit: 028a83e6543a18dd3b9161226355f1e8d36c4ed7
Parents: ca7d03c
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Fri Jan 12 17:23:15 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Jan 18 23:08:26 2018 +0000

----------------------------------------------------------------------
 be/src/service/query-options-test.cc            | 12 +++++++++-
 be/src/service/query-options.cc                 | 23 +++++++++++++++-----
 be/src/service/query-options.h                  |  3 +++
 .../functional-query/queries/QueryTest/set.test |  4 ++--
 4 files changed, 34 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/028a83e6/be/src/service/query-options-test.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options-test.cc b/be/src/service/query-options-test.cc
index efc5307..80c9866 100644
--- a/be/src/service/query-options-test.cc
+++ b/be/src/service/query-options-test.cc
@@ -140,7 +140,7 @@ TEST(QueryOptions, SetByteOptions) {
       {MAKE_OPTIONDEF(max_scan_range_length), {-1, I64_MAX}},
       {MAKE_OPTIONDEF(rm_initial_mem),        {-1, I64_MAX}},
       {MAKE_OPTIONDEF(buffer_pool_limit),     {-1, I64_MAX}},
-      {MAKE_OPTIONDEF(max_row_size),          {1, I64_MAX}},
+      {MAKE_OPTIONDEF(max_row_size),          {1, ROW_SIZE_LIMIT}},
       {MAKE_OPTIONDEF(parquet_file_size),     {-1, I32_MAX}}
   };
   vector<pair<OptionDef<int32_t>, Range<int32_t>>> case_set_i32 {
@@ -302,8 +302,18 @@ TEST(QueryOptions, SetSpecialOptions) {
       TestOk("2MB", 2 * 1024 * 1024);
       TestOk("32G", 32ll * 1024 * 1024 * 1024);
       TestError("10MB");
+      TestOk(to_string(SPILLABLE_BUFFER_LIMIT).c_str(), SPILLABLE_BUFFER_LIMIT);
+      TestError(to_string(2 * SPILLABLE_BUFFER_LIMIT).c_str());
     }
   }
+  // MAX_ROW_SIZE should be between 1 and ROW_SIZE_LIMIT
+  {
+    OptionDef<int64_t> key_def = MAKE_OPTIONDEF(max_row_size);
+    auto TestError = MakeTestErrFn(options, key_def);
+    TestError("-1");
+    TestError("0");
+    TestError(to_string(ROW_SIZE_LIMIT + 1).c_str());
+  }
 }
 
 TEST(QueryOptions, ParseQueryOptions) {

http://git-wip-us.apache.org/repos/asf/impala/blob/028a83e6/be/src/service/query-options.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index e8e8c7b..e3b5a1f 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -552,7 +552,13 @@ Status impala::SetQueryOption(const string& key, const string& value,
             ParseMemValue(value, "Default spillable buffer size", &buffer_size_bytes));
         if (!BitUtil::IsPowerOf2(buffer_size_bytes)) {
           return Status(
-              Substitute("Buffer size must be a power of two: $0", buffer_size_bytes));
+              Substitute("Default spillable buffer size must be a power of two: $0",
+                  buffer_size_bytes));
+        }
+        if (buffer_size_bytes > SPILLABLE_BUFFER_LIMIT) {
+          return Status(Substitute(
+              "Default spillable buffer size must be less than or equal to: $0",
+              SPILLABLE_BUFFER_LIMIT));
         }
         query_options->__set_default_spillable_buffer_size(buffer_size_bytes);
         break;
@@ -563,7 +569,13 @@ Status impala::SetQueryOption(const string& key, const string& value,
             ParseMemValue(value, "Minimum spillable buffer size", &buffer_size_bytes));
         if (!BitUtil::IsPowerOf2(buffer_size_bytes)) {
           return Status(
-              Substitute("Buffer size must be a power of two: $0", buffer_size_bytes));
+              Substitute("Minimum spillable buffer size must be a power of two: $0",
+                  buffer_size_bytes));
+        }
+        if (buffer_size_bytes > SPILLABLE_BUFFER_LIMIT) {
+          return Status(Substitute(
+              "Minimum spillable buffer size must be less than or equal to: $0",
+              SPILLABLE_BUFFER_LIMIT));
         }
         query_options->__set_min_spillable_buffer_size(buffer_size_bytes);
         break;
@@ -571,9 +583,10 @@ Status impala::SetQueryOption(const string& key, const string& value,
       case TImpalaQueryOptions::MAX_ROW_SIZE: {
         int64_t max_row_size_bytes;
         RETURN_IF_ERROR(ParseMemValue(value, "Max row size", &max_row_size_bytes));
-        if (max_row_size_bytes <= 0) {
-          return Status(Substitute(
-              "Max row size must be a positive number of bytes: $0", value));
+        if (max_row_size_bytes <= 0 || max_row_size_bytes > ROW_SIZE_LIMIT) {
+          return Status(
+              Substitute("Invalid max row size of $0. Valid sizes are in [$1, $2]", value,
+                  1, ROW_SIZE_LIMIT));
         }
         query_options->__set_max_row_size(max_row_size_bytes);
         break;

http://git-wip-us.apache.org/repos/asf/impala/blob/028a83e6/be/src/service/query-options.h
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index b7065f4..be3607f 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -132,6 +132,9 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
   QUERY_OPT_FN(idle_session_timeout, IDLE_SESSION_TIMEOUT, TQueryOptionLevel::REGULAR)\
   ;
 
+/// Enforce practical limits on some query options to avoid undesired query state.
+  static const int64_t SPILLABLE_BUFFER_LIMIT = 1LL << 40; // 1 TB
+  static const int64_t ROW_SIZE_LIMIT = 1LL << 40; // 1 TB
 
 /// Converts a TQueryOptions struct into a map of key, value pairs.  Options that
 /// aren't set and lack defaults in common/thrift/ImpalaInternalService.thrift are

http://git-wip-us.apache.org/repos/asf/impala/blob/028a83e6/testdata/workloads/functional-query/queries/QueryTest/set.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/set.test b/testdata/workloads/functional-query/queries/QueryTest/set.test
index 14ac629..32ad938 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/set.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/set.test
@@ -234,10 +234,10 @@ explain select count(distinct double_col) from functional.alltypesagg;
 ---- QUERY
 set max_row_size=-1;
 ---- CATCH
-Max row size must be a positive number of bytes: -1
+Invalid max row size of -1. Valid sizes are in [1, 1099511627776]
 ====
 ---- QUERY
 set max_row_size=0;
 ---- CATCH
-Max row size must be a positive number of bytes: 0
+Invalid max row size of 0. Valid sizes are in [1, 1099511627776]
 ====