You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/08/08 19:02:25 UTC

[11/11] incubator-impala git commit: IMPALA-5661: buffer pool limit

IMPALA-5661: buffer pool limit

Adds the --buffer_pool_limit flag to control the buffer pool size.
It can be specified as either an absolute memory value or a percentage
of the process memory limit

Testing:
Started up a cluster with --buffer_pool_limit=10%, confirmed via
/metrics page that the buffer pool limit was reduced to ~800MB on
my system.

Change-Id: Ia64e21e0d5a7cf35a9064f365c6c86db13fbd73d
Reviewed-on: http://gerrit.cloudera.org:8080/7462
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/f14e68c7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/f14e68c7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/f14e68c7

Branch: refs/heads/master
Commit: f14e68c7255927a595b5fc017cc86960f59bc8df
Parents: 68df21b
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Jul 18 16:57:30 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Aug 8 10:45:33 2017 +0000

----------------------------------------------------------------------
 be/src/common/constant-strings.h          | 31 ++++++++++++++++++++++++++
 be/src/common/global-flags.cc             | 22 ++++++++++++++----
 be/src/runtime/exec-env.cc                | 19 ++++++++++------
 be/src/scheduling/request-pool-service.cc | 13 ++++++-----
 4 files changed, 68 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f14e68c7/be/src/common/constant-strings.h
----------------------------------------------------------------------
diff --git a/be/src/common/constant-strings.h b/be/src/common/constant-strings.h
new file mode 100644
index 0000000..66f0b0b
--- /dev/null
+++ b/be/src/common/constant-strings.h
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This file includes constant strings that are used in multiple places in the codebase.
+
+#ifndef IMPALA_COMMON_CONSTANT_STRINGS_H_
+#define IMPALA_COMMON_CONSTANT_STRINGS_H_
+
+// Template for a description of the ways to specify bytes. strings::Substitute() must
+// used to fill in the blanks.
+#define MEM_UNITS_HELP_MSG "Specified as number of bytes ('<int>[bB]?'), " \
+                          "megabytes ('<float>[mM]'), " \
+                          "gigabytes ('<float>[gG]'), " \
+                          "or percentage of $0 ('<int>%'). " \
+                          "Defaults to bytes if no unit is given."
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f14e68c7/be/src/common/global-flags.cc
----------------------------------------------------------------------
diff --git a/be/src/common/global-flags.cc b/be/src/common/global-flags.cc
index 0a6ca28..98417f2 100644
--- a/be/src/common/global-flags.cc
+++ b/be/src/common/global-flags.cc
@@ -21,7 +21,13 @@
 // a main()), or flags that are referenced from multiple places and having them here
 // calms the linker errors that would otherwise ensue.
 
+#include <string>
+
+#include "common/constant-strings.h"
 #include "common/logging.h"
+#include "gutil/strings/substitute.h"
+
+#include "common/names.h"
 
 // This will be defaulted to the host name returned by the OS.
 // This name is used in the principal generated for Kerberos authorization.
@@ -41,10 +47,18 @@ DEFINE_string(krb5_conf, "", "Absolute path to Kerberos krb5.conf if in a non-st
     "location. Does not normally need to be set.");
 DEFINE_string(krb5_debug_file, "", "Turn on Kerberos debugging and output to this file");
 
-DEFINE_string(mem_limit, "80%", "Process memory limit specified as number of bytes "
-              "('<int>[bB]?'), megabytes ('<float>[mM]'), gigabytes ('<float>[gG]'), "
-              "or percentage of the physical memory ('<int>%'). "
-              "Defaults to bytes if no unit is given");
+static const string mem_limit_help_msg = "Limit on process memory consumption, "
+    "excluding the JVM's memory consumption. "
+    + Substitute(MEM_UNITS_HELP_MSG, "the physical memory");
+DEFINE_string(mem_limit, "80%",  mem_limit_help_msg.c_str());
+
+static const string buffer_pool_limit_help_msg = "(Advanced) Limit on buffer pool size. "
+     + Substitute(MEM_UNITS_HELP_MSG, "the process memory limit") + " "
+    "The default value and behaviour of this flag may change between releases.";
+DEFINE_string(buffer_pool_limit, "80%", buffer_pool_limit_help_msg.c_str());
+
+DEFINE_int64(min_buffer_size, 64 * 1024,
+    "(Advanced) The minimum buffer size to use in the buffer pool");
 
 DEFINE_bool(enable_process_lifetime_heap_profiling, false, "(Advanced) Enables heap "
     "profiling for the lifetime of the process. Profile output will be stored in the "

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f14e68c7/be/src/runtime/exec-env.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/exec-env.cc b/be/src/runtime/exec-env.cc
index f2ee6f0..8239a68 100644
--- a/be/src/runtime/exec-env.cc
+++ b/be/src/runtime/exec-env.cc
@@ -75,14 +75,14 @@ DEFINE_int32(state_store_subscriber_port, 23000,
 DEFINE_int32(num_hdfs_worker_threads, 16,
     "(Advanced) The number of threads in the global HDFS operation pool");
 DEFINE_bool(disable_admission_control, false, "Disables admission control.");
-DEFINE_int64(min_buffer_size, 64 * 1024,
-    "(Advanced) The minimum buffer size to use in the buffer pool");
 
 DECLARE_int32(state_store_port);
 DECLARE_int32(num_threads_per_core);
 DECLARE_int32(num_cores);
 DECLARE_int32(be_port);
 DECLARE_string(mem_limit);
+DECLARE_string(buffer_pool_limit);
+DECLARE_int64(min_buffer_size);
 DECLARE_bool(is_coordinator);
 DECLARE_int32(webserver_port);
 
@@ -241,9 +241,14 @@ Status ExecEnv::StartServices() {
     return Status(Substitute(
         "--min_buffer_size must be a power-of-two: $0", FLAGS_min_buffer_size));
   }
-  int64_t buffer_pool_capacity = BitUtil::RoundDown(
-      no_process_mem_limit ? system_mem : bytes_limit * 4 / 5, FLAGS_min_buffer_size);
-  InitBufferPool(FLAGS_min_buffer_size, buffer_pool_capacity);
+  int64_t buffer_pool_limit = ParseUtil::ParseMemSpec(FLAGS_buffer_pool_limit,
+      &is_percent, no_process_mem_limit ? system_mem : bytes_limit);
+  if (buffer_pool_limit <= 0) {
+    return Status(Substitute("Invalid --buffer_pool_limit value, must be a percentage or "
+          "positive bytes value: $0", FLAGS_buffer_pool_limit));
+  }
+  buffer_pool_limit = BitUtil::RoundDown(buffer_pool_limit, FLAGS_min_buffer_size);
+  InitBufferPool(FLAGS_min_buffer_size, buffer_pool_limit);
 
   metrics_->Init(enable_webserver_ ? webserver_.get() : nullptr);
   impalad_client_cache_->InitMetrics(metrics_.get(), "impala-server.backends");
@@ -282,8 +287,8 @@ Status ExecEnv::StartServices() {
   }
   LOG(INFO) << "Using global memory limit: "
             << PrettyPrinter::Print(bytes_limit, TUnit::BYTES);
-  LOG(INFO) << "Buffer pool capacity: "
-            << PrettyPrinter::Print(buffer_pool_capacity, TUnit::BYTES);
+  LOG(INFO) << "Buffer pool limit: "
+            << PrettyPrinter::Print(buffer_pool_limit, TUnit::BYTES);
 
   RETURN_IF_ERROR(disk_io_mgr_->Init(mem_tracker_.get()));
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f14e68c7/be/src/scheduling/request-pool-service.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/request-pool-service.cc b/be/src/scheduling/request-pool-service.cc
index b674bf0..bad5ac1 100644
--- a/be/src/scheduling/request-pool-service.cc
+++ b/be/src/scheduling/request-pool-service.cc
@@ -23,6 +23,7 @@
 #include <string>
 #include <gutil/strings/substitute.h>
 
+#include "common/constant-strings.h"
 #include "common/logging.h"
 #include "rpc/jni-thrift-util.h"
 #include "service/query-options.h"
@@ -56,12 +57,12 @@ DEFINE_int64(default_pool_max_requests, -1, "Maximum number of concurrent outsta
     "requests allowed to run before queueing incoming requests. A negative value "
     "indicates no limit. 0 indicates no requests will be admitted. Ignored if "
     "fair_scheduler_config_path and llama_site_path are set.");
-DEFINE_string(default_pool_mem_limit, "", "Maximum amount of memory that all "
-    "outstanding requests in this pool may use before new requests to this pool"
-    " are queued. Specified as a number of bytes ('<int>[bB]?'), megabytes "
-    "('<float>[mM]'), gigabytes ('<float>[gG]'), or percentage of the physical memory "
-    "('<int>%'). 0 or not setting indicates no limit. Defaults to bytes if no unit is "
-    "given. Ignored if fair_scheduler_config_path and llama_site_path are set.");
+
+static const string default_pool_mem_limit_help_msg = "Maximum amount of memory that all "
+    "outstanding requests in this pool may use before new requests to this pool "
+    "are queued. " + Substitute(MEM_UNITS_HELP_MSG, "the physical memory") + " "
+    "Ignored if fair_scheduler_config_path and llama_site_path are set.";
+DEFINE_string(default_pool_mem_limit, "", default_pool_mem_limit_help_msg.c_str());
 DEFINE_int64(default_pool_max_queued, 200, "Maximum number of requests allowed to be "
     "queued before rejecting requests. A negative value or 0 indicates requests "
     "will always be rejected once the maximum number of concurrent requests are "