You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/04/19 21:07:14 UTC
[impala] branch master updated: IMPALA-8270: fix MemTracker
teardown in FeSupport
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 8412c77 IMPALA-8270: fix MemTracker teardown in FeSupport
8412c77 is described below
commit 8412c772cb551c59fdb6e9dd59b49043f4aa9ef6
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Wed Apr 17 09:30:14 2019 -0700
IMPALA-8270: fix MemTracker teardown in FeSupport
This patch tries to simplify and standardise the order
in which control structures are torn down. As a
consequence the bug is fixed. I've described the bug
below.
The changes are:
* Make more control structures owned directly by
QueryState::obj_pool_, so that they are all
destroyed at the same time via ~QueryState.
* Tear down local_query_state_ explicitly before
other destructors run.
Either change is sufficient to fix the bug, but
I preferred to do both to reduce the chances
of similar bugs in future.
Description of bug:
===================
In the normal query execution flow:
- RuntimeState is in QueryState::obj_pool_
- RuntimeState owns RuntimeState::instance_mem_tracker_ via scoped_ptr
- QueryState::query_mem_tracker_ is in QueryState::obj_pool_
- QueryState::query_mem_tracker_ has a reference to
RuntimeState::instance_mem_tracker_
The tear-down works because ~QueryState unregisters query_mem_tracker_
from its parent, making the whole subtree unreachable before destroying
QueryState::obj_pool_. It is thus safe to destroy instance_mem_tracker_
along with the rest of obj_pool_.
FeSupport messes this up by having RuntimeState own the QueryState
RuntimeState::local_query_state_ via a scoped_ptr, and the implied
destructor order means that RuntimeState::instance_mem_tracker_ is
destroyed before RuntimeState::local_query_state_, which breaks the
above flow and the destroyed instance_mem_tracker_ is reachable from
the process MemTracker via QueryState::query_mem_tracker_ for a
small window until it is unregistered.
Testing:
Added a backend test that reproduced the ASAN use-after-free
failure when run against unmodified RuntimeState code. I did
not make it a unified backend test so that it would be easier
to backport this fix to older versions that don't have unified
tests.
Change-Id: If815130cd4db00917746f10b28514f779ee254f0
Reviewed-on: http://gerrit.cloudera.org:8080/13057
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Reviewed-by: Bikramjeet Vig <bi...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/runtime/CMakeLists.txt | 1 +
be/src/runtime/runtime-state-test.cc | 71 ++++++++++++++++++++++++++++++++++++
be/src/runtime/runtime-state.cc | 17 ++++++---
be/src/runtime/runtime-state.h | 15 +++++---
4 files changed, 92 insertions(+), 12 deletions(-)
diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt
index a84344d..a8c9b8b 100644
--- a/be/src/runtime/CMakeLists.txt
+++ b/be/src/runtime/CMakeLists.txt
@@ -101,3 +101,4 @@ ADD_BE_LSAN_TEST(tmp-file-mgr-test)
ADD_BE_LSAN_TEST(row-batch-serialize-test)
ADD_BE_LSAN_TEST(row-batch-test)
ADD_BE_LSAN_TEST(collection-value-builder-test)
+ADD_BE_LSAN_TEST(runtime-state-test)
diff --git a/be/src/runtime/runtime-state-test.cc b/be/src/runtime/runtime-state-test.cc
new file mode 100644
index 0000000..0e536f7
--- /dev/null
+++ b/be/src/runtime/runtime-state-test.cc
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+
+#include <boost/thread/thread.hpp>
+
+#include "codegen/llvm-codegen.h"
+#include "common/atomic.h"
+#include "runtime/mem-tracker.h"
+#include "runtime/runtime-state.h"
+#include "runtime/test-env.h"
+#include "service/fe-support.h"
+#include "testutil/gtest-util.h"
+
+#include "common/names.h"
+
+namespace impala {
+
+// Regression test for IMPALA-8270: MemTracker teardown order leading to
+// use-after-free from LogUsage.
+TEST(RuntimeStateTest, MemTrackerTeardown) {
+ const int LOG_USAGE_THREADS = 4;
+ const int CONSTRUCT_ITERS = 10;
+ TestEnv test_env;
+ ASSERT_OK(test_env.Init());
+ // Poll LogUsage() from other threads to reproduce race.
+ AtomicBool stop_threads(false);
+ thread_group log_usage_threads;
+ for (int i = 0; i < LOG_USAGE_THREADS; ++i) {
+ log_usage_threads.add_thread(new thread([&test_env, &stop_threads]() {
+ while (!stop_threads.Load()) {
+ string usage = test_env.exec_env()->process_mem_tracker()->LogUsage(100);
+ }
+ }));
+ }
+
+ // Construct and tear down RuntimeState repeatedly to try to reproduce race.
+ for (int i = 0; i < CONSTRUCT_ITERS; ++i) {
+ TQueryCtx query_ctx;
+ unique_ptr<RuntimeState> state(new RuntimeState(query_ctx, test_env.exec_env()));
+ ASSERT_TRUE(state->instance_mem_tracker() != nullptr);
+ state->ReleaseResources();
+ state.reset();
+ }
+ stop_threads.Store(true);
+ log_usage_threads.join_all();
+}
+}
+
+int main(int argc, char** argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
+ impala::InitFeSupport();
+ ABORT_IF_ERROR(impala::LlvmCodeGen::InitializeLlvm());
+ return RUN_ALL_TESTS();
+}
diff --git a/be/src/runtime/runtime-state.cc b/be/src/runtime/runtime-state.cc
index 8ab60b7..09dbd60 100644
--- a/be/src/runtime/runtime-state.cc
+++ b/be/src/runtime/runtime-state.cc
@@ -75,7 +75,7 @@ RuntimeState::RuntimeState(QueryState* query_state, const TPlanFragmentCtx& frag
local_time_zone_(&TimezoneDatabase::GetUtcTimezone()),
profile_(RuntimeProfile::Create(
obj_pool(), "Fragment " + PrintId(instance_ctx.fragment_instance_id))),
- instance_buffer_reservation_(new ReservationTracker) {
+ instance_buffer_reservation_(obj_pool()->Add(new ReservationTracker)) {
Init();
}
@@ -95,7 +95,8 @@ RuntimeState::RuntimeState(
now_(new TimestampValue(TimestampValue::Parse(qctx.now_string))),
utc_timestamp_(new TimestampValue(TimestampValue::Parse(qctx.utc_timestamp_string))),
local_time_zone_(&TimezoneDatabase::GetUtcTimezone()),
- profile_(RuntimeProfile::Create(obj_pool(), "<unnamed>")) {
+ profile_(RuntimeProfile::Create(obj_pool(), "<unnamed>")),
+ instance_buffer_reservation_(nullptr) {
// We may use execution resources while evaluating exprs, etc. Decremented in
// ReleaseResources() to release resources.
local_query_state_->AcquireBackendResourceRefcount();
@@ -108,6 +109,10 @@ RuntimeState::RuntimeState(
RuntimeState::~RuntimeState() {
DCHECK(released_resources_) << "Must call ReleaseResources()";
+ // IMPALA-8270: run local_query_state_ destructor *before* other destructors so that
+ // teardown order for the TestEnv/fe-support RuntimeState matches the teardown order
+ // for the "real" RuntimeState. The previous divergence lead to hard-to-find bugs.
+ if (local_query_state_ != nullptr) local_query_state_.reset();
}
void RuntimeState::Init() {
@@ -126,12 +131,12 @@ void RuntimeState::Init() {
total_network_send_timer_ = ADD_TIMER(runtime_profile(), "TotalNetworkSendTime");
total_network_receive_timer_ = ADD_TIMER(runtime_profile(), "TotalNetworkReceiveTime");
- instance_mem_tracker_.reset(new MemTracker(
+ instance_mem_tracker_ = obj_pool()->Add(new MemTracker(
runtime_profile(), -1, runtime_profile()->name(), query_mem_tracker()));
if (instance_buffer_reservation_ != nullptr) {
instance_buffer_reservation_->InitChildTracker(profile_,
- query_state_->buffer_reservation(), instance_mem_tracker_.get(),
+ query_state_->buffer_reservation(), instance_mem_tracker_,
numeric_limits<int64_t>::max());
}
@@ -163,7 +168,7 @@ Status RuntimeState::CreateCodegen() {
if (codegen_.get() != NULL) return Status::OK();
// TODO: add the fragment ID to the codegen ID as well
RETURN_IF_ERROR(LlvmCodeGen::CreateImpalaCodegen(this,
- instance_mem_tracker_.get(), PrintId(fragment_instance_id()), &codegen_));
+ instance_mem_tracker_, PrintId(fragment_instance_id()), &codegen_));
codegen_->EnableOptimizations(true);
profile_->AddChild(codegen_->runtime_profile());
return Status::OK();
@@ -269,7 +274,7 @@ void RuntimeState::SetMemLimitExceeded(MemTracker* tracker,
Status RuntimeState::CheckQueryState() {
DCHECK(instance_mem_tracker_ != nullptr);
if (UNLIKELY(instance_mem_tracker_->AnyLimitExceeded(MemLimit::HARD))) {
- SetMemLimitExceeded(instance_mem_tracker_.get());
+ SetMemLimitExceeded(instance_mem_tracker_);
}
return GetQueryStatus();
}
diff --git a/be/src/runtime/runtime-state.h b/be/src/runtime/runtime-state.h
index 5025215..d619bb0 100644
--- a/be/src/runtime/runtime-state.h
+++ b/be/src/runtime/runtime-state.h
@@ -107,10 +107,10 @@ class RuntimeState {
? instance_ctx_->fragment_instance_id
: no_instance_id_;
}
- MemTracker* instance_mem_tracker() { return instance_mem_tracker_.get(); }
+ MemTracker* instance_mem_tracker() { return instance_mem_tracker_; }
MemTracker* query_mem_tracker(); // reference to the query_state_'s memtracker
ReservationTracker* instance_buffer_reservation() {
- return instance_buffer_reservation_.get();
+ return instance_buffer_reservation_;
}
ThreadResourcePool* resource_pool() { return resource_pool_.get(); }
@@ -368,12 +368,15 @@ class RuntimeState {
/// Counters for bytes sent over the network in this instance's tree, not owned.
std::vector<RuntimeProfile::Counter*> bytes_sent_counters_;
- /// Memory usage of this fragment instance, a child of 'query_mem_tracker_'.
- boost::scoped_ptr<MemTracker> instance_mem_tracker_;
+ /// Memory usage of this fragment instance, a child of 'query_mem_tracker_'. Owned by
+ /// 'query_state_' and destroyed with the rest of the query's MemTracker hierarchy.
+ /// See IMPALA-8270 for a reason why having the QueryState own this is important.
+ MemTracker* instance_mem_tracker_ = nullptr;
/// Buffer reservation for this fragment instance - a child of the query buffer
- /// reservation. Non-NULL if 'query_state_' is not NULL.
- boost::scoped_ptr<ReservationTracker> instance_buffer_reservation_;
+ /// reservation. Non-NULL if this is a finstance's RuntimeState used for query
+ /// execution. Owned by 'query_state_'.
+ ReservationTracker* const instance_buffer_reservation_;
/// if true, execution should stop with a CANCELLED status
bool is_cancelled_ = false;