You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2020/03/24 02:17:53 UTC
[impala] branch master updated: IMPALA-5904: (part 3) Fix more TSAN
bugs
This is an automated email from the ASF dual-hosted git repository.
stakiar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 8b162ff IMPALA-5904: (part 3) Fix more TSAN bugs
8b162ff is described below
commit 8b162ff480aa346d1d32830097586d681976e2d1
Author: Sahil Takiar <ta...@gmail.com>
AuthorDate: Thu Feb 20 17:10:25 2020 -0800
IMPALA-5904: (part 3) Fix more TSAN bugs
As of this commit, all backend tests are TSAN clean (or suppressions
exist for any races that are considered benign).
TSAN Fixes:
* Fixes IMPALA-9374: Possible data race in TupleDescriptor::GetLlvmStruct
* Replaces the assignment of llvm_field_idx_ with slot_idx_, since
they are the same; added a DCHECK to enforce this
* Data race in be/src/exprs/scalar-expr-evaluator.cc:285:26
impala::ScalarExprEvaluator::GetValue(...)
* This race seems specific to data-stream-test, and does not occur
during any other tests
* I made some changes to DataStreamTest to fix the sharing of
TupleRowComparators between threads
Suppresions:
* Added a suppresion for IMPALA-9455: Possible data race in
kudu::security::InitKerberosForServer
* Added suppresions for hs2-http-test since it uses the ThriftServer, which
is known to be racey (IMPALA-9314)
Testing:
* Ran exhaustive tests
* Re-ran TSAN tests and confirmed the data races have been fixed
Change-Id: I99b7b119e256085d1ba6977e1161fc658273b242
Reviewed-on: http://gerrit.cloudera.org:8080/15363
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
be/src/rpc/hs2-http-test.cc | 6 ++++--
be/src/runtime/data-stream-test.cc | 34 +++++++++++++++++++++-------------
be/src/runtime/descriptors.cc | 7 +++++--
be/src/runtime/descriptors.h | 13 ++++++-------
bin/tsan-suppressions.txt | 11 +++++++++++
5 files changed, 47 insertions(+), 24 deletions(-)
diff --git a/be/src/rpc/hs2-http-test.cc b/be/src/rpc/hs2-http-test.cc
index 544b739..e3d1819 100644
--- a/be/src/rpc/hs2-http-test.cc
+++ b/be/src/rpc/hs2-http-test.cc
@@ -34,11 +34,12 @@ static string IMPALA_HOME(getenv("IMPALA_HOME"));
namespace filesystem = boost::filesystem;
-using namespace impala;
using namespace apache::hive::service::cli::thrift;
using namespace apache::thrift;
using strings::Substitute;
+namespace impala {
+
class TestHS2Service : public ImpalaHiveServer2ServiceIf {
public:
virtual ~TestHS2Service() {}
@@ -166,9 +167,10 @@ TEST(Hs2HttpTest, TestSpnego) {
http_server->StopForTesting();
}
+}
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
- InitCommonRuntime(argc, argv, false, TestInfo::BE_TEST);
+ impala::InitCommonRuntime(argc, argv, false, impala::TestInfo::BE_TEST);
return RUN_ALL_TESTS();
}
diff --git a/be/src/runtime/data-stream-test.cc b/be/src/runtime/data-stream-test.cc
index 8cbf2ac..863db94 100644
--- a/be/src/runtime/data-stream-test.cc
+++ b/be/src/runtime/data-stream-test.cc
@@ -159,10 +159,13 @@ class DataStreamTest : public testing::Test {
CreateRowDesc();
+ SlotRef* lhs_slot = obj_pool_.Add(new SlotRef(TYPE_BIGINT, 0));
+ ASSERT_OK(lhs_slot->Init(RowDescriptor(), true, runtime_state_.get()));
+ ordering_exprs_.push_back(lhs_slot);
+
tsort_info_.sorting_order = TSortingOrder::LEXICAL;
tsort_info_.is_asc_order.push_back(true);
tsort_info_.nulls_first.push_back(true);
- CreateTupleComparator();
next_instance_id_.lo = 0;
next_instance_id_.hi = 0;
@@ -217,7 +220,9 @@ class DataStreamTest : public testing::Test {
virtual void TearDown() {
desc_tbl_->ReleaseResources();
- less_than_->Close(runtime_state_.get());
+ for (auto less_than_comparator : less_than_comparators_) {
+ less_than_comparator->Close(runtime_state_.get());
+ }
ScalarExpr::Close(ordering_exprs_);
mem_pool_->FreeAll();
StopKrpcBackend();
@@ -236,7 +241,7 @@ class DataStreamTest : public testing::Test {
DescriptorTbl* desc_tbl_;
const RowDescriptor* row_desc_;
TSortInfo tsort_info_;
- TupleRowComparator* less_than_;
+ vector<TupleRowComparator*> less_than_comparators_;
boost::scoped_ptr<ExecEnv> exec_env_;
scoped_ptr<RuntimeState> runtime_state_;
TUniqueId next_instance_id_;
@@ -342,14 +347,12 @@ class DataStreamTest : public testing::Test {
}
// Create a tuple comparator to sort in ascending order on the single bigint column.
- void CreateTupleComparator() {
- SlotRef* lhs_slot = obj_pool_.Add(new SlotRef(TYPE_BIGINT, 0));
- ASSERT_OK(lhs_slot->Init(RowDescriptor(), true, runtime_state_.get()));
- ordering_exprs_.push_back(lhs_slot);
+ void CreateTupleComparator(TupleRowComparator** less_than_comparator) {
TupleRowComparatorConfig* comparator_config =
obj_pool_.Add(new TupleRowComparatorConfig(tsort_info_, ordering_exprs_));
- less_than_ = obj_pool_.Add(new TupleRowLexicalComparator(*comparator_config));
- ASSERT_OK(less_than_->Open(
+ *less_than_comparator =
+ obj_pool_.Add(new TupleRowLexicalComparator(*comparator_config));
+ ASSERT_OK((*less_than_comparator)->Open(
&obj_pool_, runtime_state_.get(), mem_pool_.get(), mem_pool_.get()));
}
@@ -389,11 +392,15 @@ class DataStreamTest : public testing::Test {
info->stream_recvr = stream_mgr_->CreateRecvr(row_desc_, *runtime_state_.get(),
instance_id, DEST_NODE_ID, num_senders, buffer_size, is_merging, profile,
&tracker_, &buffer_pool_client_);
- if (!is_merging) {
+ if (!is_merging) {
info->thread_handle.reset(new thread(&DataStreamTest::ReadStream, this, info));
} else {
+ TupleRowComparator* less_than_comparator = nullptr;
+ CreateTupleComparator(&less_than_comparator);
+ DCHECK(less_than_comparator != nullptr);
+ less_than_comparators_.push_back(less_than_comparator);
info->thread_handle.reset(new thread(&DataStreamTest::ReadStreamMerging, this, info,
- profile));
+ profile, less_than_comparator));
}
if (out_id != nullptr) *out_id = instance_id;
}
@@ -423,8 +430,9 @@ class DataStreamTest : public testing::Test {
VLOG_QUERY << "done reading";
}
- void ReadStreamMerging(ReceiverInfo* info, RuntimeProfile* profile) {
- info->status = info->stream_recvr->CreateMerger(*less_than_);
+ void ReadStreamMerging(ReceiverInfo* info, RuntimeProfile* profile,
+ TupleRowComparator* less_than_comparator) {
+ info->status = info->stream_recvr->CreateMerger(*less_than_comparator);
if (info->status.IsCancelled()) return;
RowBatch batch(row_desc_, 1024, &tracker_);
VLOG_QUERY << "start reading merging";
diff --git a/be/src/runtime/descriptors.cc b/be/src/runtime/descriptors.cc
index 0ed4519..b9a96fc 100644
--- a/be/src/runtime/descriptors.cc
+++ b/be/src/runtime/descriptors.cc
@@ -142,7 +142,7 @@ string SlotDescriptor::DebugString() const {
out << " collection_item_tuple_id=" << collection_item_descriptor_->id();
}
out << " offset=" << tuple_offset_ << " null=" << null_indicator_offset_.DebugString()
- << " slot_idx=" << slot_idx_ << " field_idx=" << llvm_field_idx_
+ << " slot_idx=" << slot_idx_ << " field_idx=" << slot_idx_
<< ")";
return out.str();
}
@@ -724,6 +724,10 @@ llvm::Value* SlotDescriptor::CodegenGetNullByte(
vector<SlotDescriptor*> TupleDescriptor::SlotsOrderedByIdx() const {
vector<SlotDescriptor*> sorted_slots(slots().size());
for (SlotDescriptor* slot: slots()) sorted_slots[slot->slot_idx_] = slot;
+ // Check that the size of sorted_slots has not changed. This ensures that the series
+ // of slot indexes starts at 0 and increases by 1 for each slot. This also ensures that
+ // the returned vector has no nullptr elements.
+ DCHECK_EQ(slots().size(), sorted_slots.size());
return sorted_slots;
}
@@ -736,7 +740,6 @@ llvm::StructType* TupleDescriptor::GetLlvmStruct(LlvmCodeGen* codegen) const {
int curr_struct_offset = 0;
for (SlotDescriptor* slot: sorted_slots) {
DCHECK_EQ(curr_struct_offset, slot->tuple_offset());
- slot->llvm_field_idx_ = struct_fields.size();
struct_fields.push_back(codegen->GetSlotType(slot->type()));
curr_struct_offset = slot->tuple_offset() + slot->slot_size();
}
diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h
index a54e4c6..c056b18 100644
--- a/be/src/runtime/descriptors.h
+++ b/be/src/runtime/descriptors.h
@@ -124,8 +124,12 @@ class SlotDescriptor {
/// convenient for table formats for which we only support flat data.
int col_pos() const { return col_path_[0]; }
const SchemaPath& col_path() const { return col_path_; }
- /// Returns the field index in the generated llvm struct for this slot's tuple
- int llvm_field_idx() const { return llvm_field_idx_; }
+
+ /// Returns the field index in the generated llvm struct for this slot's tuple.
+ /// The 'llvm_field_idx' is the index of the slot in the llvm codegen'd tuple struct.
+ /// This takes into account any padding bytes.
+ int llvm_field_idx() const { return slot_idx_; }
+
int tuple_offset() const { return tuple_offset_; }
const NullIndicatorOffset& null_indicator_offset() const {
return null_indicator_offset_;
@@ -185,11 +189,6 @@ class SlotDescriptor {
/// the byte size of this slot.
const int slot_size_;
- /// The idx of the slot in the llvm codegen'd tuple struct
- /// This is set by TupleDescriptor during codegen and takes into account
- /// any padding bytes.
- int llvm_field_idx_ = -1;
-
/// collection_item_descriptor should be non-NULL iff this is a collection slot
SlotDescriptor(const TSlotDescriptor& tdesc, const TupleDescriptor* parent,
const TupleDescriptor* collection_item_descriptor);
diff --git a/bin/tsan-suppressions.txt b/bin/tsan-suppressions.txt
index adf67c0..678ddf8 100644
--- a/bin/tsan-suppressions.txt
+++ b/bin/tsan-suppressions.txt
@@ -30,3 +30,14 @@ race:impala::StatestoreTest_SmokeTest_Test
thread:impala::StatestoreTest_SmokeTest_Test
race:impala::SessionTest_TestExpiry_Test
race:impala::StatestoreSslTest_ValidCertSmokeTest_Test
+
+# TODO: IMPALA-9314: The ThriftServer used by the tests in hs2-http-test.cc has various
+# race conditions during the shutdown process
+race:impala::ThriftHttpTest_TestChunkedRequests_Test
+race:impala::Hs2HttpTest_TestSpnego_Test
+
+# TODO: IMPALA-9455 / IMPALA-6085: 'RpcMgrKerberizedTest' calls 'InitAuth'
+# (auth-provider.h) multiple times per process. This causes a race condition on
+# 'g_kinit_ctx' (init.cc). On a production cluster, Impala daemons should only call
+# 'InitAuth' once.
+race:impala::RpcMgrKerberizedTest