You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2020/03/24 02:17:53 UTC

[impala] branch master updated: IMPALA-5904: (part 3) Fix more TSAN bugs

This is an automated email from the ASF dual-hosted git repository.

stakiar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 8b162ff  IMPALA-5904: (part 3) Fix more TSAN bugs
8b162ff is described below

commit 8b162ff480aa346d1d32830097586d681976e2d1
Author: Sahil Takiar <ta...@gmail.com>
AuthorDate: Thu Feb 20 17:10:25 2020 -0800

    IMPALA-5904: (part 3) Fix more TSAN bugs
    
    As of this commit, all backend tests are TSAN clean (or suppressions
    exist for any races that are considered benign).
    
    TSAN Fixes:
    * Fixes IMPALA-9374: Possible data race in TupleDescriptor::GetLlvmStruct
        * Replaces the assignment of llvm_field_idx_ with slot_idx_, since
          they are the same; added a DCHECK to enforce this
    * Data race in be/src/exprs/scalar-expr-evaluator.cc:285:26
      impala::ScalarExprEvaluator::GetValue(...)
        * This race seems specific to data-stream-test, and does not occur
          during any other tests
        * I made some changes to DataStreamTest to fix the sharing of
          TupleRowComparators between threads
    
    Suppresions:
    * Added a suppresion for IMPALA-9455: Possible data race in
      kudu::security::InitKerberosForServer
    * Added suppresions for hs2-http-test since it uses the ThriftServer, which
      is known to be racey (IMPALA-9314)
    
    Testing:
    * Ran exhaustive tests
    * Re-ran TSAN tests and confirmed the data races have been fixed
    
    Change-Id: I99b7b119e256085d1ba6977e1161fc658273b242
    Reviewed-on: http://gerrit.cloudera.org:8080/15363
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
 be/src/rpc/hs2-http-test.cc        |  6 ++++--
 be/src/runtime/data-stream-test.cc | 34 +++++++++++++++++++++-------------
 be/src/runtime/descriptors.cc      |  7 +++++--
 be/src/runtime/descriptors.h       | 13 ++++++-------
 bin/tsan-suppressions.txt          | 11 +++++++++++
 5 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/be/src/rpc/hs2-http-test.cc b/be/src/rpc/hs2-http-test.cc
index 544b739..e3d1819 100644
--- a/be/src/rpc/hs2-http-test.cc
+++ b/be/src/rpc/hs2-http-test.cc
@@ -34,11 +34,12 @@ static string IMPALA_HOME(getenv("IMPALA_HOME"));
 
 namespace filesystem = boost::filesystem;
 
-using namespace impala;
 using namespace apache::hive::service::cli::thrift;
 using namespace apache::thrift;
 using strings::Substitute;
 
+namespace impala {
+
 class TestHS2Service : public ImpalaHiveServer2ServiceIf {
  public:
   virtual ~TestHS2Service() {}
@@ -166,9 +167,10 @@ TEST(Hs2HttpTest, TestSpnego) {
 
   http_server->StopForTesting();
 }
+}
 
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
-  InitCommonRuntime(argc, argv, false, TestInfo::BE_TEST);
+  impala::InitCommonRuntime(argc, argv, false, impala::TestInfo::BE_TEST);
   return RUN_ALL_TESTS();
 }
diff --git a/be/src/runtime/data-stream-test.cc b/be/src/runtime/data-stream-test.cc
index 8cbf2ac..863db94 100644
--- a/be/src/runtime/data-stream-test.cc
+++ b/be/src/runtime/data-stream-test.cc
@@ -159,10 +159,13 @@ class DataStreamTest : public testing::Test {
 
     CreateRowDesc();
 
+    SlotRef* lhs_slot = obj_pool_.Add(new SlotRef(TYPE_BIGINT, 0));
+    ASSERT_OK(lhs_slot->Init(RowDescriptor(), true, runtime_state_.get()));
+    ordering_exprs_.push_back(lhs_slot);
+
     tsort_info_.sorting_order = TSortingOrder::LEXICAL;
     tsort_info_.is_asc_order.push_back(true);
     tsort_info_.nulls_first.push_back(true);
-    CreateTupleComparator();
 
     next_instance_id_.lo = 0;
     next_instance_id_.hi = 0;
@@ -217,7 +220,9 @@ class DataStreamTest : public testing::Test {
 
   virtual void TearDown() {
     desc_tbl_->ReleaseResources();
-    less_than_->Close(runtime_state_.get());
+    for (auto less_than_comparator : less_than_comparators_) {
+      less_than_comparator->Close(runtime_state_.get());
+    }
     ScalarExpr::Close(ordering_exprs_);
     mem_pool_->FreeAll();
     StopKrpcBackend();
@@ -236,7 +241,7 @@ class DataStreamTest : public testing::Test {
   DescriptorTbl* desc_tbl_;
   const RowDescriptor* row_desc_;
   TSortInfo tsort_info_;
-  TupleRowComparator* less_than_;
+  vector<TupleRowComparator*> less_than_comparators_;
   boost::scoped_ptr<ExecEnv> exec_env_;
   scoped_ptr<RuntimeState> runtime_state_;
   TUniqueId next_instance_id_;
@@ -342,14 +347,12 @@ class DataStreamTest : public testing::Test {
   }
 
   // Create a tuple comparator to sort in ascending order on the single bigint column.
-  void CreateTupleComparator() {
-    SlotRef* lhs_slot = obj_pool_.Add(new SlotRef(TYPE_BIGINT, 0));
-    ASSERT_OK(lhs_slot->Init(RowDescriptor(), true, runtime_state_.get()));
-    ordering_exprs_.push_back(lhs_slot);
+  void CreateTupleComparator(TupleRowComparator** less_than_comparator) {
     TupleRowComparatorConfig* comparator_config =
         obj_pool_.Add(new TupleRowComparatorConfig(tsort_info_, ordering_exprs_));
-    less_than_ = obj_pool_.Add(new TupleRowLexicalComparator(*comparator_config));
-    ASSERT_OK(less_than_->Open(
+    *less_than_comparator =
+        obj_pool_.Add(new TupleRowLexicalComparator(*comparator_config));
+    ASSERT_OK((*less_than_comparator)->Open(
         &obj_pool_, runtime_state_.get(), mem_pool_.get(), mem_pool_.get()));
   }
 
@@ -389,11 +392,15 @@ class DataStreamTest : public testing::Test {
     info->stream_recvr = stream_mgr_->CreateRecvr(row_desc_, *runtime_state_.get(),
         instance_id, DEST_NODE_ID, num_senders, buffer_size, is_merging, profile,
         &tracker_, &buffer_pool_client_);
-    if (!is_merging) {
+   if (!is_merging) {
       info->thread_handle.reset(new thread(&DataStreamTest::ReadStream, this, info));
     } else {
+      TupleRowComparator* less_than_comparator = nullptr;
+      CreateTupleComparator(&less_than_comparator);
+      DCHECK(less_than_comparator != nullptr);
+      less_than_comparators_.push_back(less_than_comparator);
       info->thread_handle.reset(new thread(&DataStreamTest::ReadStreamMerging, this, info,
-          profile));
+          profile, less_than_comparator));
     }
     if (out_id != nullptr) *out_id = instance_id;
   }
@@ -423,8 +430,9 @@ class DataStreamTest : public testing::Test {
     VLOG_QUERY << "done reading";
   }
 
-  void ReadStreamMerging(ReceiverInfo* info, RuntimeProfile* profile) {
-    info->status = info->stream_recvr->CreateMerger(*less_than_);
+  void ReadStreamMerging(ReceiverInfo* info, RuntimeProfile* profile,
+      TupleRowComparator* less_than_comparator) {
+    info->status = info->stream_recvr->CreateMerger(*less_than_comparator);
     if (info->status.IsCancelled()) return;
     RowBatch batch(row_desc_, 1024, &tracker_);
     VLOG_QUERY << "start reading merging";
diff --git a/be/src/runtime/descriptors.cc b/be/src/runtime/descriptors.cc
index 0ed4519..b9a96fc 100644
--- a/be/src/runtime/descriptors.cc
+++ b/be/src/runtime/descriptors.cc
@@ -142,7 +142,7 @@ string SlotDescriptor::DebugString() const {
     out << " collection_item_tuple_id=" << collection_item_descriptor_->id();
   }
   out << " offset=" << tuple_offset_ << " null=" << null_indicator_offset_.DebugString()
-      << " slot_idx=" << slot_idx_ << " field_idx=" << llvm_field_idx_
+      << " slot_idx=" << slot_idx_ << " field_idx=" << slot_idx_
       << ")";
   return out.str();
 }
@@ -724,6 +724,10 @@ llvm::Value* SlotDescriptor::CodegenGetNullByte(
 vector<SlotDescriptor*> TupleDescriptor::SlotsOrderedByIdx() const {
   vector<SlotDescriptor*> sorted_slots(slots().size());
   for (SlotDescriptor* slot: slots()) sorted_slots[slot->slot_idx_] = slot;
+  // Check that the size of sorted_slots has not changed. This ensures that the series
+  // of slot indexes starts at 0 and increases by 1 for each slot. This also ensures that
+  // the returned vector has no nullptr elements.
+  DCHECK_EQ(slots().size(), sorted_slots.size());
   return sorted_slots;
 }
 
@@ -736,7 +740,6 @@ llvm::StructType* TupleDescriptor::GetLlvmStruct(LlvmCodeGen* codegen) const {
   int curr_struct_offset = 0;
   for (SlotDescriptor* slot: sorted_slots) {
     DCHECK_EQ(curr_struct_offset, slot->tuple_offset());
-    slot->llvm_field_idx_ = struct_fields.size();
     struct_fields.push_back(codegen->GetSlotType(slot->type()));
     curr_struct_offset = slot->tuple_offset() + slot->slot_size();
   }
diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h
index a54e4c6..c056b18 100644
--- a/be/src/runtime/descriptors.h
+++ b/be/src/runtime/descriptors.h
@@ -124,8 +124,12 @@ class SlotDescriptor {
   /// convenient for table formats for which we only support flat data.
   int col_pos() const { return col_path_[0]; }
   const SchemaPath& col_path() const { return col_path_; }
-  /// Returns the field index in the generated llvm struct for this slot's tuple
-  int llvm_field_idx() const { return llvm_field_idx_; }
+
+  /// Returns the field index in the generated llvm struct for this slot's tuple.
+  /// The 'llvm_field_idx' is the index of the slot in the llvm codegen'd tuple struct.
+  /// This takes into account any padding bytes.
+  int llvm_field_idx() const { return slot_idx_; }
+
   int tuple_offset() const { return tuple_offset_; }
   const NullIndicatorOffset& null_indicator_offset() const {
     return null_indicator_offset_;
@@ -185,11 +189,6 @@ class SlotDescriptor {
   /// the byte size of this slot.
   const int slot_size_;
 
-  /// The idx of the slot in the llvm codegen'd tuple struct
-  /// This is set by TupleDescriptor during codegen and takes into account
-  /// any padding bytes.
-  int llvm_field_idx_ = -1;
-
   /// collection_item_descriptor should be non-NULL iff this is a collection slot
   SlotDescriptor(const TSlotDescriptor& tdesc, const TupleDescriptor* parent,
       const TupleDescriptor* collection_item_descriptor);
diff --git a/bin/tsan-suppressions.txt b/bin/tsan-suppressions.txt
index adf67c0..678ddf8 100644
--- a/bin/tsan-suppressions.txt
+++ b/bin/tsan-suppressions.txt
@@ -30,3 +30,14 @@ race:impala::StatestoreTest_SmokeTest_Test
 thread:impala::StatestoreTest_SmokeTest_Test
 race:impala::SessionTest_TestExpiry_Test
 race:impala::StatestoreSslTest_ValidCertSmokeTest_Test
+
+# TODO: IMPALA-9314: The ThriftServer used by the tests in hs2-http-test.cc has various
+# race conditions during the shutdown process
+race:impala::ThriftHttpTest_TestChunkedRequests_Test
+race:impala::Hs2HttpTest_TestSpnego_Test
+
+# TODO: IMPALA-9455 / IMPALA-6085: 'RpcMgrKerberizedTest' calls 'InitAuth'
+# (auth-provider.h) multiple times per process. This causes a race condition on
+# 'g_kinit_ctx' (init.cc). On a production cluster, Impala daemons should only call
+# 'InitAuth' once.
+race:impala::RpcMgrKerberizedTest