You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2017/11/28 02:01:29 UTC

kudu git commit: [tablet_metadata] protect pre_flush_callback_ by flush_lock_

Repository: kudu
Updated Branches:
  refs/heads/master b29c93734 -> fa65d647a


[tablet_metadata] protect pre_flush_callback_ by flush_lock_

This patch makes the pre_flush_callback_ protected by the flush_lock_
to address the case of concurrent calls to
TabletMetadata::DeleteTabletData() and Tablet::Shutdown().  The call to
Tablet::Shutdown() was originated from TabletReplica::OnDiskSize() when
the local shared_ptr variable ended up keeping the last reference to
the object corresponding to the tablet being deleted.

The issue contributed to the flakiness at least of the following tests:
   * CreateTableStressTest.CreateAndDeleteBigTable
   * DeleteTableWhileScanInProgressParamTest

Prior to this patch, TSAN reported about read/write race for the callback
with the traces like the following:
  Read of size 8 at 0x7b50000703d0 by thread T56 (mutexes: write M2161):
    #0 kudu::Callback<kudu::Status ()()>::Run() const /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/gutil/callback.h:394:45 (libtablet.so+0x1caa81)
    #1 kudu::tablet::TabletMetadata::Flush() /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet_metadata.cc:549:23 (libtablet.so+0x1c30b8)
    #2 kudu::tablet::TabletMetadata::DeleteTabletData(kudu::tablet::TabletDataState, boost::optional<kudu::consensus::OpId> const&) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet_metadata.cc:232:3 (libtablet.so+0x1c411a)
    #3 kudu::tserver::TSTabletManager::DeleteTabletData(scoped_refptr<kudu::tablet::TabletMetadata> const&, scoped_refptr<kudu::consensus::ConsensusMetadataManager> const&, kudu::tablet::TabletDataState, boost::optional<kudu::consensus::OpId>) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tserver/ts_tablet_manager.cc:1281:3 (libtserver.so+0xfad26)
    #4 kudu::tserver::TSTabletManager::DeleteTablet(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, kudu::tablet::TabletDataState, boost::optional<long> const&, kudu::tserver::TabletServerErrorPB_Code*) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tserver/ts_tablet_manager.cc:826:14 (libtserver.so+0xfc365)
    #5 kudu::tserver::TabletServiceAdminImpl::DeleteTablet(kudu::tserver::DeleteTabletRequestPB const*, kudu::tserver::DeleteTabletResponsePB*, kudu::rpc::RpcContext*) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tserver/tablet_service.cc:804:41 (libtserver.so+0xd5dfd)
    ...

  Previous write of size 8 at 0x7b50000703d0 by thread T12 (mutexes: write M90770251150680224, write M623321216325058272):
    #0 kudu::internal::CallbackBase::operator=(kudu::internal::CallbackBase const&) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/gutil/callback_internal.h:34:7 (libtserver.so+0xb0596)
    #1 kudu::Callback<kudu::Status ()()>::operator=(kudu::Callback<kudu::Status ()()>&&) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/gutil/callback.h:358:7 (libtablet.so+0x115cb0)
    #2 kudu::tablet::TabletMetadata::SetPreFlushCallback(kudu::Callback<kudu::Status ()()>) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet_metadata.h:229:74 (libtablet.so+0x10e437)
    #3 kudu::tablet::Tablet::Shutdown() /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet.cc:352:14 (libtablet.so+0xf7fdc)
    #4 kudu::tablet::Tablet::~Tablet() /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet.cc:248:3 (libtablet.so+0xf7c9e)
    ...
    #10 kudu::tablet::TabletReplica::OnDiskSize() const /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet_replica.cc:742:1 (libtablet.so+0x143af7)
    ...

Change-Id: I21d3195183584d1a51aeec64b049ac49994f69be
Reviewed-on: http://gerrit.cloudera.org:8080/8649
Reviewed-by: Mike Percy <mp...@apache.org>
Reviewed-by: Andrew Wong <aw...@cloudera.com>
Tested-by: Alexey Serbin <as...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/fa65d647
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/fa65d647
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/fa65d647

Branch: refs/heads/master
Commit: fa65d647a450c6895bac279dd5e5ad5fb6f8d228
Parents: b29c937
Author: Alexey Serbin <as...@cloudera.com>
Authored: Mon Nov 27 12:16:49 2017 -0800
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Tue Nov 28 02:00:39 2017 +0000

----------------------------------------------------------------------
 src/kudu/tablet/tablet_metadata.cc | 5 +++++
 src/kudu/tablet/tablet_metadata.h  | 5 ++---
 2 files changed, 7 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/fa65d647/src/kudu/tablet/tablet_metadata.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc
index d848c1f..92eb8f8 100644
--- a/src/kudu/tablet/tablet_metadata.cc
+++ b/src/kudu/tablet/tablet_metadata.cc
@@ -617,6 +617,11 @@ Status TabletMetadata::ReplaceSuperBlockUnlocked(const TabletSuperBlockPB &pb) {
   return Status::OK();
 }
 
+void TabletMetadata::SetPreFlushCallback(StatusClosure callback) {
+  MutexLock l_flush(flush_lock_);
+  pre_flush_callback_ = std::move(callback);
+}
+
 boost::optional<consensus::OpId> TabletMetadata::tombstone_last_logged_opid() const {
   std::lock_guard<LockType> l(data_lock_);
   return tombstone_last_logged_opid_;

http://git-wip-us.apache.org/repos/asf/kudu/blob/fa65d647/src/kudu/tablet/tablet_metadata.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h
index e94101e..fd88ee8 100644
--- a/src/kudu/tablet/tablet_metadata.h
+++ b/src/kudu/tablet/tablet_metadata.h
@@ -22,7 +22,6 @@
 #include <memory>
 #include <string>
 #include <unordered_set>
-#include <utility>
 #include <vector>
 
 #include <boost/optional/optional.hpp>
@@ -226,7 +225,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
 
   void SetLastDurableMrsIdForTests(int64_t mrs_id) { last_durable_mrs_id_ = mrs_id; }
 
-  void SetPreFlushCallback(StatusClosure callback) { pre_flush_callback_ = std::move(callback); }
+  void SetPreFlushCallback(StatusClosure callback);
 
   // Return the last-logged opid of a tombstoned tablet, if known.
   boost::optional<consensus::OpId> tombstone_last_logged_opid() const;
@@ -373,7 +372,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
   bool needs_flush_;
 
   // A callback that, if set, is called before this metadata is flushed
-  // to disk.
+  // to disk. Protected by the 'flush_lock_'.
   StatusClosure pre_flush_callback_;
 
   // The on-disk size of the tablet metadata, as of the last successful