You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2017/11/28 02:01:29 UTC
kudu git commit: [tablet_metadata] protect pre_flush_callback_ by
flush_lock_
Repository: kudu
Updated Branches:
refs/heads/master b29c93734 -> fa65d647a
[tablet_metadata] protect pre_flush_callback_ by flush_lock_
This patch makes the pre_flush_callback_ protected by the flush_lock_
to address the case of concurrent calls to
TabletMetadata::DeleteTabletData() and Tablet::Shutdown(). The call to
Tablet::Shutdown() was originated from TabletReplica::OnDiskSize() when
the local shared_ptr variable ended up keeping the last reference to
the object corresponding to the tablet being deleted.
The issue contributed to the flakiness at least of the following tests:
* CreateTableStressTest.CreateAndDeleteBigTable
* DeleteTableWhileScanInProgressParamTest
Prior to this patch, TSAN reported about read/write race for the callback
with the traces like the following:
Read of size 8 at 0x7b50000703d0 by thread T56 (mutexes: write M2161):
#0 kudu::Callback<kudu::Status ()()>::Run() const /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/gutil/callback.h:394:45 (libtablet.so+0x1caa81)
#1 kudu::tablet::TabletMetadata::Flush() /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet_metadata.cc:549:23 (libtablet.so+0x1c30b8)
#2 kudu::tablet::TabletMetadata::DeleteTabletData(kudu::tablet::TabletDataState, boost::optional<kudu::consensus::OpId> const&) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet_metadata.cc:232:3 (libtablet.so+0x1c411a)
#3 kudu::tserver::TSTabletManager::DeleteTabletData(scoped_refptr<kudu::tablet::TabletMetadata> const&, scoped_refptr<kudu::consensus::ConsensusMetadataManager> const&, kudu::tablet::TabletDataState, boost::optional<kudu::consensus::OpId>) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tserver/ts_tablet_manager.cc:1281:3 (libtserver.so+0xfad26)
#4 kudu::tserver::TSTabletManager::DeleteTablet(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, kudu::tablet::TabletDataState, boost::optional<long> const&, kudu::tserver::TabletServerErrorPB_Code*) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tserver/ts_tablet_manager.cc:826:14 (libtserver.so+0xfc365)
#5 kudu::tserver::TabletServiceAdminImpl::DeleteTablet(kudu::tserver::DeleteTabletRequestPB const*, kudu::tserver::DeleteTabletResponsePB*, kudu::rpc::RpcContext*) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tserver/tablet_service.cc:804:41 (libtserver.so+0xd5dfd)
...
Previous write of size 8 at 0x7b50000703d0 by thread T12 (mutexes: write M90770251150680224, write M623321216325058272):
#0 kudu::internal::CallbackBase::operator=(kudu::internal::CallbackBase const&) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/gutil/callback_internal.h:34:7 (libtserver.so+0xb0596)
#1 kudu::Callback<kudu::Status ()()>::operator=(kudu::Callback<kudu::Status ()()>&&) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/gutil/callback.h:358:7 (libtablet.so+0x115cb0)
#2 kudu::tablet::TabletMetadata::SetPreFlushCallback(kudu::Callback<kudu::Status ()()>) /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet_metadata.h:229:74 (libtablet.so+0x10e437)
#3 kudu::tablet::Tablet::Shutdown() /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet.cc:352:14 (libtablet.so+0xf7fdc)
#4 kudu::tablet::Tablet::~Tablet() /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet.cc:248:3 (libtablet.so+0xf7c9e)
...
#10 kudu::tablet::TabletReplica::OnDiskSize() const /data/somelongdirectorytoavoidrpathissues/src/kudu/src/kudu/tablet/tablet_replica.cc:742:1 (libtablet.so+0x143af7)
...
Change-Id: I21d3195183584d1a51aeec64b049ac49994f69be
Reviewed-on: http://gerrit.cloudera.org:8080/8649
Reviewed-by: Mike Percy <mp...@apache.org>
Reviewed-by: Andrew Wong <aw...@cloudera.com>
Tested-by: Alexey Serbin <as...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/fa65d647
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/fa65d647
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/fa65d647
Branch: refs/heads/master
Commit: fa65d647a450c6895bac279dd5e5ad5fb6f8d228
Parents: b29c937
Author: Alexey Serbin <as...@cloudera.com>
Authored: Mon Nov 27 12:16:49 2017 -0800
Committer: Alexey Serbin <as...@cloudera.com>
Committed: Tue Nov 28 02:00:39 2017 +0000
----------------------------------------------------------------------
src/kudu/tablet/tablet_metadata.cc | 5 +++++
src/kudu/tablet/tablet_metadata.h | 5 ++---
2 files changed, 7 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/fa65d647/src/kudu/tablet/tablet_metadata.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc
index d848c1f..92eb8f8 100644
--- a/src/kudu/tablet/tablet_metadata.cc
+++ b/src/kudu/tablet/tablet_metadata.cc
@@ -617,6 +617,11 @@ Status TabletMetadata::ReplaceSuperBlockUnlocked(const TabletSuperBlockPB &pb) {
return Status::OK();
}
+void TabletMetadata::SetPreFlushCallback(StatusClosure callback) {
+ MutexLock l_flush(flush_lock_);
+ pre_flush_callback_ = std::move(callback);
+}
+
boost::optional<consensus::OpId> TabletMetadata::tombstone_last_logged_opid() const {
std::lock_guard<LockType> l(data_lock_);
return tombstone_last_logged_opid_;
http://git-wip-us.apache.org/repos/asf/kudu/blob/fa65d647/src/kudu/tablet/tablet_metadata.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h
index e94101e..fd88ee8 100644
--- a/src/kudu/tablet/tablet_metadata.h
+++ b/src/kudu/tablet/tablet_metadata.h
@@ -22,7 +22,6 @@
#include <memory>
#include <string>
#include <unordered_set>
-#include <utility>
#include <vector>
#include <boost/optional/optional.hpp>
@@ -226,7 +225,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
void SetLastDurableMrsIdForTests(int64_t mrs_id) { last_durable_mrs_id_ = mrs_id; }
- void SetPreFlushCallback(StatusClosure callback) { pre_flush_callback_ = std::move(callback); }
+ void SetPreFlushCallback(StatusClosure callback);
// Return the last-logged opid of a tombstoned tablet, if known.
boost::optional<consensus::OpId> tombstone_last_logged_opid() const;
@@ -373,7 +372,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
bool needs_flush_;
// A callback that, if set, is called before this metadata is flushed
- // to disk.
+ // to disk. Protected by the 'flush_lock_'.
StatusClosure pre_flush_callback_;
// The on-disk size of the tablet metadata, as of the last successful