You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2019/11/16 00:44:48 UTC
[kudu] 01/03: [tablet] Optimize TabletMetadata::CollectBlockIds
efficiency
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 3ca7db0dbce89b2938d4c2cd5f2189767d5676f3
Author: Yingchun Lai <40...@qq.com>
AuthorDate: Tue Oct 15 16:50:04 2019 +0800
[tablet] Optimize TabletMetadata::CollectBlockIds efficiency
Container size may reallocate and elements will be moved or copied
several times if using std::vector and inserting elements at the
beginning in TabletMetadata::CollectBlockIds, we'd better to insert
elements at the end.
We did some simple benchmarks for TabletMetadata::CollectBlockIds()
operation in TestTabletMetadata.BenchmarkCollectBlockIds, compared
with old version, result shows that inserting at the end provides a
linear time cost when block count per RowSet or RowSet count
increasing.
And also, we ran benchmarks to compare std::vector with std::list
and std::deque, both of them have worse efficiency.
Result details as follow:
(The first column is FLAGS_test_row_set_count/FLAGS_test_block_count_per_rs,
the other columns show 10 times total TabletMetadata::CollectBlockIds()
time cost, in millisecond.)
vector list deque vector
(head insert) (rear insert)
1000/1000 3464 687 334 321
2000/1000 15238 1390 647 613
4000/1000 75139 3057 1392 1212
8000/1000 328808 6593 2805 2736
4000/2000 159517 6488 2965 2462
4000/4000 348471 11967 5513 5141
4000/8000 -(too long) 23706 11704 10806
Change-Id: I7ce853e35eb7dfa9f9a099e465ea8edfaa7c4aa9
Reviewed-on: http://gerrit.cloudera.org:8080/14445
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
---
src/kudu/fs/block_id.h | 2 +-
src/kudu/tablet/delta_tracker.cc | 2 +-
src/kudu/tablet/diskrowset.cc | 2 +-
src/kudu/tablet/metadata-test.cc | 16 +++++++-------
src/kudu/tablet/rowset_metadata.cc | 8 +++----
src/kudu/tablet/rowset_metadata.h | 6 +++---
src/kudu/tablet/tablet_metadata-test.cc | 38 +++++++++++++++++++++++++++++++++
src/kudu/tablet/tablet_metadata.cc | 22 +++++++++----------
src/kudu/tablet/tablet_metadata.h | 9 ++++----
src/kudu/tools/kudu-tool-test.cc | 9 +++++---
src/kudu/tools/tool_action_fs.cc | 4 ++--
11 files changed, 80 insertions(+), 38 deletions(-)
diff --git a/src/kudu/fs/block_id.h b/src/kudu/fs/block_id.h
index b1ebe03..5e7f5a2 100644
--- a/src/kudu/fs/block_id.h
+++ b/src/kudu/fs/block_id.h
@@ -101,6 +101,6 @@ struct BlockIdEqual {
};
typedef std::unordered_set<BlockId, BlockIdHash, BlockIdEqual> BlockIdSet;
-
+typedef std::vector<BlockId> BlockIdContainer;
} // namespace kudu
#endif /* KUDU_FS_BLOCK_ID_H */
diff --git a/src/kudu/tablet/delta_tracker.cc b/src/kudu/tablet/delta_tracker.cc
index 770e3aa..0c0b0d7 100644
--- a/src/kudu/tablet/delta_tracker.cc
+++ b/src/kudu/tablet/delta_tracker.cc
@@ -367,7 +367,7 @@ Status DeltaTracker::CommitDeltaStoreMetadataUpdate(const RowSetMetadataUpdate&
&new_stores, type),
"Unable to open delta blocks");
- vector<BlockId> removed_blocks;
+ BlockIdContainer removed_blocks;
rowset_metadata_->CommitUpdate(update, &removed_blocks);
rowset_metadata_->AddOrphanedBlocks(removed_blocks);
// Once we successfully commit to the rowset metadata, let's ensure we update
diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc
index b6a9dc3..64ec1e1 100644
--- a/src/kudu/tablet/diskrowset.cc
+++ b/src/kudu/tablet/diskrowset.cc
@@ -601,7 +601,7 @@ Status DiskRowSet::MajorCompactDeltaStoresWithColumnIds(const vector<ColumnId>&
// Prepare the changes to the metadata.
RowSetMetadataUpdate update;
compaction->CreateMetadataUpdate(&update);
- vector<BlockId> removed_blocks;
+ BlockIdContainer removed_blocks;
rowset_metadata_->CommitUpdate(update, &removed_blocks);
// Now that the metadata has been updated, open a new cfile set with the
diff --git a/src/kudu/tablet/metadata-test.cc b/src/kudu/tablet/metadata-test.cc
index e67529d..ad538b8 100644
--- a/src/kudu/tablet/metadata-test.cc
+++ b/src/kudu/tablet/metadata-test.cc
@@ -63,13 +63,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_1) {
to_replace.emplace_back(2);
to_replace.emplace_back(3);
- vector<BlockId> removed;
+ BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate()
.ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed);
ASSERT_EQ(vector<BlockId>({ BlockId(1), BlockId(123), BlockId(4) }),
meta_->redo_delta_blocks());
- ASSERT_EQ(vector<BlockId>({ BlockId(2), BlockId(3) }),
+ ASSERT_EQ(BlockIdContainer({ BlockId(2), BlockId(3) }),
removed);
}
@@ -79,13 +79,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_2) {
to_replace.emplace_back(1);
to_replace.emplace_back(2);
- vector<BlockId> removed;
+ BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate()
.ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed);
ASSERT_EQ(vector<BlockId>({ BlockId(123), BlockId(3), BlockId(4) }),
meta_->redo_delta_blocks());
- ASSERT_EQ(vector<BlockId>({ BlockId(1), BlockId(2) }),
+ ASSERT_EQ(BlockIdContainer({ BlockId(1), BlockId(2) }),
removed);
}
@@ -95,13 +95,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_3) {
to_replace.emplace_back(3);
to_replace.emplace_back(4);
- vector<BlockId> removed;
+ BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate()
.ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed);
ASSERT_EQ(vector<BlockId>({ BlockId(1), BlockId(2), BlockId(123) }),
meta_->redo_delta_blocks());
- ASSERT_EQ(vector<BlockId>({ BlockId(3), BlockId(4) }),
+ ASSERT_EQ(BlockIdContainer({ BlockId(3), BlockId(4) }),
removed);
}
@@ -112,7 +112,7 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_Bad_NonContiguous) {
to_replace.emplace_back(4);
EXPECT_DEATH({
- vector<BlockId> removed;
+ BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate().ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }),
&removed);
@@ -127,7 +127,7 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_Bad_DoesntExist) {
to_replace.emplace_back(555);
EXPECT_DEATH({
- vector<BlockId> removed;
+ BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate().ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }),
&removed);
diff --git a/src/kudu/tablet/rowset_metadata.cc b/src/kudu/tablet/rowset_metadata.cc
index 6c9705c..46bd932 100644
--- a/src/kudu/tablet/rowset_metadata.cc
+++ b/src/kudu/tablet/rowset_metadata.cc
@@ -56,7 +56,7 @@ Status RowSetMetadata::CreateNew(TabletMetadata* tablet_metadata,
return Status::OK();
}
-void RowSetMetadata::AddOrphanedBlocks(const vector<BlockId>& blocks) {
+void RowSetMetadata::AddOrphanedBlocks(const BlockIdContainer& blocks) {
tablet_metadata_->AddOrphanedBlocks(blocks);
}
@@ -203,7 +203,7 @@ Status RowSetMetadata::CommitUndoDeltaDataBlock(const BlockId& block_id) {
}
void RowSetMetadata::CommitUpdate(const RowSetMetadataUpdate& update,
- vector<BlockId>* removed) {
+ BlockIdContainer* removed) {
removed->clear();
{
std::lock_guard<LockType> l(lock_);
@@ -298,8 +298,8 @@ int64_t RowSetMetadata::live_row_count() const {
return live_row_count_;
}
-vector<BlockId> RowSetMetadata::GetAllBlocks() {
- vector<BlockId> blocks;
+BlockIdContainer RowSetMetadata::GetAllBlocks() {
+ BlockIdContainer blocks;
std::lock_guard<LockType> l(lock_);
if (!adhoc_index_block_.IsNull()) {
blocks.push_back(adhoc_index_block_);
diff --git a/src/kudu/tablet/rowset_metadata.h b/src/kudu/tablet/rowset_metadata.h
index 7f67b63..c4be77d 100644
--- a/src/kudu/tablet/rowset_metadata.h
+++ b/src/kudu/tablet/rowset_metadata.h
@@ -93,7 +93,7 @@ class RowSetMetadata {
Status Flush();
- void AddOrphanedBlocks(const std::vector<BlockId>& blocks);
+ void AddOrphanedBlocks(const BlockIdContainer& blocks);
const std::string ToString() const;
@@ -222,11 +222,11 @@ class RowSetMetadata {
// Returns the blocks removed from the rowset metadata during the update.
// These blocks must be added to the TabletMetadata's orphaned blocks list.
void CommitUpdate(const RowSetMetadataUpdate& update,
- std::vector<BlockId>* removed);
+ BlockIdContainer* removed);
void ToProtobuf(RowSetDataPB *pb);
- std::vector<BlockId> GetAllBlocks();
+ BlockIdContainer GetAllBlocks();
// Increase the row count.
// Note:
diff --git a/src/kudu/tablet/tablet_metadata-test.cc b/src/kudu/tablet/tablet_metadata-test.cc
index dc482eb..1e69ce7 100644
--- a/src/kudu/tablet/tablet_metadata-test.cc
+++ b/src/kudu/tablet/tablet_metadata-test.cc
@@ -18,29 +18,42 @@
#include "kudu/tablet/tablet_metadata.h"
#include <cstdint>
+#include <map>
#include <memory>
#include <ostream>
#include <string>
#include <boost/optional/optional.hpp>
+#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "kudu/common/common.pb.h"
#include "kudu/common/partial_row.h"
+#include "kudu/common/schema.h"
#include "kudu/common/wire_protocol-test-util.h"
+#include "kudu/fs/block_id.h"
#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/port.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/tablet/local_tablet_writer.h"
#include "kudu/tablet/metadata.pb.h"
+#include "kudu/tablet/rowset_metadata.h"
#include "kudu/tablet/tablet-harness.h"
#include "kudu/tablet/tablet-test-util.h"
#include "kudu/tablet/tablet.h"
#include "kudu/util/pb_util.h"
#include "kudu/util/status.h"
+#include "kudu/util/stopwatch.h"
#include "kudu/util/test_macros.h"
+DEFINE_int64(test_row_set_count, 1000, "");
+DEFINE_int64(test_block_count_per_rs, 1000, "");
+
+using std::map;
+using std::shared_ptr;
+using std::unique_ptr;
+
namespace kudu {
namespace tablet {
@@ -183,5 +196,30 @@ TEST_F(TestTabletMetadata, TestOnDiskSize) {
ASSERT_GE(final_size, superblock_pb.ByteSize());
}
+TEST_F(TestTabletMetadata, BenchmarkCollectBlockIds) {
+ auto tablet_meta = harness_->tablet()->metadata();
+ RowSetMetadataVector rs_metas;
+ for (int i = 0; i < FLAGS_test_row_set_count; ++i) {
+ unique_ptr<RowSetMetadata> meta;
+ ASSERT_OK(RowSetMetadata::CreateNew(tablet_meta, i, &meta));
+
+ map<ColumnId, BlockId> block_by_column;
+ for (int j = 0; j < FLAGS_test_block_count_per_rs; ++j) {
+ block_by_column[ColumnId(j)] = BlockId(j);
+ }
+ meta->SetColumnDataBlocks(block_by_column);
+ rs_metas.emplace_back(shared_ptr<RowSetMetadata>(meta.release()));
+ }
+ tablet_meta->UpdateAndFlush(RowSetMetadataIds(), rs_metas, TabletMetadata::kNoMrsFlushed);
+
+ for (int i = 0; i < 10; ++i) {
+ BlockIdContainer block_ids;
+ LOG_TIMING(INFO, "collecting BlockIds") {
+ block_ids = tablet_meta->CollectBlockIds();
+ }
+ LOG(INFO) << "block_ids size: " << block_ids.size();
+ }
+}
+
} // namespace tablet
} // namespace kudu
diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc
index 8189085..129da86 100644
--- a/src/kudu/tablet/tablet_metadata.cc
+++ b/src/kudu/tablet/tablet_metadata.cc
@@ -189,11 +189,11 @@ vector<BlockIdPB> TabletMetadata::CollectBlockIdPBs(const TabletSuperBlockPB& su
return block_ids;
}
-vector<BlockId> TabletMetadata::CollectBlockIds() {
- vector<BlockId> block_ids;
+BlockIdContainer TabletMetadata::CollectBlockIds() {
+ BlockIdContainer block_ids;
for (const auto& r : rowsets_) {
- vector<BlockId> rowset_block_ids = r->GetAllBlocks();
- block_ids.insert(block_ids.begin(),
+ BlockIdContainer rowset_block_ids = r->GetAllBlocks();
+ block_ids.insert(block_ids.end(),
rowset_block_ids.begin(),
rowset_block_ids.end());
}
@@ -348,7 +348,7 @@ Status TabletMetadata::UpdateOnDiskSize() {
}
Status TabletMetadata::LoadFromSuperBlock(const TabletSuperBlockPB& superblock) {
- vector<BlockId> orphaned_blocks;
+ BlockIdContainer orphaned_blocks;
VLOG(2) << "Loading TabletMetadata from SuperBlockPB:" << std::endl
<< SecureDebugString(superblock);
@@ -497,17 +497,17 @@ Status TabletMetadata::UpdateAndFlush(const RowSetMetadataIds& to_remove,
return Flush();
}
-void TabletMetadata::AddOrphanedBlocks(const vector<BlockId>& blocks) {
+void TabletMetadata::AddOrphanedBlocks(const BlockIdContainer& block_ids) {
std::lock_guard<LockType> l(data_lock_);
- AddOrphanedBlocksUnlocked(blocks);
+ AddOrphanedBlocksUnlocked(block_ids);
}
-void TabletMetadata::AddOrphanedBlocksUnlocked(const vector<BlockId>& blocks) {
+void TabletMetadata::AddOrphanedBlocksUnlocked(const BlockIdContainer& block_ids) {
DCHECK(data_lock_.is_locked());
- orphaned_blocks_.insert(blocks.begin(), blocks.end());
+ orphaned_blocks_.insert(block_ids.begin(), block_ids.end());
}
-void TabletMetadata::DeleteOrphanedBlocks(const vector<BlockId>& blocks) {
+void TabletMetadata::DeleteOrphanedBlocks(const BlockIdContainer& blocks) {
if (PREDICT_FALSE(!FLAGS_enable_tablet_orphaned_block_deletion)) {
LOG_WITH_PREFIX(WARNING) << "Not deleting " << blocks.size()
<< " block(s) from disk. Block deletion disabled via "
@@ -559,7 +559,7 @@ Status TabletMetadata::Flush() {
"tablet_id", tablet_id_);
MutexLock l_flush(flush_lock_);
- vector<BlockId> orphaned;
+ BlockIdContainer orphaned;
TabletSuperBlockPB pb;
{
std::lock_guard<LockType> l(data_lock_);
diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h
index 060c071..fae2a3c 100644
--- a/src/kudu/tablet/tablet_metadata.h
+++ b/src/kudu/tablet/tablet_metadata.h
@@ -113,7 +113,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
static std::vector<BlockIdPB> CollectBlockIdPBs(
const TabletSuperBlockPB& superblock);
- std::vector<BlockId> CollectBlockIds();
+ BlockIdContainer CollectBlockIds();
const std::string& tablet_id() const {
DCHECK_NE(state_, kNotLoadedYet);
@@ -190,7 +190,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
//
// Blocks are removed from this set after they are successfully deleted
// in a call to DeleteOrphanedBlocks().
- void AddOrphanedBlocks(const std::vector<BlockId>& block_ids);
+ void AddOrphanedBlocks(const BlockIdContainer& block_ids);
// Mark the superblock to be in state 'delete_type', sync it to disk, and
// then delete all of the rowsets in this tablet.
@@ -289,6 +289,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
private:
friend class RefCountedThreadSafe<TabletMetadata>;
friend class MetadataTest;
+ friend class TestTabletMetadataBenchmark;
// Compile time assert that no one deletes TabletMetadata objects.
~TabletMetadata();
@@ -331,7 +332,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
const RowSetMetadataVector& rowsets) const;
// Requires 'data_lock_'.
- void AddOrphanedBlocksUnlocked(const std::vector<BlockId>& block_ids);
+ void AddOrphanedBlocksUnlocked(const BlockIdContainer& block_ids);
// Deletes the provided 'blocks' on disk.
//
@@ -339,7 +340,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
// 'orphaned_blocks_' set.
//
// Failures are logged, but are not fatal.
- void DeleteOrphanedBlocks(const std::vector<BlockId>& blocks);
+ void DeleteOrphanedBlocks(const BlockIdContainer& blocks);
enum State {
kNotLoadedYet,
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index d1f96e2..d5fb05c 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -1198,7 +1198,7 @@ TEST_F(ToolTest, TestFsCheck) {
// Create a local replica, flush some rows a few times, and collect all
// of the created block IDs.
- vector<BlockId> block_ids;
+ BlockIdContainer block_ids;
{
TabletHarness::Options opts(kTestDir);
opts.tablet_id = kTabletId;
@@ -1235,8 +1235,11 @@ TEST_F(ToolTest, TestFsCheck) {
ASSERT_OK(fs.Open(&report));
std::shared_ptr<BlockDeletionTransaction> deletion_transaction =
fs.block_manager()->NewDeletionTransaction();
- for (int i = 0; i < block_ids.size(); i += 2) {
- deletion_transaction->AddDeletedBlock(block_ids[i]);
+ int index = 0;
+ for (const auto& block_id : block_ids) {
+ if (index++ % 2 == 0) {
+ deletion_transaction->AddDeletedBlock(block_id);
+ }
}
deletion_transaction->CommitDeletedBlocks(&missing_ids);
}
diff --git a/src/kudu/tools/tool_action_fs.cc b/src/kudu/tools/tool_action_fs.cc
index e650c7f..c72247f 100644
--- a/src/kudu/tools/tool_action_fs.cc
+++ b/src/kudu/tools/tool_action_fs.cc
@@ -140,14 +140,14 @@ Status Check(const RunnerContext& /*context*/) {
}
// Get the "live" block IDs (i.e. those referenced by a tablet).
- vector<BlockId> live_block_ids;
+ BlockIdContainer live_block_ids;
unordered_map<BlockId, string, BlockIdHash, BlockIdEqual> live_block_id_to_tablet;
vector<string> tablet_ids;
RETURN_NOT_OK(fs_manager.ListTabletIds(&tablet_ids));
for (const auto& t : tablet_ids) {
scoped_refptr<TabletMetadata> meta;
RETURN_NOT_OK(TabletMetadata::Load(&fs_manager, t, &meta));
- vector<BlockId> tablet_live_block_ids = meta->CollectBlockIds();
+ BlockIdContainer tablet_live_block_ids = meta->CollectBlockIds();
live_block_ids.insert(live_block_ids.end(),
tablet_live_block_ids.begin(),
tablet_live_block_ids.end());