You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2021/03/17 14:31:22 UTC
[incubator-doris] branch master updated: [Bug] Fix bug that the
stale rowset file will not be deleted (#5527)
This is an automated email from the ASF dual-hosted git repository.
yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 087fd81 [Bug] Fix bug that the stale rowset file will not be deleted (#5527)
087fd81 is described below
commit 087fd8159beb9a3fc7b1a9004bde64eb54ea6832
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Wed Mar 17 22:31:05 2021 +0800
[Bug] Fix bug that the stale rowset file will not be deleted (#5527)
1. If cumulative compaction compacts only one rowset, the old rowset will not be put into `stale_rowset_meta_map`
2. Show rowset id in `/api/compaction/show`
Co-authored-by: xxiao2018 <be...@sina.com>
---
be/src/olap/schema_change.cpp | 3 +-
be/src/olap/tablet.cpp | 91 ++++++++++++++++++----
be/src/olap/tablet.h | 4 +-
be/src/olap/tablet_meta.cpp | 9 ++-
be/src/olap/tablet_meta.h | 5 +-
be/src/olap/version_graph.cpp | 6 +-
be/src/olap/version_graph.h | 2 +-
.../apache/doris/common/proc/BackendProcNode.java | 2 +-
8 files changed, 93 insertions(+), 29 deletions(-)
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 20e459b..8d9c616 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -1465,7 +1465,8 @@ OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletRe
rowsets_to_delete.push_back(rowset);
}
}
- new_tablet->modify_rowsets(std::vector<RowsetSharedPtr>(), rowsets_to_delete);
+ std::vector<RowsetSharedPtr> empty_vec;
+ new_tablet->modify_rowsets(empty_vec, rowsets_to_delete);
// inherit cumulative_layer_point from base_tablet
// check if new_tablet.ce_point > base_tablet.ce_point?
new_tablet->set_cumulative_layer_point(-1);
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 27b110e..1a12faa 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -34,6 +34,7 @@
#include "olap/olap_define.h"
#include "olap/reader.h"
#include "olap/row_cursor.h"
+#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_factory.h"
#include "olap/rowset/rowset_meta_manager.h"
#include "olap/storage_engine.h"
@@ -225,7 +226,8 @@ OLAPStatus Tablet::add_rowset(RowsetSharedPtr rowset, bool need_persist) {
rowsets_to_delete.push_back(it.second);
}
}
- modify_rowsets(std::vector<RowsetSharedPtr>(), rowsets_to_delete);
+ std::vector<RowsetSharedPtr> empty_vec;
+ modify_rowsets(empty_vec, rowsets_to_delete);
if (need_persist) {
RowsetMetaPB rowset_meta_pb;
@@ -240,20 +242,43 @@ OLAPStatus Tablet::add_rowset(RowsetSharedPtr rowset, bool need_persist) {
return OLAP_SUCCESS;
}
-void Tablet::modify_rowsets(const std::vector<RowsetSharedPtr>& to_add,
- const std::vector<RowsetSharedPtr>& to_delete) {
+void Tablet::modify_rowsets(std::vector<RowsetSharedPtr>& to_add,
+ std::vector<RowsetSharedPtr>& to_delete) {
// the compaction process allow to compact the single version, eg: version[4-4].
// this kind of "single version compaction" has same "input version" and "output version".
// which means "to_add->version()" equals to "to_delete->version()".
// So we should delete the "to_delete" before adding the "to_add",
// otherwise, the "to_add" will be deleted from _rs_version_map, eventually.
+ //
+ // And if the version of "to_add" and "to_delete" are exactly same. eg:
+ // to_add: [7-7]
+ // to_delete: [7-7]
+ // In this case, we no longer need to add the rowset in "to_delete" to
+ // _stale_rs_version_map, but can delete it directly.
+
+ bool same_version = true;
+ std::sort(to_add.begin(), to_add.end(), Rowset::comparator);
+ std::sort(to_delete.begin(), to_delete.end(), Rowset::comparator);
+ if (to_add.size() == to_delete.size()) {
+ for (int i = 0; i < to_add.size(); ++i) {
+ if (to_add[i]->version() != to_delete[i]->version()) {
+ same_version = false;
+ break;
+ }
+ }
+ } else {
+ same_version = false;
+ }
+
std::vector<RowsetMetaSharedPtr> rs_metas_to_delete;
for (auto& rs : to_delete) {
rs_metas_to_delete.push_back(rs->rowset_meta());
_rs_version_map.erase(rs->version());
- // put compaction rowsets in _stale_rs_version_map.
- _stale_rs_version_map[rs->version()] = rs;
+ if (!same_version) {
+ // put compaction rowsets in _stale_rs_version_map.
+ _stale_rs_version_map[rs->version()] = rs;
+ }
}
std::vector<RowsetMetaSharedPtr> rs_metas_to_add;
@@ -261,14 +286,26 @@ void Tablet::modify_rowsets(const std::vector<RowsetSharedPtr>& to_add,
rs_metas_to_add.push_back(rs->rowset_meta());
_rs_version_map[rs->version()] = rs;
- _timestamped_version_tracker.add_version(rs->version());
+ if (!same_version) {
+ // If version are same, then _timestamped_version_tracker
+ // already has this version, no need to add again.
+ _timestamped_version_tracker.add_version(rs->version());
+ }
++_newly_created_rowset_num;
}
- _tablet_meta->modify_rs_metas(rs_metas_to_add, rs_metas_to_delete);
+ _tablet_meta->modify_rs_metas(rs_metas_to_add, rs_metas_to_delete, same_version);
- // add rs_metas_to_delete to tracker
- _timestamped_version_tracker.add_stale_path_version(rs_metas_to_delete);
+ if (!same_version) {
+ // add rs_metas_to_delete to tracker
+ _timestamped_version_tracker.add_stale_path_version(rs_metas_to_delete);
+ } else {
+ // delete rowset in "to_delete" directly
+ for (auto& rs : to_delete) {
+ LOG(INFO) << "add unused rowset " << rs->rowset_id() << " because of same version";
+ StorageEngine::instance()->add_unused_rowset(rs);
+ }
+ }
}
// snapshot manager may call this api to check if version exists, so that
@@ -1002,6 +1039,7 @@ void Tablet::get_compaction_status(std::string* json_result) {
path_arr.SetArray();
std::vector<RowsetSharedPtr> rowsets;
+ std::vector<RowsetSharedPtr> stale_rowsets;
std::vector<bool> delete_flags;
{
ReadLock rdlock(&_meta_lock);
@@ -1011,6 +1049,12 @@ void Tablet::get_compaction_status(std::string* json_result) {
}
std::sort(rowsets.begin(), rowsets.end(), Rowset::comparator);
+ stale_rowsets.reserve(_stale_rs_version_map.size());
+ for (auto& it : _stale_rs_version_map) {
+ stale_rowsets.push_back(it.second);
+ }
+ std::sort(stale_rowsets.begin(), stale_rowsets.end(), Rowset::comparator);
+
delete_flags.reserve(rowsets.size());
for (auto& rs : rowsets) {
delete_flags.push_back(version_for_delete_predicate(rs->version()));
@@ -1050,14 +1094,31 @@ void Tablet::get_compaction_status(std::string* json_result) {
std::string disk_size =
PrettyPrinter::print(rowsets[i]->rowset_meta()->total_disk_size(), TUnit::BYTES);
std::string version_str = strings::Substitute(
- "[$0-$1] $2 $3 $4 $5", ver.first, ver.second, rowsets[i]->num_segments(),
+ "[$0-$1] $2 $3 $4 $5 $6", ver.first, ver.second, rowsets[i]->num_segments(),
(delete_flags[i] ? "DELETE" : "DATA"),
- SegmentsOverlapPB_Name(rowsets[i]->rowset_meta()->segments_overlap()), disk_size);
+ SegmentsOverlapPB_Name(rowsets[i]->rowset_meta()->segments_overlap()),
+ rowsets[i]->rowset_id().to_string(), disk_size);
value.SetString(version_str.c_str(), version_str.length(), versions_arr.GetAllocator());
versions_arr.PushBack(value, versions_arr.GetAllocator());
}
root.AddMember("rowsets", versions_arr, root.GetAllocator());
+ // print all stale rowsets' version as an array
+ rapidjson::Document stale_versions_arr;
+ stale_versions_arr.SetArray();
+ for (int i = 0; i < stale_rowsets.size(); ++i) {
+ const Version& ver = stale_rowsets[i]->version();
+ rapidjson::Value value;
+ std::string disk_size =
+ PrettyPrinter::print(stale_rowsets[i]->rowset_meta()->total_disk_size(), TUnit::BYTES);
+ std::string version_str = strings::Substitute(
+ "[$0-$1] $2 $3 $4", ver.first, ver.second, stale_rowsets[i]->num_segments(),
+ stale_rowsets[i]->rowset_id().to_string(), disk_size);
+ value.SetString(version_str.c_str(), version_str.length(), stale_versions_arr.GetAllocator());
+ stale_versions_arr.PushBack(value, stale_versions_arr.GetAllocator());
+ }
+ root.AddMember("stale_rowsets", stale_versions_arr, root.GetAllocator());
+
// add stale version rowsets
root.AddMember("stale version path", path_arr, root.GetAllocator());
@@ -1201,12 +1262,8 @@ void Tablet::build_tablet_report_info(TTabletInfo* tablet_info) {
// there are some rowset meta in local meta store and in in-memory tablet meta
// but not in tablet meta in local meta store
void Tablet::generate_tablet_meta_copy(TabletMetaSharedPtr new_tablet_meta) const {
- TabletMetaPB tablet_meta_pb;
- {
- ReadLock rdlock(&_meta_lock);
- _tablet_meta->to_meta_pb(&tablet_meta_pb);
- }
- new_tablet_meta->init_from_pb(tablet_meta_pb);
+ ReadLock rdlock(&_meta_lock);
+ generate_tablet_meta_copy_unlocked(new_tablet_meta);
}
// this is a unlocked version of generate_tablet_meta_copy()
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index a903715..15e6858 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -95,8 +95,8 @@ public:
// operation in rowsets
OLAPStatus add_rowset(RowsetSharedPtr rowset, bool need_persist = true);
- void modify_rowsets(const vector<RowsetSharedPtr>& to_add,
- const vector<RowsetSharedPtr>& to_delete);
+ void modify_rowsets(vector<RowsetSharedPtr>& to_add,
+ vector<RowsetSharedPtr>& to_delete);
// _rs_version_map and _stale_rs_version_map should be protected by _meta_lock
// The caller must call hold _meta_lock when call this two function.
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index a4940ed..5bfbbc9 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -499,7 +499,8 @@ void TabletMeta::delete_rs_meta_by_version(const Version& version,
}
void TabletMeta::modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add,
- const std::vector<RowsetMetaSharedPtr>& to_delete) {
+ const std::vector<RowsetMetaSharedPtr>& to_delete,
+ bool same_version) {
// Remove to_delete rowsets from _rs_metas
for (auto rs_to_del : to_delete) {
auto it = _rs_metas.begin();
@@ -516,8 +517,10 @@ void TabletMeta::modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add,
}
}
}
- // put to_delete rowsets in _stale_rs_metas.
- _stale_rs_metas.insert(_stale_rs_metas.end(), to_delete.begin(), to_delete.end());
+ if (!same_version) {
+ // put to_delete rowsets in _stale_rs_metas.
+ _stale_rs_metas.insert(_stale_rs_metas.end(), to_delete.begin(), to_delete.end());
+ }
// put to_add rowsets in _rs_metas.
_rs_metas.insert(_rs_metas.end(), to_add.begin(), to_add.end());
}
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 1336838..ae9a5f4 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -167,8 +167,11 @@ public:
OLAPStatus add_rs_meta(const RowsetMetaSharedPtr& rs_meta);
void delete_rs_meta_by_version(const Version& version,
std::vector<RowsetMetaSharedPtr>* deleted_rs_metas);
+ // If same_version is true, the rowset in "to_delete" will not be added
+ // to _stale_rs_meta, but to be deleted from rs_meta directly.
void modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add,
- const std::vector<RowsetMetaSharedPtr>& to_delete);
+ const std::vector<RowsetMetaSharedPtr>& to_delete,
+ bool same_version = false);
void revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas);
inline const std::vector<RowsetMetaSharedPtr>& all_stale_rs_metas() const;
diff --git a/be/src/olap/version_graph.cpp b/be/src/olap/version_graph.cpp
index 4873c98..79380f6 100644
--- a/be/src/olap/version_graph.cpp
+++ b/be/src/olap/version_graph.cpp
@@ -284,7 +284,7 @@ void TimestampedVersionTracker::recover_versioned_tracker(
}
_path_map_iter++;
}
- LOG(INFO) << "recover_versioned_tracker current map info " << _get_current_path_map_str();
+ LOG(INFO) << "recover_versioned_tracker current map info " << get_current_path_map_str();
}
void TimestampedVersionTracker::add_version(const Version& version) {
@@ -354,7 +354,7 @@ PathVersionListSharedPtr TimestampedVersionTracker::fetch_and_delete_path_by_id(
return nullptr;
}
- VLOG_NOTICE << _get_current_path_map_str();
+ VLOG_NOTICE << get_current_path_map_str();
PathVersionListSharedPtr ptr = fetch_path_version_by_id(path_id);
_stale_version_path_map.erase(path_id);
@@ -365,7 +365,7 @@ PathVersionListSharedPtr TimestampedVersionTracker::fetch_and_delete_path_by_id(
return ptr;
}
-std::string TimestampedVersionTracker::_get_current_path_map_str() {
+std::string TimestampedVersionTracker::get_current_path_map_str() {
std::stringstream tracker_info;
tracker_info << "current expired next_path_id " << _next_path_id << std::endl;
diff --git a/be/src/olap/version_graph.h b/be/src/olap/version_graph.h
index 330b743..d5ef70e 100644
--- a/be/src/olap/version_graph.h
+++ b/be/src/olap/version_graph.h
@@ -171,7 +171,7 @@ public:
PathVersionListSharedPtr fetch_and_delete_path_by_id(int64_t path_id);
/// Print all expired version path in a tablet.
- std::string _get_current_path_map_str();
+ std::string get_current_path_map_str();
/// Get json document of _stale_version_path_map. Fill the path_id and version_path
/// list in the document. The parameter path arr is used as return variable.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendProcNode.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendProcNode.java
index 5ee2472..fbb2d8c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendProcNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendProcNode.java
@@ -66,7 +66,7 @@ public class BackendProcNode implements ProcNodeInterface {
long totalB = entry.getValue().getTotalCapacityB();
Pair<Double, String> totalUnitPair = DebugUtil.getByteUint(totalB);
// other
- long otherB = totalB - availB;
+ long otherB = totalB - availB - dataUsedB;
Pair<Double, String> otherUnitPair = DebugUtil.getByteUint(otherB);
info.add(DebugUtil.DECIMAL_FORMAT_SCALE_3.format(otherUnitPair.first) + " " + otherUnitPair.second);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org