You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by ya...@apache.org on 2021/03/17 14:31:22 UTC

[incubator-doris] branch master updated: [Bug] Fix bug that the stale rowset file will not be deleted (#5527)

This is an automated email from the ASF dual-hosted git repository.

yangzhg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 087fd81  [Bug] Fix bug that the stale rowset file will not be deleted (#5527)
087fd81 is described below

commit 087fd8159beb9a3fc7b1a9004bde64eb54ea6832
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Wed Mar 17 22:31:05 2021 +0800

    [Bug] Fix bug that the stale rowset file will not be deleted (#5527)
    
    1. If a cumulative compaction compacts only one rowset, the old rowset is no longer put into `stale_rowset_meta_map` but is deleted directly
    
    2. Show rowset id in `/api/compaction/show`
    
    Co-authored-by: xxiao2018 <be...@sina.com>
---
 be/src/olap/schema_change.cpp                      |  3 +-
 be/src/olap/tablet.cpp                             | 91 ++++++++++++++++++----
 be/src/olap/tablet.h                               |  4 +-
 be/src/olap/tablet_meta.cpp                        |  9 ++-
 be/src/olap/tablet_meta.h                          |  5 +-
 be/src/olap/version_graph.cpp                      |  6 +-
 be/src/olap/version_graph.h                        |  2 +-
 .../apache/doris/common/proc/BackendProcNode.java  |  2 +-
 8 files changed, 93 insertions(+), 29 deletions(-)

diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 20e459b..8d9c616 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -1465,7 +1465,8 @@ OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletRe
                 rowsets_to_delete.push_back(rowset);
             }
         }
-        new_tablet->modify_rowsets(std::vector<RowsetSharedPtr>(), rowsets_to_delete);
+        std::vector<RowsetSharedPtr> empty_vec;
+        new_tablet->modify_rowsets(empty_vec, rowsets_to_delete);
         // inherit cumulative_layer_point from base_tablet
         // check if new_tablet.ce_point > base_tablet.ce_point?
         new_tablet->set_cumulative_layer_point(-1);
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 27b110e..1a12faa 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -34,6 +34,7 @@
 #include "olap/olap_define.h"
 #include "olap/reader.h"
 #include "olap/row_cursor.h"
+#include "olap/rowset/rowset.h"
 #include "olap/rowset/rowset_factory.h"
 #include "olap/rowset/rowset_meta_manager.h"
 #include "olap/storage_engine.h"
@@ -225,7 +226,8 @@ OLAPStatus Tablet::add_rowset(RowsetSharedPtr rowset, bool need_persist) {
             rowsets_to_delete.push_back(it.second);
         }
     }
-    modify_rowsets(std::vector<RowsetSharedPtr>(), rowsets_to_delete);
+    std::vector<RowsetSharedPtr> empty_vec;
+    modify_rowsets(empty_vec, rowsets_to_delete);
 
     if (need_persist) {
         RowsetMetaPB rowset_meta_pb;
@@ -240,20 +242,43 @@ OLAPStatus Tablet::add_rowset(RowsetSharedPtr rowset, bool need_persist) {
     return OLAP_SUCCESS;
 }
 
-void Tablet::modify_rowsets(const std::vector<RowsetSharedPtr>& to_add,
-                            const std::vector<RowsetSharedPtr>& to_delete) {
+void Tablet::modify_rowsets(std::vector<RowsetSharedPtr>& to_add,
+                            std::vector<RowsetSharedPtr>& to_delete) {
     // the compaction process allow to compact the single version, eg: version[4-4].
     // this kind of "single version compaction" has same "input version" and "output version".
     // which means "to_add->version()" equals to "to_delete->version()".
     // So we should delete the "to_delete" before adding the "to_add",
     // otherwise, the "to_add" will be deleted from _rs_version_map, eventually.
+    //
+    // And if the version of "to_add" and "to_delete" are exactly same. eg:
+    // to_add:      [7-7]
+    // to_delete:   [7-7]
+    // In this case, we no longer need to add the rowset in "to_delete" to
+    // _stale_rs_version_map, but can delete it directly.
+
+    bool same_version = true;
+    std::sort(to_add.begin(), to_add.end(), Rowset::comparator);
+    std::sort(to_delete.begin(), to_delete.end(), Rowset::comparator);
+    if (to_add.size() == to_delete.size()) {
+        for (int i = 0; i < to_add.size(); ++i) {
+            if (to_add[i]->version() != to_delete[i]->version()) {
+                same_version = false;
+                break;
+            } 
+        }
+    } else {
+        same_version = false;
+    }
+
     std::vector<RowsetMetaSharedPtr> rs_metas_to_delete;
     for (auto& rs : to_delete) {
         rs_metas_to_delete.push_back(rs->rowset_meta());
         _rs_version_map.erase(rs->version());
 
-        // put compaction rowsets in _stale_rs_version_map.
-        _stale_rs_version_map[rs->version()] = rs;
+        if (!same_version) {
+            // put compaction rowsets in _stale_rs_version_map.
+            _stale_rs_version_map[rs->version()] = rs;
+        }
     }
 
     std::vector<RowsetMetaSharedPtr> rs_metas_to_add;
@@ -261,14 +286,26 @@ void Tablet::modify_rowsets(const std::vector<RowsetSharedPtr>& to_add,
         rs_metas_to_add.push_back(rs->rowset_meta());
         _rs_version_map[rs->version()] = rs;
 
-        _timestamped_version_tracker.add_version(rs->version());
+        if (!same_version) {
+            // If version are same, then _timestamped_version_tracker
+            // already has this version, no need to add again.
+            _timestamped_version_tracker.add_version(rs->version());
+        }
         ++_newly_created_rowset_num;
     }
 
-    _tablet_meta->modify_rs_metas(rs_metas_to_add, rs_metas_to_delete);
+    _tablet_meta->modify_rs_metas(rs_metas_to_add, rs_metas_to_delete, same_version);
 
-    // add rs_metas_to_delete to tracker
-    _timestamped_version_tracker.add_stale_path_version(rs_metas_to_delete);
+    if (!same_version) {
+        // add rs_metas_to_delete to tracker
+        _timestamped_version_tracker.add_stale_path_version(rs_metas_to_delete);
+    } else {
+        // delete rowset in "to_delete" directly
+        for (auto& rs : to_delete) {
+            LOG(INFO) << "add unused rowset " << rs->rowset_id() << " because of same version";
+            StorageEngine::instance()->add_unused_rowset(rs); 
+        }
+    }
 }
 
 // snapshot manager may call this api to check if version exists, so that
@@ -1002,6 +1039,7 @@ void Tablet::get_compaction_status(std::string* json_result) {
     path_arr.SetArray();
 
     std::vector<RowsetSharedPtr> rowsets;
+    std::vector<RowsetSharedPtr> stale_rowsets;
     std::vector<bool> delete_flags;
     {
         ReadLock rdlock(&_meta_lock);
@@ -1011,6 +1049,12 @@ void Tablet::get_compaction_status(std::string* json_result) {
         }
         std::sort(rowsets.begin(), rowsets.end(), Rowset::comparator);
 
+        stale_rowsets.reserve(_stale_rs_version_map.size());
+        for (auto& it : _stale_rs_version_map) {
+            stale_rowsets.push_back(it.second);
+        }
+        std::sort(stale_rowsets.begin(), stale_rowsets.end(), Rowset::comparator);
+
         delete_flags.reserve(rowsets.size());
         for (auto& rs : rowsets) {
             delete_flags.push_back(version_for_delete_predicate(rs->version()));
@@ -1050,14 +1094,31 @@ void Tablet::get_compaction_status(std::string* json_result) {
         std::string disk_size =
                 PrettyPrinter::print(rowsets[i]->rowset_meta()->total_disk_size(), TUnit::BYTES);
         std::string version_str = strings::Substitute(
-                "[$0-$1] $2 $3 $4 $5", ver.first, ver.second, rowsets[i]->num_segments(),
+                "[$0-$1] $2 $3 $4 $5 $6", ver.first, ver.second, rowsets[i]->num_segments(),
                 (delete_flags[i] ? "DELETE" : "DATA"),
-                SegmentsOverlapPB_Name(rowsets[i]->rowset_meta()->segments_overlap()), disk_size);
+                SegmentsOverlapPB_Name(rowsets[i]->rowset_meta()->segments_overlap()),
+                rowsets[i]->rowset_id().to_string(), disk_size);
         value.SetString(version_str.c_str(), version_str.length(), versions_arr.GetAllocator());
         versions_arr.PushBack(value, versions_arr.GetAllocator());
     }
     root.AddMember("rowsets", versions_arr, root.GetAllocator());
 
+    // print all stale rowsets' version as an array
+    rapidjson::Document stale_versions_arr;
+    stale_versions_arr.SetArray();
+    for (int i = 0; i < stale_rowsets.size(); ++i) {
+        const Version& ver = stale_rowsets[i]->version();
+        rapidjson::Value value;
+        std::string disk_size =
+                PrettyPrinter::print(stale_rowsets[i]->rowset_meta()->total_disk_size(), TUnit::BYTES);
+        std::string version_str = strings::Substitute(
+                "[$0-$1] $2 $3 $4", ver.first, ver.second, stale_rowsets[i]->num_segments(),
+                stale_rowsets[i]->rowset_id().to_string(), disk_size);
+        value.SetString(version_str.c_str(), version_str.length(), stale_versions_arr.GetAllocator());
+        stale_versions_arr.PushBack(value, stale_versions_arr.GetAllocator());
+    }
+    root.AddMember("stale_rowsets", stale_versions_arr, root.GetAllocator());
+
     // add stale version rowsets
     root.AddMember("stale version path", path_arr, root.GetAllocator());
 
@@ -1201,12 +1262,8 @@ void Tablet::build_tablet_report_info(TTabletInfo* tablet_info) {
 // there are some rowset meta in local meta store and in in-memory tablet meta
 // but not in tablet meta in local meta store
 void Tablet::generate_tablet_meta_copy(TabletMetaSharedPtr new_tablet_meta) const {
-    TabletMetaPB tablet_meta_pb;
-    {
-        ReadLock rdlock(&_meta_lock);
-        _tablet_meta->to_meta_pb(&tablet_meta_pb);
-    }
-    new_tablet_meta->init_from_pb(tablet_meta_pb);
+    ReadLock rdlock(&_meta_lock);
+    generate_tablet_meta_copy_unlocked(new_tablet_meta);
 }
 
 // this is a unlocked version of generate_tablet_meta_copy()
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index a903715..15e6858 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -95,8 +95,8 @@ public:
 
     // operation in rowsets
     OLAPStatus add_rowset(RowsetSharedPtr rowset, bool need_persist = true);
-    void modify_rowsets(const vector<RowsetSharedPtr>& to_add,
-                        const vector<RowsetSharedPtr>& to_delete);
+    void modify_rowsets(vector<RowsetSharedPtr>& to_add,
+                        vector<RowsetSharedPtr>& to_delete);
 
     // _rs_version_map and _stale_rs_version_map should be protected by _meta_lock
     // The caller must call hold _meta_lock when call this two function.
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index a4940ed..5bfbbc9 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -499,7 +499,8 @@ void TabletMeta::delete_rs_meta_by_version(const Version& version,
 }
 
 void TabletMeta::modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add,
-                                 const std::vector<RowsetMetaSharedPtr>& to_delete) {
+                                 const std::vector<RowsetMetaSharedPtr>& to_delete,
+                                 bool same_version) {
     // Remove to_delete rowsets from _rs_metas
     for (auto rs_to_del : to_delete) {
         auto it = _rs_metas.begin();
@@ -516,8 +517,10 @@ void TabletMeta::modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add,
             }
         }
     }
-    // put to_delete rowsets in _stale_rs_metas.
-    _stale_rs_metas.insert(_stale_rs_metas.end(), to_delete.begin(), to_delete.end());
+    if (!same_version) {
+        // put to_delete rowsets in _stale_rs_metas.
+        _stale_rs_metas.insert(_stale_rs_metas.end(), to_delete.begin(), to_delete.end());
+    }
     // put to_add rowsets in _rs_metas.
     _rs_metas.insert(_rs_metas.end(), to_add.begin(), to_add.end());
 }
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 1336838..ae9a5f4 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -167,8 +167,11 @@ public:
     OLAPStatus add_rs_meta(const RowsetMetaSharedPtr& rs_meta);
     void delete_rs_meta_by_version(const Version& version,
                                    std::vector<RowsetMetaSharedPtr>* deleted_rs_metas);
+    // If same_version is true, the rowset in "to_delete" will not be added
+    // to _stale_rs_meta, but to be deleted from rs_meta directly.
     void modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add,
-                         const std::vector<RowsetMetaSharedPtr>& to_delete);
+                         const std::vector<RowsetMetaSharedPtr>& to_delete,
+                         bool same_version = false);
     void revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas);
 
     inline const std::vector<RowsetMetaSharedPtr>& all_stale_rs_metas() const;
diff --git a/be/src/olap/version_graph.cpp b/be/src/olap/version_graph.cpp
index 4873c98..79380f6 100644
--- a/be/src/olap/version_graph.cpp
+++ b/be/src/olap/version_graph.cpp
@@ -284,7 +284,7 @@ void TimestampedVersionTracker::recover_versioned_tracker(
         }
         _path_map_iter++;
     }
-    LOG(INFO) << "recover_versioned_tracker current map info " << _get_current_path_map_str();
+    LOG(INFO) << "recover_versioned_tracker current map info " << get_current_path_map_str();
 }
 
 void TimestampedVersionTracker::add_version(const Version& version) {
@@ -354,7 +354,7 @@ PathVersionListSharedPtr TimestampedVersionTracker::fetch_and_delete_path_by_id(
         return nullptr;
     }
 
-    VLOG_NOTICE << _get_current_path_map_str();
+    VLOG_NOTICE << get_current_path_map_str();
     PathVersionListSharedPtr ptr = fetch_path_version_by_id(path_id);
 
     _stale_version_path_map.erase(path_id);
@@ -365,7 +365,7 @@ PathVersionListSharedPtr TimestampedVersionTracker::fetch_and_delete_path_by_id(
     return ptr;
 }
 
-std::string TimestampedVersionTracker::_get_current_path_map_str() {
+std::string TimestampedVersionTracker::get_current_path_map_str() {
     std::stringstream tracker_info;
     tracker_info << "current expired next_path_id " << _next_path_id << std::endl;
 
diff --git a/be/src/olap/version_graph.h b/be/src/olap/version_graph.h
index 330b743..d5ef70e 100644
--- a/be/src/olap/version_graph.h
+++ b/be/src/olap/version_graph.h
@@ -171,7 +171,7 @@ public:
     PathVersionListSharedPtr fetch_and_delete_path_by_id(int64_t path_id);
 
     /// Print all expired version path in a tablet.
-    std::string _get_current_path_map_str();
+    std::string get_current_path_map_str();
 
     /// Get json document of _stale_version_path_map. Fill the path_id and version_path
     /// list in the document. The parameter path arr is used as return variable.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendProcNode.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendProcNode.java
index 5ee2472..fbb2d8c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendProcNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendProcNode.java
@@ -66,7 +66,7 @@ public class BackendProcNode implements ProcNodeInterface {
             long totalB = entry.getValue().getTotalCapacityB();
             Pair<Double, String> totalUnitPair = DebugUtil.getByteUint(totalB);
             // other
-            long otherB = totalB - availB;
+            long otherB = totalB - availB - dataUsedB;
             Pair<Double, String> otherUnitPair = DebugUtil.getByteUint(otherB);
 
             info.add(DebugUtil.DECIMAL_FORMAT_SCALE_3.format(otherUnitPair.first) + " " + otherUnitPair.second);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org