You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2020/02/06 08:40:48 UTC

[incubator-doris] branch master updated: [Compaction] Avoid unnecessary compaction (#2839)

This is an automated email from the ASF dual-hosted git repository.

zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f77cfcd  [Compaction] Avoid unnecessary compaction (#2839)
f77cfcd is described below

commit f77cfcdb617c7ec470bad848e2e999f8e6fbcb8d
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Thu Feb 6 16:40:38 2020 +0800

    [Compaction] Avoid unnecessary compaction (#2839)
    
    It is not necessary to perform compaction in the following cases
    
    1. A tablet has only 2 rowsets, with versions [0-1] and [2-x]. In this case,
    there is no need to perform base compaction because the [0-1] version is an empty version.
    
        Some tables are partitioned by day, and each partition loads only one batch of data
     per day, so a large number of tablets with rowsets [0-1][2-2] will appear. These tablets
     do not need base compaction.
    
    2. The initial value of the `last successful execution time of compaction` is 0.
    As a result, the first time the interval since the last successful compaction
    is computed, it is measured from time 0 and therefore always meets the
    conditions to trigger cumulative compaction.
---
 be/src/olap/base_compaction.cpp       | 11 ++++++++++
 be/src/olap/cumulative_compaction.cpp | 41 +++++++++++++++++++++++------------
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp
index be36fb3..322eeb6 100644
--- a/be/src/olap/base_compaction.cpp
+++ b/be/src/olap/base_compaction.cpp
@@ -67,6 +67,12 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() {
     RETURN_NOT_OK(check_version_continuity(_input_rowsets));
     RETURN_NOT_OK(_check_rowset_overlapping(_input_rowsets));
 
+    if (_input_rowsets.size() == 2 && _input_rowsets[0]->end_version() == 1) {
+        // the tablet is with rowset: [0-1], [2-y]
+        // and [0-1] has no data. in this situation, no need to do base compaction.
+        return OLAP_ERR_BE_NO_SUITABLE_VERSION;
+    }
+
     // 1. cumulative rowset must reach base_compaction_num_cumulative_deltas threshold
     if (_input_rowsets.size() > config::base_compaction_num_cumulative_deltas) {
         LOG(INFO) << "satisfy the base compaction policy. tablet="<< _tablet->full_name()
@@ -87,6 +93,11 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() {
     }
 
     double base_cumulative_delta_ratio = config::base_cumulative_delta_ratio;
+    if (base_size == 0) {
+        // base_size == 0 means this may be a base version [0-1], which has no data.
+        // set to 1 to avoid dividing by zero
+        base_size = 1;
+    }
     double cumulative_base_ratio = static_cast<double>(cumulative_total_size) / base_size;
 
     if (cumulative_base_ratio > base_cumulative_delta_ratio) {
diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp
index b884736..87c6628 100755
--- a/be/src/olap/cumulative_compaction.cpp
+++ b/be/src/olap/cumulative_compaction.cpp
@@ -130,24 +130,37 @@ OLAPStatus CumulativeCompaction::pick_rowsets_to_compact() {
         // the cumulative point after waiting for a long time, to ensure that the base compaction can continue.
 
         // check both last success time of base and cumulative compaction
-        int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000;
         int64_t now = UnixMillis();
-        int64_t cumu_interval = now - _tablet->last_cumu_compaction_success_time();
-        int64_t base_interval = now - _tablet->last_base_compaction_success_time();
-        if (cumu_interval > interval_threshold && base_interval > interval_threshold) {
-            // before increasing cumulative point, we should make sure all rowsets are non-overlapping.
-            // if at least one rowset is overlapping, we should compact them first.
-            CHECK(candidate_rowsets.size() == transient_rowsets.size())
-                << "tablet: " << _tablet->full_name() << ", "<<  candidate_rowsets.size() << " vs. " << transient_rowsets.size();
-            for (auto& rs : candidate_rowsets) {
-                if (rs->rowset_meta()->is_segments_overlapping()) {
-                    _input_rowsets = candidate_rowsets;
-                    return OLAP_SUCCESS;
+        int64_t last_cumu = _tablet->last_cumu_compaction_success_time();
+        int64_t last_base = _tablet->last_base_compaction_success_time();
+        if (last_cumu != 0 || last_base != 0) {
+            int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000;
+            int64_t cumu_interval = now - last_cumu;
+            int64_t base_interval = now - last_base;
+            if (cumu_interval > interval_threshold && base_interval > interval_threshold) {
+                // before increasing cumulative point, we should make sure all rowsets are non-overlapping.
+                // if at least one rowset is overlapping, we should compact them first.
+                CHECK(candidate_rowsets.size() == transient_rowsets.size())
+                    << "tablet: " << _tablet->full_name() << ", "<<  candidate_rowsets.size() << " vs. " << transient_rowsets.size();
+                for (auto& rs : candidate_rowsets) {
+                    if (rs->rowset_meta()->is_segments_overlapping()) {
+                        _input_rowsets = candidate_rowsets;
+                        return OLAP_SUCCESS;
+                    }
                 }
+
+                // all candidate rowsets are non-overlapping, increase the cumulative point
+                _tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1);
+            }
+        } else {
+            // init the compaction success time for first time
+            if (last_cumu == 0) {
+                _tablet->set_last_cumu_compaction_success_time(now);
             }
 
-            // all candicate rowsets are non-overlapping, increase the cumulative point
-            _tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1);
+            if (last_base == 0) {
+                _tablet->set_last_base_compaction_success_time(now);
+            }
         }
 
         return OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org