You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2020/02/06 08:40:48 UTC
[incubator-doris] branch master updated: [Compaction] Avoid
unnecessary compaction (#2839)
This is an automated email from the ASF dual-hosted git repository.
zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new f77cfcd [Compaction] Avoid unnecessary compaction (#2839)
f77cfcd is described below
commit f77cfcdb617c7ec470bad848e2e999f8e6fbcb8d
Author: Mingyu Chen <mo...@gmail.com>
AuthorDate: Thu Feb 6 16:40:38 2020 +0800
[Compaction] Avoid unnecessary compaction (#2839)
It is not necessary to perform compaction in the following cases:
1. A tablet has only 2 rowsets, with versions [0-1] and [2-x]. In this case,
there is no need to perform base compaction, because the [0-1] version is an empty version.
Some tables are partitioned by day, and each partition loads only one batch of data
per day, so a large number of tablets with rowsets [0-1][2-2] will appear. These tablets
do not need base compaction.
2. The initial value of the `last successful execution time of compaction` is 0.
As a result, the first time the interval since the last successful compaction
is computed, it is measured from time 0 and therefore always exceeds the
threshold, which always meets the conditions to trigger cumulative compaction.
---
be/src/olap/base_compaction.cpp | 11 ++++++++++
be/src/olap/cumulative_compaction.cpp | 41 +++++++++++++++++++++++------------
2 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp
index be36fb3..322eeb6 100644
--- a/be/src/olap/base_compaction.cpp
+++ b/be/src/olap/base_compaction.cpp
@@ -67,6 +67,12 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() {
RETURN_NOT_OK(check_version_continuity(_input_rowsets));
RETURN_NOT_OK(_check_rowset_overlapping(_input_rowsets));
+ if (_input_rowsets.size() == 2 && _input_rowsets[0]->end_version() == 1) {
+ // the tablet is with rowset: [0-1], [2-y]
+ // and [0-1] has no data. in this situation, no need to do base compaction.
+ return OLAP_ERR_BE_NO_SUITABLE_VERSION;
+ }
+
// 1. cumulative rowset must reach base_compaction_num_cumulative_deltas threshold
if (_input_rowsets.size() > config::base_compaction_num_cumulative_deltas) {
LOG(INFO) << "satisfy the base compaction policy. tablet="<< _tablet->full_name()
@@ -87,6 +93,11 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() {
}
double base_cumulative_delta_ratio = config::base_cumulative_delta_ratio;
+ if (base_size == 0) {
+ // base_size == 0 means this may be a base version [0-1], which has no data.
+ // set to 1 to avoid division by zero
+ base_size = 1;
+ }
double cumulative_base_ratio = static_cast<double>(cumulative_total_size) / base_size;
if (cumulative_base_ratio > base_cumulative_delta_ratio) {
diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp
index b884736..87c6628 100755
--- a/be/src/olap/cumulative_compaction.cpp
+++ b/be/src/olap/cumulative_compaction.cpp
@@ -130,24 +130,37 @@ OLAPStatus CumulativeCompaction::pick_rowsets_to_compact() {
// the cumulative point after waiting for a long time, to ensure that the base compaction can continue.
// check both last success time of base and cumulative compaction
- int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000;
int64_t now = UnixMillis();
- int64_t cumu_interval = now - _tablet->last_cumu_compaction_success_time();
- int64_t base_interval = now - _tablet->last_base_compaction_success_time();
- if (cumu_interval > interval_threshold && base_interval > interval_threshold) {
- // before increasing cumulative point, we should make sure all rowsets are non-overlapping.
- // if at least one rowset is overlapping, we should compact them first.
- CHECK(candidate_rowsets.size() == transient_rowsets.size())
- << "tablet: " << _tablet->full_name() << ", "<< candidate_rowsets.size() << " vs. " << transient_rowsets.size();
- for (auto& rs : candidate_rowsets) {
- if (rs->rowset_meta()->is_segments_overlapping()) {
- _input_rowsets = candidate_rowsets;
- return OLAP_SUCCESS;
+ int64_t last_cumu = _tablet->last_cumu_compaction_success_time();
+ int64_t last_base = _tablet->last_base_compaction_success_time();
+ if (last_cumu != 0 || last_base != 0) {
+ int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000;
+ int64_t cumu_interval = now - last_cumu;
+ int64_t base_interval = now - last_base;
+ if (cumu_interval > interval_threshold && base_interval > interval_threshold) {
+ // before increasing cumulative point, we should make sure all rowsets are non-overlapping.
+ // if at least one rowset is overlapping, we should compact them first.
+ CHECK(candidate_rowsets.size() == transient_rowsets.size())
+ << "tablet: " << _tablet->full_name() << ", "<< candidate_rowsets.size() << " vs. " << transient_rowsets.size();
+ for (auto& rs : candidate_rowsets) {
+ if (rs->rowset_meta()->is_segments_overlapping()) {
+ _input_rowsets = candidate_rowsets;
+ return OLAP_SUCCESS;
+ }
}
+
+ // all candicate rowsets are non-overlapping, increase the cumulative point
+ _tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1);
+ }
+ } else {
+ // init the compaction success time for first time
+ if (last_cumu == 0) {
+ _tablet->set_last_cumu_compaction_success_time(now);
}
- // all candicate rowsets are non-overlapping, increase the cumulative point
- _tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1);
+ if (last_base == 0) {
+ _tablet->set_last_base_compaction_success_time(now);
+ }
}
return OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org