Posted to commits@doris.apache.org by da...@apache.org on 2022/12/01 14:15:48 UTC
[doris] branch master updated: [feature](compaction) support vertical_compaction & ordered_data_compaction (#14524)
This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 94a6ffb906 [feature](compaction) support vertical_compaction & ordered_data_compaction (#14524)
94a6ffb906 is described below
commit 94a6ffb906ea7716c479c23402ed453ca0a86a63
Author: yixiutt <10...@users.noreply.github.com>
AuthorDate: Thu Dec 1 22:15:41 2022 +0800
[feature](compaction) support vertical_compaction & ordered_data_compaction (#14524)
---
be/src/common/config.h | 13 +
be/src/olap/compaction.cpp | 235 +++++-
be/src/olap/compaction.h | 17 +-
be/src/olap/iterators.h | 12 +
be/src/olap/merger.cpp | 136 ++++
be/src/olap/merger.h | 21 +
be/src/olap/reader.cpp | 1 +
be/src/olap/reader.h | 5 +-
be/src/olap/rowset/CMakeLists.txt | 1 +
be/src/olap/rowset/beta_rowset.cpp | 18 +-
be/src/olap/rowset/beta_rowset.h | 5 +-
be/src/olap/rowset/beta_rowset_reader.cpp | 105 ++-
be/src/olap/rowset/beta_rowset_reader.h | 7 +
be/src/olap/rowset/beta_rowset_writer.cpp | 63 +-
be/src/olap/rowset/beta_rowset_writer.h | 8 +-
be/src/olap/rowset/rowset.h | 22 +-
be/src/olap/rowset/rowset_factory.cpp | 7 +-
be/src/olap/rowset/rowset_factory.h | 2 +-
be/src/olap/rowset/rowset_meta.h | 16 +
be/src/olap/rowset/rowset_reader.h | 5 +
be/src/olap/rowset/rowset_reader_context.h | 2 +
be/src/olap/rowset/rowset_writer.h | 13 +
be/src/olap/rowset/segment_v2/segment_writer.cpp | 284 ++++---
be/src/olap/rowset/segment_v2/segment_writer.h | 41 +-
be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 180 +++++
be/src/olap/rowset/vertical_beta_rowset_writer.h | 53 ++
be/src/olap/snapshot_manager.cpp | 2 +-
be/src/olap/tablet.cpp | 20 +-
be/src/olap/tablet.h | 7 +
be/src/olap/task/engine_checksum_task.cpp | 6 +-
be/src/vec/CMakeLists.txt | 2 +
be/src/vec/olap/olap_data_convertor.cpp | 9 +
be/src/vec/olap/olap_data_convertor.h | 1 +
be/src/vec/olap/vertical_block_reader.cpp | 397 ++++++++++
be/src/vec/olap/vertical_block_reader.h | 113 +++
be/src/vec/olap/vertical_merge_iterator.cpp | 546 +++++++++++++
be/src/vec/olap/vertical_merge_iterator.h | 312 ++++++++
be/src/vec/olap/vgeneric_iterators.cpp | 1 -
be/test/CMakeLists.txt | 2 +
be/test/io/cache/remote_file_cache_test.cpp | 4 +-
...n_test.cpp => ordered_data_compaction_test.cpp} | 458 ++++-------
be/test/olap/rowid_conversion_test.cpp | 6 +-
be/test/olap/rowset/beta_rowset_test.cpp | 2 +-
be/test/olap/rowset/segment_v2/segment_test.cpp | 37 +-
be/test/olap/segcompaction_test.cpp | 6 +-
be/test/testutil/mock_rowset.h | 3 +-
be/test/vec/olap/vertical_compaction_test.cpp | 862 +++++++++++++++++++++
.../test_compaction_agg_keys_with_delete.out | 7 +
.../test_compaction_dup_keys_with_delete.out | 7 +
.../test_compaction_uniq_keys_with_delete.out | 15 +
.../test_vertical_compaction_agg_keys.out | 16 +
.../test_vertical_compaction_dup_keys.out | 25 +
.../test_vertical_compaction_uniq_keys.out | 16 +
.../test_compaction_agg_keys_with_delete.groovy | 219 ++++++
.../test_compaction_dup_keys_with_delete.groovy | 217 ++++++
.../test_compaction_uniq_keys_with_delete.groovy | 221 ++++++
.../test_vertical_compaction_agg_keys.groovy | 256 ++++++
.../test_vertical_compaction_dup_keys.groovy | 255 ++++++
.../test_vertical_compaction_uniq_keys.groovy | 253 ++++++
59 files changed, 5068 insertions(+), 507 deletions(-)
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 4c256f168b..d2687b83a2 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -240,12 +240,25 @@ CONF_Bool(enable_storage_vectorization, "true");
CONF_Bool(enable_low_cardinality_optimize, "true");
// be policy
+// whether to check the compaction checksum
+CONF_mBool(enable_compaction_checksum, "false");
// whether disable automatic compaction task
CONF_mBool(disable_auto_compaction, "false");
// whether enable vectorized compaction
CONF_Bool(enable_vectorized_compaction, "true");
// whether enable vectorized schema change/material-view/rollup task.
CONF_Bool(enable_vectorized_alter_table, "true");
+// whether enable vertical compaction
+CONF_mBool(enable_vertical_compaction, "false");
+// whether enable ordered data compaction
+CONF_mBool(enable_ordered_data_compaction, "false");
+// In vertical compaction, the number of columns in each group
+CONF_mInt32(vertical_compaction_num_columns_per_group, "5");
+// In vertical compaction, max memory usage for row_source_buffer
+CONF_Int32(vertical_compaction_max_row_source_memory_mb, "200");
+
+// In ordered data compaction, min segment size for input rowset
+CONF_mInt32(ordered_data_compaction_min_segment_size, "10485760");
// check the configuration of auto compaction in seconds when auto compaction disabled
CONF_mInt32(check_auto_compaction_interval_seconds, "5");
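Taken together, these knobs shape the two new strategies: vertical compaction packs non-key columns into groups of vertical_compaction_num_columns_per_group and caps the row-source buffer at vertical_compaction_max_row_source_memory_mb, while ordered data compaction only links rowsets whose segments are all at least ordered_data_compaction_min_segment_size bytes (10 MB by default). A minimal standalone sketch of the grouping arithmetic, using a hypothetical 15-column schema:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t num_key_cols = 3, total_cols = 15;  // hypothetical schema
        const int64_t cols_per_group = 5;  // vertical_compaction_num_columns_per_group
        // one key group plus ceil(value_cols / cols_per_group) value groups
        const int64_t value_cols = total_cols - num_key_cols;
        const int64_t num_groups = 1 + (value_cols + cols_per_group - 1) / cols_per_group;
        printf("column groups: %lld\n", (long long)num_groups);  // prints 4
        return 0;
    }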
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index e74dca760f..65797a8914 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -19,9 +19,11 @@
#include "common/status.h"
#include "gutil/strings/substitute.h"
+#include "olap/rowset/beta_rowset.h"
#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_meta.h"
#include "olap/tablet.h"
+#include "olap/task/engine_checksum_task.h"
#include "util/time.h"
#include "util/trace.h"
@@ -33,6 +35,8 @@ Compaction::Compaction(TabletSharedPtr tablet, const std::string& label)
: _tablet(tablet),
_input_rowsets_size(0),
_input_row_num(0),
+ _input_num_segments(0),
+ _input_index_size(0),
_state(CompactionState::INITED) {
_mem_tracker = std::make_shared<MemTrackerLimiter>(MemTrackerLimiter::Type::COMPACTION, label);
}
@@ -106,27 +110,127 @@ Status Compaction::quick_rowsets_compact() {
Status Compaction::do_compaction(int64_t permits) {
TRACE("start to do compaction");
+ uint32_t checksum_before;
+ uint32_t checksum_after;
+ if (config::enable_compaction_checksum) {
+ EngineChecksumTask checksum_task(_tablet->tablet_id(), _tablet->schema_hash(),
+ _input_rowsets.back()->end_version(), &checksum_before);
+ checksum_task.execute();
+ }
+
_tablet->data_dir()->disks_compaction_score_increment(permits);
_tablet->data_dir()->disks_compaction_num_increment(1);
Status st = do_compaction_impl(permits);
_tablet->data_dir()->disks_compaction_score_increment(-permits);
_tablet->data_dir()->disks_compaction_num_increment(-1);
+
+ if (config::enable_compaction_checksum) {
+ EngineChecksumTask checksum_task(_tablet->tablet_id(), _tablet->schema_hash(),
+ _input_rowsets.back()->end_version(), &checksum_after);
+ checksum_task.execute();
+ if (checksum_before != checksum_after) {
+ LOG(WARNING) << "Compaction tablet=" << _tablet->tablet_id()
+ << " checksum not consistent"
+ << ", before=" << checksum_before << ", checksum_after=" << checksum_after;
+ }
+ }
return st;
}
-Status Compaction::do_compaction_impl(int64_t permits) {
- OlapStopWatch watch;
+bool Compaction::should_vertical_compaction() {
+ // conditions under which vertical compaction should not be used
+ if (!config::enable_vertical_compaction) {
+ return false;
+ }
+ if (_tablet->enable_unique_key_merge_on_write()) {
+ return false;
+ }
+ return true;
+}
+
+int64_t Compaction::get_avg_segment_rows() {
+ // take care of empty rowsets: the +1 terms guard against division by zero
+ // input_rowsets_size is the total disk_size of the input rowsets; this is the
+ // final size after encoding and compression, so the expected destination
+ // segment file size on disk is config::write_buffer_size
+ return config::write_buffer_size / (_input_rowsets_size / (_input_row_num + 1) + 1);
+}
- // 1. prepare input and output parameters
- int64_t segments_num = 0;
+bool Compaction::is_rowset_tidy(std::string& pre_max_key, const RowsetSharedPtr& rhs) {
+ size_t min_tidy_size = config::ordered_data_compaction_min_segment_size;
+ if (rhs->num_segments() == 0) {
+ return true;
+ }
+ if (rhs->is_segments_overlapping()) {
+ return false;
+ }
+ // check segment size
+ auto beta_rowset = reinterpret_cast<BetaRowset*>(rhs.get());
+ std::vector<size_t> segments_size;
+ beta_rowset->get_segments_size(&segments_size);
+ for (auto segment_size : segments_size) {
+ // if any segment is too small, the rowset is not tidy and needs real compaction
+ if (segment_size < min_tidy_size) {
+ return false;
+ }
+ }
+ std::string min_key;
+ auto ret = rhs->min_key(&min_key);
+ if (!ret) {
+ return false;
+ }
+ if (min_key < pre_max_key) {
+ return false;
+ }
+ CHECK(rhs->max_key(&pre_max_key));
+
+ return true;
+}
+
+Status Compaction::do_compact_ordered_rowsets() {
+ build_basic_info();
+ RETURN_NOT_OK(construct_output_rowset_writer());
+
+ LOG(INFO) << "start to do ordered data compaction, tablet=" << _tablet->full_name()
+ << ", output_version=" << _output_version;
+ // link data to new rowset
+ auto seg_id = 0;
+ std::vector<KeyBoundsPB> segment_key_bounds;
+ for (auto rowset : _input_rowsets) {
+ RETURN_NOT_OK(rowset->link_files_to(_tablet->tablet_path(), _output_rs_writer->rowset_id(),
+ seg_id));
+ seg_id += rowset->num_segments();
+
+ std::vector<KeyBoundsPB> key_bounds;
+ rowset->get_segments_key_bounds(&key_bounds);
+ segment_key_bounds.insert(segment_key_bounds.end(), key_bounds.begin(), key_bounds.end());
+ }
+ // build output rowset
+ RowsetMetaSharedPtr rowset_meta = std::make_shared<RowsetMeta>();
+ rowset_meta->set_num_rows(_input_row_num);
+ rowset_meta->set_total_disk_size(_input_rowsets_size);
+ rowset_meta->set_data_disk_size(_input_rowsets_size);
+ rowset_meta->set_index_disk_size(_input_index_size);
+ rowset_meta->set_empty(_input_row_num == 0);
+ rowset_meta->set_num_segments(_input_num_segments);
+ rowset_meta->set_segments_overlap(NONOVERLAPPING);
+ rowset_meta->set_rowset_state(VISIBLE);
+
+ rowset_meta->set_segments_key_bounds(segment_key_bounds);
+ _output_rowset = _output_rs_writer->manual_build(rowset_meta);
+ return Status::OK();
+}
+
+void Compaction::build_basic_info() {
for (auto& rowset : _input_rowsets) {
_input_rowsets_size += rowset->data_disk_size();
+ _input_index_size += rowset->index_disk_size();
_input_row_num += rowset->num_rows();
- segments_num += rowset->num_segments();
+ _input_num_segments += rowset->num_segments();
}
TRACE_COUNTER_INCREMENT("input_rowsets_data_size", _input_rowsets_size);
TRACE_COUNTER_INCREMENT("input_row_num", _input_row_num);
- TRACE_COUNTER_INCREMENT("input_segments_num", segments_num);
+ TRACE_COUNTER_INCREMENT("input_segments_num", _input_num_segments);
_output_version =
Version(_input_rowsets.front()->start_version(), _input_rowsets.back()->end_version());
@@ -134,20 +238,84 @@ Status Compaction::do_compaction_impl(int64_t permits) {
_oldest_write_timestamp = _input_rowsets.front()->oldest_write_timestamp();
_newest_write_timestamp = _input_rowsets.back()->newest_write_timestamp();
- auto use_vectorized_compaction = config::enable_vectorized_compaction;
- string merge_type = use_vectorized_compaction ? "v" : "";
-
- LOG(INFO) << "start " << merge_type << compaction_name() << ". tablet=" << _tablet->full_name()
- << ", output_version=" << _output_version << ", permits: " << permits;
- // get cur schema if rowset schema exist, rowset schema must be newer than tablet schema
std::vector<RowsetMetaSharedPtr> rowset_metas(_input_rowsets.size());
std::transform(_input_rowsets.begin(), _input_rowsets.end(), rowset_metas.begin(),
[](const RowsetSharedPtr& rowset) { return rowset->rowset_meta(); });
- TabletSchemaSPtr cur_tablet_schema =
+ _cur_tablet_schema =
_tablet->rowset_meta_with_max_schema_version(rowset_metas)->tablet_schema();
+}
- RETURN_NOT_OK(construct_output_rowset_writer(cur_tablet_schema));
+bool Compaction::handle_ordered_data_compaction() {
+ if (!config::enable_ordered_data_compaction) {
+ return false;
+ }
+ // check delete version: if the compaction type is base compaction and
+ // the input contains a delete version, fall back to the original compaction
+ if (compaction_type() == ReaderType::READER_BASE_COMPACTION) {
+ for (auto rowset : _input_rowsets) {
+ if (_tablet->version_for_delete_predicate(rowset->version())) {
+ return false;
+ }
+ }
+ }
+
+ // check whether the rowsets are tidy, so that compaction can be handled by
+ // simply modifying the meta and hard-linking the segment files
+ auto input_size = _input_rowsets.size();
+ std::string pre_max_key;
+ for (auto i = 0; i < input_size; ++i) {
+ if (!is_rowset_tidy(pre_max_key, _input_rowsets[i])) {
+ if (i <= input_size / 2) {
+ return false;
+ } else {
+ _input_rowsets.resize(i);
+ break;
+ }
+ }
+ }
+ // most rowsets of the current compaction are non-overlapping,
+ // so just handle the non-overlapping rowsets
+ auto st = do_compact_ordered_rowsets();
+ if (!st.ok()) {
+ return false;
+ }
+ return true;
+}
+
+Status Compaction::do_compaction_impl(int64_t permits) {
+ OlapStopWatch watch;
+
+ auto use_vectorized_compaction = config::enable_vectorized_compaction;
+ string merge_type = use_vectorized_compaction ? "v" : "";
+
+ if (handle_ordered_data_compaction()) {
+ RETURN_NOT_OK(modify_rowsets());
+ TRACE("modify rowsets finished");
+
+ int64_t now = UnixMillis();
+ if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION) {
+ _tablet->set_last_cumu_compaction_success_time(now);
+ } else {
+ _tablet->set_last_base_compaction_success_time(now);
+ }
+ auto cumu_policy = _tablet->cumulative_compaction_policy();
+ LOG(INFO) << "succeed to do ordered data " << merge_type << compaction_name()
+ << ". tablet=" << _tablet->full_name() << ", output_version=" << _output_version
+ << ", disk=" << _tablet->data_dir()->path()
+ << ", segments=" << _input_num_segments << ", input_row_num=" << _input_row_num
+ << ", output_row_num=" << _output_rowset->num_rows()
+ << ". elapsed time=" << watch.get_elapse_second()
+ << "s. cumulative_compaction_policy="
+ << (cumu_policy == nullptr ? "quick" : cumu_policy->name());
+ return Status::OK();
+ }
+ build_basic_info();
+
+ LOG(INFO) << "start " << merge_type << compaction_name() << ". tablet=" << _tablet->full_name()
+ << ", output_version=" << _output_version << ", permits: " << permits;
+ bool vertical_compaction = should_vertical_compaction();
RETURN_NOT_OK(construct_input_rowset_readers());
+ RETURN_NOT_OK(construct_output_rowset_writer(vertical_compaction));
TRACE("prepare finished");
// 2. write merged rows to output rowset
@@ -160,10 +328,16 @@ Status Compaction::do_compaction_impl(int64_t permits) {
}
if (use_vectorized_compaction) {
- res = Merger::vmerge_rowsets(_tablet, compaction_type(), cur_tablet_schema,
- _input_rs_readers, _output_rs_writer.get(), &stats);
+ if (vertical_compaction) {
+ res = Merger::vertical_merge_rowsets(_tablet, compaction_type(), _cur_tablet_schema,
+ _input_rs_readers, _output_rs_writer.get(),
+ get_avg_segment_rows(), &stats);
+ } else {
+ res = Merger::vmerge_rowsets(_tablet, compaction_type(), _cur_tablet_schema,
+ _input_rs_readers, _output_rs_writer.get(), &stats);
+ }
} else {
- res = Merger::merge_rowsets(_tablet, compaction_type(), cur_tablet_schema,
+ res = Merger::merge_rowsets(_tablet, compaction_type(), _cur_tablet_schema,
_input_rs_readers, _output_rs_writer.get(), &stats);
}
@@ -218,9 +392,10 @@ Status Compaction::do_compaction_impl(int64_t permits) {
auto cumu_policy = _tablet->cumulative_compaction_policy();
LOG(INFO) << "succeed to do " << merge_type << compaction_name()
- << ". tablet=" << _tablet->full_name() << ", output_version=" << _output_version
+ << " is_vertical=" << vertical_compaction << ". tablet=" << _tablet->full_name()
+ << ", output_version=" << _output_version
<< ", current_max_version=" << current_max_version
- << ", disk=" << _tablet->data_dir()->path() << ", segments=" << segments_num
+ << ", disk=" << _tablet->data_dir()->path() << ", segments=" << _input_num_segments
<< ", input_row_num=" << _input_row_num
<< ", output_row_num=" << _output_rowset->num_rows()
<< ". elapsed time=" << watch.get_elapse_second()
@@ -231,10 +406,15 @@ Status Compaction::do_compaction_impl(int64_t permits) {
return Status::OK();
}
-Status Compaction::construct_output_rowset_writer(TabletSchemaSPtr schema) {
- return _tablet->create_rowset_writer(_output_version, VISIBLE, NONOVERLAPPING, schema,
- _oldest_write_timestamp, _newest_write_timestamp,
- &_output_rs_writer);
+Status Compaction::construct_output_rowset_writer(bool is_vertical) {
+ if (is_vertical) {
+ return _tablet->create_vertical_rowset_writer(_output_version, VISIBLE, NONOVERLAPPING,
+ _cur_tablet_schema, _oldest_write_timestamp,
+ _newest_write_timestamp, &_output_rs_writer);
+ }
+ return _tablet->create_rowset_writer(_output_version, VISIBLE, NONOVERLAPPING,
+ _cur_tablet_schema, _oldest_write_timestamp,
+ _newest_write_timestamp, &_output_rs_writer);
}
Status Compaction::construct_input_rowset_readers() {
@@ -343,4 +523,13 @@ int64_t Compaction::get_compaction_permits() {
return permits;
}
+#ifdef BE_TEST
+void Compaction::set_input_rowset(const std::vector<RowsetSharedPtr>& rowsets) {
+ _input_rowsets = rowsets;
+}
+
+RowsetSharedPtr Compaction::output_rowset() {
+ return _output_rowset;
+}
+#endif
} // namespace doris
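Two details of the flow above are easy to miss. First, handle_ordered_data_compaction only falls back to a normal merge when a non-tidy rowset appears in the first half of the input; if the tidy prefix covers more than half, it truncates _input_rowsets and links just that prefix. Second, get_avg_segment_rows sizes output segments from the observed on-disk row size. A standalone sketch of both calculations, with hypothetical inputs:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // get_avg_segment_rows(): rows per output segment so that a segment's
        // on-disk size lands near config::write_buffer_size
        const int64_t write_buffer_size = 209715200;   // assume 200 MB
        const int64_t input_rowsets_size = 1073741824; // hypothetical: 1 GB of input
        const int64_t input_row_num = 8000000;         // hypothetical: 8M rows
        const int64_t avg_row_size = input_rowsets_size / (input_row_num + 1) + 1;
        printf("rows per segment: %lld\n",
               (long long)(write_buffer_size / avg_row_size)); // ~1.55M

        // handle_ordered_data_compaction(): truncate-or-fallback heuristic
        const int64_t input_size = 10;   // hypothetical input rowset count
        const int64_t first_untidy = 7;  // index of the first non-tidy rowset
        if (first_untidy <= input_size / 2) {
            printf("fall back to normal compaction\n");
        } else {
            printf("ordered-compact only the first %lld rowsets\n",
                   (long long)first_untidy);
        }
        return 0;
    }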
diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h
index 26d985f9c0..bf11629fac 100644
--- a/be/src/olap/compaction.h
+++ b/be/src/olap/compaction.h
@@ -53,6 +53,10 @@ public:
virtual Status prepare_compact() = 0;
Status execute_compact();
virtual Status execute_compact_impl() = 0;
+#ifdef BE_TEST
+ void set_input_rowset(const std::vector<RowsetSharedPtr>& rowsets);
+ RowsetSharedPtr output_rowset();
+#endif
protected:
virtual Status pick_rowsets_to_compact() = 0;
@@ -65,7 +69,7 @@ protected:
Status modify_rowsets();
void gc_output_rowset();
- Status construct_output_rowset_writer(TabletSchemaSPtr schema);
+ Status construct_output_rowset_writer(bool is_vertical = false);
Status construct_input_rowset_readers();
Status check_version_continuity(const std::vector<RowsetSharedPtr>& rowsets);
@@ -74,6 +78,14 @@ protected:
std::vector<Version>* missing_version);
int64_t get_compaction_permits();
+ bool should_vertical_compaction();
+ int64_t get_avg_segment_rows();
+
+ bool handle_ordered_data_compaction();
+ Status do_compact_ordered_rowsets();
+ bool is_rowset_tidy(std::string& pre_max_key, const RowsetSharedPtr& rhs);
+ void build_basic_info();
+
protected:
// the root tracker for this compaction
std::shared_ptr<MemTrackerLimiter> _mem_tracker;
@@ -84,6 +96,8 @@ protected:
std::vector<RowsetReaderSharedPtr> _input_rs_readers;
int64_t _input_rowsets_size;
int64_t _input_row_num;
+ int64_t _input_num_segments;
+ int64_t _input_index_size;
RowsetSharedPtr _output_rowset;
std::unique_ptr<RowsetWriter> _output_rs_writer;
@@ -96,6 +110,7 @@ protected:
int64_t _oldest_write_timestamp;
int64_t _newest_write_timestamp;
RowIdConversion _rowid_conversion;
+ TabletSchemaSPtr _cur_tablet_schema;
DISALLOW_COPY_AND_ASSIGN(Compaction);
};
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index 12bd1b17c7..0cabfb606b 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -36,6 +36,9 @@ class ColumnPredicate;
struct IOContext {
ReaderType reader_type;
};
+namespace vectorized {
+struct IteratorRowRef;
+};
class StorageReadOptions {
public:
@@ -127,6 +130,13 @@ public:
return Status::NotSupported("to be implemented");
}
+ virtual Status next_row(vectorized::IteratorRowRef* ref) {
+ return Status::NotSupported("to be implemented");
+ }
+ virtual Status unique_key_next_row(vectorized::IteratorRowRef* ref) {
+ return Status::NotSupported("to be implemented");
+ }
+
virtual bool support_return_data_by_ref() { return false; }
virtual Status current_block_row_locations(std::vector<RowLocation>* block_row_locations) {
@@ -144,6 +154,8 @@ public:
virtual uint64_t data_id() const { return 0; }
virtual bool update_profile(RuntimeProfile* profile) { return false; }
+ // return the number of rows merged by this iterator
+ virtual uint64_t merged_rows() const { return 0; }
};
} // namespace doris
diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp
index ace3d6b39a..46bb15c96d 100644
--- a/be/src/olap/merger.cpp
+++ b/be/src/olap/merger.cpp
@@ -26,6 +26,8 @@
#include "olap/tuple_reader.h"
#include "util/trace.h"
#include "vec/olap/block_reader.h"
+#include "vec/olap/vertical_block_reader.h"
+#include "vec/olap/vertical_merge_iterator.h"
namespace doris {
@@ -188,4 +190,138 @@ Status Merger::vmerge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
return Status::OK();
}
+// split columns into several groups, making sure all key columns land in one group;
+// for unique-key tables, the sequence and delete-sign columns count as keys too
+void Merger::vertical_split_columns(TabletSchemaSPtr tablet_schema,
+ std::vector<std::vector<uint32_t>>* column_groups) {
+ uint32_t num_key_cols = tablet_schema->num_key_columns();
+ uint32_t total_cols = tablet_schema->num_columns();
+ std::vector<uint32_t> key_columns;
+ for (auto i = 0; i < num_key_cols; ++i) {
+ key_columns.emplace_back(i);
+ }
+ // for unique-key tables, the sequence and delete-sign columns must be merged into the key group
+ int32_t sequence_col_idx = -1;
+ int32_t delete_sign_idx = -1;
+ // in key column compaction, seq_col real index is _num_key_columns
+ // and delete_sign column is _block->columns() - 1
+ if (tablet_schema->keys_type() == KeysType::UNIQUE_KEYS) {
+ if (tablet_schema->has_sequence_col()) {
+ sequence_col_idx = tablet_schema->sequence_col_idx();
+ key_columns.emplace_back(sequence_col_idx);
+ }
+ delete_sign_idx = tablet_schema->field_index(DELETE_SIGN);
+ if (delete_sign_idx != -1) {
+ key_columns.emplace_back(delete_sign_idx);
+ }
+ }
+ VLOG_NOTICE << "sequence_col_idx=" << sequence_col_idx
+ << ", delete_sign_idx=" << delete_sign_idx;
+ column_groups->emplace_back(std::move(key_columns));
+ std::vector<uint32_t> value_columns;
+ for (auto i = num_key_cols; i < total_cols; ++i) {
+ if (i == sequence_col_idx || i == delete_sign_idx) {
+ continue;
+ }
+ if ((i - num_key_cols) % config::vertical_compaction_num_columns_per_group == 0) {
+ column_groups->emplace_back();
+ }
+ column_groups->back().emplace_back(i);
+ }
+}
+
+Status Merger::vertical_compact_one_group(
+ TabletSharedPtr tablet, ReaderType reader_type, TabletSchemaSPtr tablet_schema, bool is_key,
+ const std::vector<uint32_t>& column_group, vectorized::RowSourcesBuffer* row_source_buf,
+ const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
+ RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment, Statistics* stats_output) {
+ // build tablet reader
+ VLOG_NOTICE << "vertical compact one group, max_rows_per_segment=" << max_rows_per_segment;
+ vectorized::VerticalBlockReader reader(row_source_buf);
+ TabletReader::ReaderParams reader_params;
+ reader_params.is_key_column_group = is_key;
+ reader_params.tablet = tablet;
+ reader_params.reader_type = reader_type;
+ reader_params.rs_readers = src_rowset_readers;
+ reader_params.version = dst_rowset_writer->version();
+ {
+ std::shared_lock rdlock(tablet->get_header_lock());
+ auto delete_preds = tablet->delete_predicates();
+ std::copy(delete_preds.cbegin(), delete_preds.cend(),
+ std::inserter(reader_params.delete_predicates,
+ reader_params.delete_predicates.begin()));
+ }
+ TabletSchemaSPtr merge_tablet_schema = std::make_shared<TabletSchema>();
+ merge_tablet_schema->copy_from(*tablet_schema);
+ // Merge columns referenced by delete predicates but missing from the latest schema into the current tablet schema
+ for (auto& del_pred_rs : reader_params.delete_predicates) {
+ merge_tablet_schema->merge_dropped_columns(tablet->tablet_schema(del_pred_rs->version()));
+ }
+ reader_params.tablet_schema = merge_tablet_schema;
+
+ reader_params.return_columns = column_group;
+ reader_params.origin_return_columns = &reader_params.return_columns;
+ RETURN_NOT_OK(reader.init(reader_params));
+
+ vectorized::Block block = tablet_schema->create_block(reader_params.return_columns);
+ size_t output_rows = 0;
+ bool eof = false;
+ while (!eof) {
+ // Read one block from block reader
+ RETURN_NOT_OK_LOG(
+ reader.next_block_with_aggregation(&block, nullptr, nullptr, &eof),
+ "failed to read next block when merging rowsets of tablet " + tablet->full_name());
+ RETURN_NOT_OK_LOG(
+ dst_rowset_writer->add_columns(&block, column_group, is_key, max_rows_per_segment),
+ "failed to write block when merging rowsets of tablet " + tablet->full_name());
+
+ output_rows += block.rows();
+ block.clear_column_data();
+ }
+
+ if (is_key && stats_output != nullptr) {
+ stats_output->output_rows = output_rows;
+ stats_output->merged_rows = reader.merged_rows();
+ stats_output->filtered_rows = reader.filtered_rows();
+ }
+ RETURN_IF_ERROR(dst_rowset_writer->flush_columns());
+
+ return Status::OK();
+}
+
+// steps to do vertical merge:
+// 1. split columns into column groups
+// 2. compact the groups one by one; compacting the key group generates a row_source_buf
+//    that is then used to compact the value column groups
+// 3. build output rowset
+Status Merger::vertical_merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
+ TabletSchemaSPtr tablet_schema,
+ const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
+ RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment,
+ Statistics* stats_output) {
+ LOG(INFO) << "Start to do vertical compaction, tablet_id: " << tablet->tablet_id();
+ std::vector<std::vector<uint32_t>> column_groups;
+ vertical_split_columns(tablet_schema, &column_groups);
+
+ vectorized::RowSourcesBuffer row_sources_buf(tablet->tablet_id(), tablet->tablet_path(),
+ reader_type);
+ // compact group one by one
+ for (auto i = 0; i < column_groups.size(); ++i) {
+ VLOG_NOTICE << "row source size: " << row_sources_buf.total_size();
+ bool is_key = (i == 0);
+ RETURN_IF_ERROR(vertical_compact_one_group(
+ tablet, reader_type, tablet_schema, is_key, column_groups[i], &row_sources_buf,
+ src_rowset_readers, dst_rowset_writer, max_rows_per_segment, stats_output));
+ if (is_key) {
+ row_sources_buf.flush();
+ }
+ row_sources_buf.seek_to_begin();
+ }
+ // finish compact, build output rowset
+ VLOG_NOTICE << "finish compact groups";
+ RETURN_IF_ERROR(dst_rowset_writer->final_flush());
+
+ return Status::OK();
+}
+
} // namespace doris
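As a concrete illustration of vertical_split_columns, take a hypothetical UNIQUE_KEYS schema: the sequence and delete-sign columns join the key group, and the remaining value columns are chunked in index order. Note that skipped indexes still advance the modulo counter, so a value group can end up with fewer than cols_per_group columns. A standalone sketch mirroring the loop above:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        // hypothetical UNIQUE_KEYS schema: 10 columns, keys {0,1},
        // sequence column at 5, delete-sign column at 9, 5 columns per group
        const uint32_t num_key_cols = 2, total_cols = 10, cols_per_group = 5;
        const int32_t sequence_col_idx = 5, delete_sign_idx = 9;

        std::vector<std::vector<uint32_t>> groups;
        std::vector<uint32_t> keys = {0, 1};
        keys.push_back(sequence_col_idx);   // merged into the key group
        keys.push_back(delete_sign_idx);
        groups.push_back(keys);

        for (uint32_t i = num_key_cols; i < total_cols; ++i) {
            if ((int32_t)i == sequence_col_idx || (int32_t)i == delete_sign_idx) continue;
            if ((i - num_key_cols) % cols_per_group == 0) groups.emplace_back();
            groups.back().push_back(i);
        }
        for (size_t g = 0; g < groups.size(); ++g) {
            printf("group %zu:", g);
            for (uint32_t c : groups[g]) printf(" %u", c);
            printf("\n");
        }
        // prints: group 0: 0 1 5 9 / group 1: 2 3 4 6 / group 2: 7 8
        return 0;
    }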
diff --git a/be/src/olap/merger.h b/be/src/olap/merger.h
index e0286e158d..aff9a741c4 100644
--- a/be/src/olap/merger.h
+++ b/be/src/olap/merger.h
@@ -24,6 +24,10 @@
namespace doris {
+namespace vectorized {
+class RowSourcesBuffer;
+};
+
class Merger {
public:
struct Statistics {
@@ -46,6 +50,23 @@ public:
TabletSchemaSPtr cur_tablet_schema,
const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
RowsetWriter* dst_rowset_writer, Statistics* stats_output);
+ static Status vertical_merge_rowsets(
+ TabletSharedPtr tablet, ReaderType reader_type, TabletSchemaSPtr tablet_schema,
+ const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
+ RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment,
+ Statistics* stats_output);
+
+public:
+ // for vertical compaction
+ static void vertical_split_columns(TabletSchemaSPtr tablet_schema,
+ std::vector<std::vector<uint32_t>>* column_groups);
+ static Status vertical_compact_one_group(
+ TabletSharedPtr tablet, ReaderType reader_type, TabletSchemaSPtr tablet_schema,
+ bool is_key, const std::vector<uint32_t>& column_group,
+ vectorized::RowSourcesBuffer* row_source_buf,
+ const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
+ RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment,
+ Statistics* stats_output);
};
} // namespace doris
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index 0381521066..fcc2f9b6dc 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -214,6 +214,7 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params,
_reader_context.delete_bitmap = read_params.delete_bitmap;
_reader_context.enable_unique_key_merge_on_write = tablet()->enable_unique_key_merge_on_write();
_reader_context.record_rowids = read_params.record_rowids;
+ _reader_context.is_key_column_group = read_params.is_key_column_group;
*valid_rs_readers = *rs_readers;
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h
index 251af66ecc..131d04e028 100644
--- a/be/src/olap/reader.h
+++ b/be/src/olap/reader.h
@@ -102,6 +102,9 @@ public:
// num of columns for orderby key
size_t read_orderby_key_num_prefix_columns = 0;
+ // for vertical compaction
+ bool is_key_column_group = false;
+
void check_validation() const;
std::string to_string() const;
@@ -134,7 +137,7 @@ public:
return Status::OLAPInternalError(OLAP_ERR_READER_INITIALIZE_ERROR);
}
- uint64_t merged_rows() const { return _merged_rows; }
+ virtual uint64_t merged_rows() const { return _merged_rows; }
uint64_t filtered_rows() const {
return _stats.rows_del_filtered + _stats.rows_del_by_bitmap +
diff --git a/be/src/olap/rowset/CMakeLists.txt b/be/src/olap/rowset/CMakeLists.txt
index a701fa1005..da6375a2d1 100644
--- a/be/src/olap/rowset/CMakeLists.txt
+++ b/be/src/olap/rowset/CMakeLists.txt
@@ -29,4 +29,5 @@ add_library(Rowset STATIC
beta_rowset.cpp
beta_rowset_reader.cpp
beta_rowset_writer.cpp
+ vertical_beta_rowset_writer.cpp
rowset_tree.cpp)
diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp
index 8ed48fdf70..6fbe708090 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -109,6 +109,19 @@ Status BetaRowset::do_load(bool /*use_cache*/) {
return Status::OK();
}
+Status BetaRowset::get_segments_size(std::vector<size_t>* segments_size) {
+ auto fs = _rowset_meta->fs();
+ if (!fs || _schema == nullptr) {
+ return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED);
+ }
+ for (int seg_id = 0; seg_id < num_segments(); ++seg_id) {
+ auto seg_path = segment_file_path(seg_id);
+ size_t file_size;
+ RETURN_IF_ERROR(fs->file_size(seg_path, &file_size));
+ segments_size->push_back(file_size);
+ }
+ return Status::OK();
+}
Status BetaRowset::load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments) {
auto fs = _rowset_meta->fs();
if (!fs || _schema == nullptr) {
@@ -197,14 +210,15 @@ void BetaRowset::do_close() {
// do nothing.
}
-Status BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset_id) {
+Status BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset_id,
+ size_t new_rowset_start_seg_id) {
DCHECK(is_local());
auto fs = _rowset_meta->fs();
if (!fs) {
return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED);
}
for (int i = 0; i < num_segments(); ++i) {
- auto dst_path = segment_file_path(dir, new_rowset_id, i);
+ auto dst_path = segment_file_path(dir, new_rowset_id, i + new_rowset_start_seg_id);
// TODO(lingbin): use Env API? or EnvUtil?
bool dst_path_exist = false;
if (!fs->exists(dst_path, &dst_path_exist).ok() || dst_path_exist) {
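Ordered data compaction hard-links every input rowset's segments into the output rowset, so each input must continue the output's segment numbering instead of restarting at 0; that is what the new new_rowset_start_seg_id parameter carries. A standalone sketch of how the caller advances the id (mirroring the loop in do_compact_ordered_rowsets):

    #include <cstdio>

    int main() {
        const int seg_counts[] = {3, 2, 4};  // hypothetical segments per input rowset
        int seg_id = 0;                      // new_rowset_start_seg_id for each link_files_to
        for (int n : seg_counts) {
            printf("link as output segments [%d, %d)\n", seg_id, seg_id + n);
            seg_id += n;
        }
        return 0;  // the output rowset ends with 9 consecutively numbered segments
    }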
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index 7cd792bf69..93d9b31676 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -70,7 +70,8 @@ public:
Status remove() override;
- Status link_files_to(const std::string& dir, RowsetId new_rowset_id) override;
+ Status link_files_to(const std::string& dir, RowsetId new_rowset_id,
+ size_t new_rowset_start_seg_id = 0) override;
Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) override;
@@ -89,6 +90,8 @@ public:
Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment);
+ Status get_segments_size(std::vector<size_t>* segments_size);
+
protected:
BetaRowset(TabletSchemaSPtr schema, const std::string& tablet_path,
RowsetMetaSharedPtr rowset_meta);
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index bfb08ad811..eae3f403d1 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -36,7 +36,16 @@ BetaRowsetReader::BetaRowsetReader(BetaRowsetSharedPtr rowset)
_rowset->acquire();
}
-Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
+void BetaRowsetReader::reset_read_options() {
+ _read_options.delete_condition_predicates = std::make_shared<AndBlockColumnPredicate>();
+ _read_options.column_predicates.clear();
+ _read_options.col_id_to_predicates.clear();
+ _read_options.col_id_to_del_predicates.clear();
+ _read_options.key_ranges.clear();
+}
+
+Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context,
+ std::vector<RowwiseIterator*>* out_iters) {
RETURN_NOT_OK(_rowset->load());
_context = read_context;
if (_context->stats != nullptr) {
@@ -47,30 +56,31 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
}
// convert RowsetReaderContext to StorageReadOptions
- StorageReadOptions read_options;
- read_options.stats = _stats;
- read_options.push_down_agg_type_opt = _context->push_down_agg_type_opt;
+ _read_options.stats = _stats;
+ _read_options.push_down_agg_type_opt = _context->push_down_agg_type_opt;
if (read_context->lower_bound_keys != nullptr) {
for (int i = 0; i < read_context->lower_bound_keys->size(); ++i) {
- read_options.key_ranges.emplace_back(&read_context->lower_bound_keys->at(i),
- read_context->is_lower_keys_included->at(i),
- &read_context->upper_bound_keys->at(i),
- read_context->is_upper_keys_included->at(i));
+ _read_options.key_ranges.emplace_back(&read_context->lower_bound_keys->at(i),
+ read_context->is_lower_keys_included->at(i),
+ &read_context->upper_bound_keys->at(i),
+ read_context->is_upper_keys_included->at(i));
}
}
- bool can_reuse_schema = true;
// delete_hanlder is always set, but it maybe not init, so that it will return empty conditions
// or predicates when it is not inited.
if (read_context->delete_handler != nullptr) {
read_context->delete_handler->get_delete_conditions_after_version(
- _rowset->end_version(), read_options.delete_condition_predicates.get(),
- &read_options.col_id_to_del_predicates);
+ _rowset->end_version(), _read_options.delete_condition_predicates.get(),
+ &_read_options.col_id_to_del_predicates);
// if del cond is not empty, schema may be different in multiple rowset
- can_reuse_schema = read_options.col_id_to_del_predicates.empty();
+ _can_reuse_schema = _read_options.col_id_to_del_predicates.empty();
}
-
- if (!can_reuse_schema || _context->reuse_input_schema == nullptr) {
+ // In vertical compaction, every column group needs a new schema
+ if (read_context->is_vertical_compaction) {
+ _can_reuse_schema = false;
+ }
+ if (!_can_reuse_schema || _context->reuse_input_schema == nullptr) {
std::vector<uint32_t> read_columns;
std::set<uint32_t> read_columns_set;
std::set<uint32_t> delete_columns_set;
@@ -78,37 +88,37 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
read_columns.push_back(_context->return_columns->at(i));
read_columns_set.insert(_context->return_columns->at(i));
}
- read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
+ _read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
for (auto cid : delete_columns_set) {
if (read_columns_set.find(cid) == read_columns_set.end()) {
read_columns.push_back(cid);
}
}
+ VLOG_NOTICE << "read columns size: " << read_columns.size();
_input_schema = std::make_shared<Schema>(_context->tablet_schema->columns(), read_columns);
-
- if (can_reuse_schema) {
+ if (_can_reuse_schema) {
_context->reuse_input_schema = _input_schema;
}
}
// if can reuse schema, context must have reuse_input_schema
// if can't reuse schema, context mustn't have reuse_input_schema
- DCHECK(can_reuse_schema ^ (_context->reuse_input_schema == nullptr));
+ DCHECK(_can_reuse_schema ^ (_context->reuse_input_schema == nullptr));
if (_context->reuse_input_schema != nullptr && _input_schema == nullptr) {
_input_schema = _context->reuse_input_schema;
}
if (read_context->predicates != nullptr) {
- read_options.column_predicates.insert(read_options.column_predicates.end(),
- read_context->predicates->begin(),
- read_context->predicates->end());
+ _read_options.column_predicates.insert(_read_options.column_predicates.end(),
+ read_context->predicates->begin(),
+ read_context->predicates->end());
for (auto pred : *(read_context->predicates)) {
- if (read_options.col_id_to_predicates.count(pred->column_id()) < 1) {
- read_options.col_id_to_predicates.insert(
+ if (_read_options.col_id_to_predicates.count(pred->column_id()) < 1) {
+ _read_options.col_id_to_predicates.insert(
{pred->column_id(), std::make_shared<AndBlockColumnPredicate>()});
}
auto single_column_block_predicate = new SingleColumnBlockPredicate(pred);
- read_options.col_id_to_predicates[pred->column_id()]->add_column_predicate(
+ _read_options.col_id_to_predicates[pred->column_id()]->add_column_predicate(
single_column_block_predicate);
}
}
@@ -124,32 +134,32 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
}
VLOG_TRACE << "Get the delete bitmap for rowset: " << rowset_id.to_string()
<< ", segment id:" << seg_id << ", size:" << d->cardinality();
- read_options.delete_bitmap.emplace(seg_id, std::move(d));
+ _read_options.delete_bitmap.emplace(seg_id, std::move(d));
}
}
if (_should_push_down_value_predicates()) {
if (read_context->value_predicates != nullptr) {
- read_options.column_predicates.insert(read_options.column_predicates.end(),
- read_context->value_predicates->begin(),
- read_context->value_predicates->end());
+ _read_options.column_predicates.insert(_read_options.column_predicates.end(),
+ read_context->value_predicates->begin(),
+ read_context->value_predicates->end());
for (auto pred : *(read_context->value_predicates)) {
- if (read_options.col_id_to_predicates.count(pred->column_id()) < 1) {
- read_options.col_id_to_predicates.insert(
+ if (_read_options.col_id_to_predicates.count(pred->column_id()) < 1) {
+ _read_options.col_id_to_predicates.insert(
{pred->column_id(), std::make_shared<AndBlockColumnPredicate>()});
}
auto single_column_block_predicate = new SingleColumnBlockPredicate(pred);
- read_options.col_id_to_predicates[pred->column_id()]->add_column_predicate(
+ _read_options.col_id_to_predicates[pred->column_id()]->add_column_predicate(
single_column_block_predicate);
}
}
}
- read_options.use_page_cache = read_context->use_page_cache;
- read_options.tablet_schema = read_context->tablet_schema;
- read_options.record_rowids = read_context->record_rowids;
- read_options.read_orderby_key_reverse = read_context->read_orderby_key_reverse;
- read_options.read_orderby_key_columns = read_context->read_orderby_key_columns;
- read_options.io_ctx.reader_type = read_context->reader_type;
+ _read_options.use_page_cache = read_context->use_page_cache;
+ _read_options.tablet_schema = read_context->tablet_schema;
+ _read_options.record_rowids = read_context->record_rowids;
+ _read_options.read_orderby_key_reverse = read_context->read_orderby_key_reverse;
+ _read_options.read_orderby_key_columns = read_context->read_orderby_key_columns;
+ _read_options.io_ctx.reader_type = read_context->reader_type;
// load segments
RETURN_NOT_OK(SegmentLoader::instance()->load_segments(
@@ -160,7 +170,7 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators;
for (auto& seg_ptr : _segment_cache_handle.get_segments()) {
std::unique_ptr<RowwiseIterator> iter;
- auto s = seg_ptr->new_iterator(*_input_schema, read_options, &iter);
+ auto s = seg_ptr->new_iterator(*_input_schema, _read_options, &iter);
if (!s.ok()) {
LOG(WARNING) << "failed to create iterator[" << seg_ptr->id() << "]: " << s.to_string();
return Status::OLAPInternalError(OLAP_ERR_ROWSET_READER_INIT);
@@ -168,11 +178,22 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
seg_iterators.push_back(std::move(iter));
}
- std::vector<RowwiseIterator*> iterators;
for (auto& owned_it : seg_iterators) {
+ auto st = owned_it->init(_read_options);
+ if (!st.ok()) {
+ LOG(WARNING) << "failed to init iterator: " << st.to_string();
+ return Status::OLAPInternalError(OLAP_ERR_ROWSET_READER_INIT);
+ }
// transfer ownership of segment iterator to `_iterator`
- iterators.push_back(owned_it.release());
+ out_iters->push_back(owned_it.release());
}
+ return Status::OK();
+}
+
+Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
+ _context = read_context;
+ std::vector<RowwiseIterator*> iterators;
+ RETURN_NOT_OK(get_segment_iterators(_context, &iterators));
// merge or union segment iterator
RowwiseIterator* final_iterator;
@@ -199,7 +220,7 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
}
}
- auto s = final_iterator->init(read_options);
+ auto s = final_iterator->init(_read_options);
if (!s.ok()) {
LOG(WARNING) << "failed to init iterator: " << s.to_string();
return Status::OLAPInternalError(OLAP_ERR_ROWSET_READER_INIT);
@@ -217,7 +238,7 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
}
// init input block
- if (can_reuse_schema && !has_nestable_fields) {
+ if (_can_reuse_schema && !has_nestable_fields) {
if (read_context->reuse_block == nullptr) {
read_context->reuse_block.reset(
new RowBlockV2(*_input_schema, std::min(1024, read_context->batch_size)));
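The init path is split so that vertical compaction can fetch raw segment iterators per column group: StorageReadOptions now lives on the reader (_read_options), and reset_read_options clears the per-group predicate and key-range state before the same reader serves the next group. A hedged sketch of the intended call pattern (hypothetical driver code; in the patch, VerticalBlockReader drives this internally):

    // hypothetical per-group pass over one rowset reader (error handling trimmed)
    Status read_one_group(RowsetReaderSharedPtr reader, RowsetReaderContext* ctx) {
        reader->reset_read_options();  // drop the previous group's predicates/key ranges
        std::vector<RowwiseIterator*> iters;
        RETURN_NOT_OK(reader->get_segment_iterators(ctx, &iters));  // already init'ed
        // ... hand iters to a vertical merge iterator for this column group ...
        return Status::OK();
    }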
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index 0b0ef05fe0..e2f888cb12 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -35,6 +35,10 @@ public:
Status init(RowsetReaderContext* read_context) override;
+ Status get_segment_iterators(RowsetReaderContext* read_context,
+ std::vector<RowwiseIterator*>* out_iters) override;
+ void reset_read_options() override;
+
// It's ok, because we only get ref here, the block's owner is this reader.
Status next_block(RowBlock** block) override;
Status next_block(vectorized::Block* block) override;
@@ -91,6 +95,9 @@ private:
// make sure this handle is initialized and valid before
// reading data.
SegmentCacheHandle _segment_cache_handle;
+
+ StorageReadOptions _read_options;
+ bool _can_reuse_schema = true;
};
} // namespace doris
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp
index 512e4d85b1..6f8bb656c5 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -716,6 +716,27 @@ Status BetaRowsetWriter::_wait_flying_segcompaction() {
return Status::OK();
}
+RowsetSharedPtr BetaRowsetWriter::manual_build(const RowsetMetaSharedPtr& spec_rowset_meta) {
+ if (_rowset_meta->oldest_write_timestamp() == -1) {
+ _rowset_meta->set_oldest_write_timestamp(UnixSeconds());
+ }
+
+ if (_rowset_meta->newest_write_timestamp() == -1) {
+ _rowset_meta->set_newest_write_timestamp(UnixSeconds());
+ }
+
+ _build_rowset_meta_with_spec_field(_rowset_meta, spec_rowset_meta);
+ RowsetSharedPtr rowset;
+ auto status = RowsetFactory::create_rowset(_context.tablet_schema, _context.rowset_dir,
+ _rowset_meta, &rowset);
+ if (!status.ok()) {
+ LOG(WARNING) << "rowset init failed when build new rowset, res=" << status;
+ return nullptr;
+ }
+ _already_built = true;
+ return rowset;
+}
+
RowsetSharedPtr BetaRowsetWriter::build() {
// TODO(lingbin): move to more better place, or in a CreateBlockBatch?
for (auto& file_writer : _file_writers) {
@@ -770,6 +791,38 @@ RowsetSharedPtr BetaRowsetWriter::build() {
return rowset;
}
+bool BetaRowsetWriter::_is_segment_overlapping(
+ const std::vector<KeyBoundsPB>& segments_encoded_key_bounds) {
+ std::string last;
+ for (auto segment_encode_key : segments_encoded_key_bounds) {
+ auto cur_min = segment_encode_key.min_key();
+ auto cur_max = segment_encode_key.max_key();
+ if (cur_min < last) {
+ return true;
+ }
+ last = cur_max;
+ }
+ return false;
+}
+
+void BetaRowsetWriter::_build_rowset_meta_with_spec_field(
+ RowsetMetaSharedPtr rowset_meta, const RowsetMetaSharedPtr& spec_rowset_meta) {
+ rowset_meta->set_num_rows(spec_rowset_meta->num_rows());
+ rowset_meta->set_total_disk_size(spec_rowset_meta->total_disk_size());
+ rowset_meta->set_data_disk_size(spec_rowset_meta->total_disk_size());
+ rowset_meta->set_index_disk_size(spec_rowset_meta->index_disk_size());
+ // TODO write zonemap to meta
+ rowset_meta->set_empty(spec_rowset_meta->num_rows() == 0);
+ rowset_meta->set_creation_time(time(nullptr));
+ rowset_meta->set_num_segments(spec_rowset_meta->num_segments());
+ rowset_meta->set_segments_overlap(spec_rowset_meta->segments_overlap());
+ rowset_meta->set_rowset_state(spec_rowset_meta->rowset_state());
+
+ std::vector<KeyBoundsPB> segments_key_bounds;
+ spec_rowset_meta->get_segments_key_bounds(&segments_key_bounds);
+ rowset_meta->set_segments_key_bounds(segments_key_bounds);
+}
+
void BetaRowsetWriter::_build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_meta) {
int64_t num_seg = _is_segcompacted() ? _num_segcompacted : _num_segment;
int64_t num_rows_written = 0;
@@ -787,16 +840,16 @@ void BetaRowsetWriter::_build_rowset_meta(std::shared_ptr<RowsetMeta> rowset_met
segments_encoded_key_bounds.push_back(itr.second.key_bounds);
}
}
- rowset_meta->set_num_segments(num_seg);
- if (num_seg <= 1) {
- rowset_meta->set_segments_overlap(NONOVERLAPPING);
- }
- _segment_num_rows = segment_num_rows;
for (auto itr = _segments_encoded_key_bounds.begin(); itr != _segments_encoded_key_bounds.end();
++itr) {
segments_encoded_key_bounds.push_back(*itr);
}
+ if (!_is_segment_overlapping(segments_encoded_key_bounds)) {
+ rowset_meta->set_segments_overlap(NONOVERLAPPING);
+ }
+ rowset_meta->set_num_segments(num_seg);
+ _segment_num_rows = segment_num_rows;
// TODO(zhangzhengyu): key_bounds.size() should equal num_seg, but currently not always
rowset_meta->set_num_rows(num_rows_written + _num_rows_written);
rowset_meta->set_total_disk_size(total_data_size + _total_data_size);
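_build_rowset_meta used to mark a rowset NONOVERLAPPING only when it had at most one segment; it now inspects the encoded key bounds, so a multi-segment output from ordered or vertical compaction can still be marked non-overlapping. The check is one pass over the bounds in segment order (standalone sketch with hypothetical bounds):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct KeyBounds { std::string min_key, max_key; };

    // segments overlap iff some segment's min key sorts before the previous max key
    bool is_overlapping(const std::vector<KeyBounds>& bounds) {
        std::string last;
        for (const auto& b : bounds) {
            if (b.min_key < last) return true;
            last = b.max_key;
        }
        return false;
    }

    int main() {
        std::vector<KeyBounds> sorted = {{"a", "f"}, {"g", "m"}, {"n", "z"}};
        std::vector<KeyBounds> mixed  = {{"a", "f"}, {"c", "m"}};
        printf("%d %d\n", is_overlapping(sorted), is_overlapping(mixed)); // 0 1
        return 0;
    }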
diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h
index 65662ecf1a..7e1438761b 100644
--- a/be/src/olap/rowset/beta_rowset_writer.h
+++ b/be/src/olap/rowset/beta_rowset_writer.h
@@ -64,6 +64,8 @@ public:
// for this segment
RowsetSharedPtr build_tmp() override;
+ RowsetSharedPtr manual_build(const RowsetMetaSharedPtr& rowset_meta) override;
+
Version version() override { return _context.version; }
int64_t num_rows() const override { return _raw_num_rows_written; }
@@ -120,7 +122,11 @@ private:
Status _do_compact_segments(SegCompactionCandidatesSharedPtr segments);
-private:
+ void _build_rowset_meta_with_spec_field(RowsetMetaSharedPtr rowset_meta,
+ const RowsetMetaSharedPtr& spec_rowset_meta);
+ bool _is_segment_overlapping(const std::vector<KeyBoundsPB>& segments_encoded_key_bounds);
+
+protected:
RowsetWriterContext _context;
std::shared_ptr<RowsetMeta> _rowset_meta;
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index 32fda0e102..1e06b4b425 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -161,6 +161,7 @@ public:
RowsetMetaPB get_rowset_pb() const { return rowset_meta()->get_rowset_pb(); }
int64_t oldest_write_timestamp() const { return rowset_meta()->oldest_write_timestamp(); }
int64_t newest_write_timestamp() const { return rowset_meta()->newest_write_timestamp(); }
+ bool is_segments_overlapping() const { return rowset_meta()->is_segments_overlapping(); }
KeysType keys_type() { return _schema->keys_type(); }
// remove all files in this rowset
@@ -198,7 +199,8 @@ public:
}
// hard link all files in this rowset to `dir` to form a new rowset with id `new_rowset_id`.
- virtual Status link_files_to(const std::string& dir, RowsetId new_rowset_id) = 0;
+ virtual Status link_files_to(const std::string& dir, RowsetId new_rowset_id,
+ size_t new_rowset_start_seg_id = 0) = 0;
// copy all files to `dir`
virtual Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) = 0;
@@ -265,6 +267,24 @@ public:
_rowset_meta->get_segments_key_bounds(segments_key_bounds);
return Status::OK();
}
+ bool min_key(std::string* min_key) {
+ KeyBoundsPB key_bounds;
+ bool ret = _rowset_meta->get_first_segment_key_bound(&key_bounds);
+ if (!ret) {
+ return false;
+ }
+ *min_key = key_bounds.min_key();
+ return true;
+ }
+ bool max_key(std::string* max_key) {
+ KeyBoundsPB key_bounds;
+ bool ret = _rowset_meta->get_last_segment_key_bound(&key_bounds);
+ if (!ret) {
+ return false;
+ }
+ *max_key = key_bounds.max_key();
+ return true;
+ }
bool check_rowset_segment();
diff --git a/be/src/olap/rowset/rowset_factory.cpp b/be/src/olap/rowset/rowset_factory.cpp
index 9c8c75b2dc..fcbfb5fd61 100644
--- a/be/src/olap/rowset/rowset_factory.cpp
+++ b/be/src/olap/rowset/rowset_factory.cpp
@@ -23,6 +23,7 @@
#include "gen_cpp/olap_file.pb.h"
#include "olap/rowset/beta_rowset_writer.h"
#include "olap/rowset/rowset_writer.h"
+#include "olap/rowset/vertical_beta_rowset_writer.h"
namespace doris {
@@ -38,12 +39,16 @@ Status RowsetFactory::create_rowset(TabletSchemaSPtr schema, const std::string&
return Status::OLAPInternalError(OLAP_ERR_ROWSET_TYPE_NOT_FOUND); // should never happen
}
-Status RowsetFactory::create_rowset_writer(const RowsetWriterContext& context,
+Status RowsetFactory::create_rowset_writer(const RowsetWriterContext& context, bool is_vertical,
std::unique_ptr<RowsetWriter>* output) {
if (context.rowset_type == ALPHA_ROWSET) {
return Status::OLAPInternalError(OLAP_ERR_ROWSET_INVALID);
}
if (context.rowset_type == BETA_ROWSET) {
+ if (is_vertical) {
+ output->reset(new VerticalBetaRowsetWriter);
+ return (*output)->init(context);
+ }
output->reset(new BetaRowsetWriter);
return (*output)->init(context);
}
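Call sites pick the writer type through the new flag; a hedged usage sketch (context construction elided):

    std::unique_ptr<RowsetWriter> writer;
    // is_vertical = true selects VerticalBetaRowsetWriter for BETA_ROWSET contexts
    RETURN_NOT_OK(RowsetFactory::create_rowset_writer(context, /*is_vertical=*/true, &writer));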
diff --git a/be/src/olap/rowset/rowset_factory.h b/be/src/olap/rowset/rowset_factory.h
index e216b81f52..72d2e76cf8 100644
--- a/be/src/olap/rowset/rowset_factory.h
+++ b/be/src/olap/rowset/rowset_factory.h
@@ -37,7 +37,7 @@ public:
// create and init rowset writer.
// return OLAP_SUCCESS and set `*output` to inited rowset writer.
// return others if failed
- static Status create_rowset_writer(const RowsetWriterContext& context,
+ static Status create_rowset_writer(const RowsetWriterContext& context, bool is_vertical,
std::unique_ptr<RowsetWriter>* output);
};
diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h
index 3c760dae83..1a7075ef65 100644
--- a/be/src/olap/rowset/rowset_meta.h
+++ b/be/src/olap/rowset/rowset_meta.h
@@ -325,6 +325,22 @@ public:
segments_key_bounds->push_back(key_range);
}
}
+ virtual bool get_first_segment_key_bound(KeyBoundsPB* key_bounds) {
+ // for compatibility: old versions do not have segment key bounds
+ if (_rowset_meta_pb.segments_key_bounds_size() == 0) {
+ return false;
+ }
+ *key_bounds = _rowset_meta_pb.segments_key_bounds(0);
+ return true;
+ }
+ virtual bool get_last_segment_key_bound(KeyBoundsPB* key_bounds) {
+ if (_rowset_meta_pb.segments_key_bounds_size() == 0) {
+ return false;
+ }
+ *key_bounds =
+ _rowset_meta_pb.segments_key_bounds(_rowset_meta_pb.segments_key_bounds_size() - 1);
+ return true;
+ }
void set_segments_key_bounds(const std::vector<KeyBoundsPB>& segments_key_bounds) {
for (const KeyBoundsPB& key_bounds : segments_key_bounds) {
diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h
index f9a1acc0d8..a189ef73dc 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -22,6 +22,7 @@
#include <unordered_map>
#include "gen_cpp/olap_file.pb.h"
+#include "olap/iterators.h"
#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_reader_context.h"
#include "vec/core/block.h"
@@ -43,6 +44,10 @@ public:
// reader init
virtual Status init(RowsetReaderContext* read_context) = 0;
+ virtual Status get_segment_iterators(RowsetReaderContext* read_context,
+ std::vector<RowwiseIterator*>* out_iters) = 0;
+ virtual void reset_read_options() = 0;
+
// read next block data into *block.
// Returns
// OLAP_SUCCESS when read successfully.
diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h
index ce2fd4b721..31b115ae33 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -66,6 +66,8 @@ struct RowsetReaderContext {
const DeleteBitmap* delete_bitmap = nullptr;
bool record_rowids = false;
std::shared_ptr<RowBlockV2> reuse_block;
+ bool is_vertical_compaction = false;
+ bool is_key_column_group = false;
std::shared_ptr<Schema> reuse_input_schema;
};
diff --git a/be/src/olap/rowset/rowset_writer.h b/be/src/olap/rowset/rowset_writer.h
index 2713b3c60c..531dd103c8 100644
--- a/be/src/olap/rowset/rowset_writer.h
+++ b/be/src/olap/rowset/rowset_writer.h
@@ -45,6 +45,10 @@ public:
virtual Status add_block(const vectorized::Block* block) {
return Status::OLAPInternalError(OLAP_ERR_FUNC_NOT_IMPLEMENTED);
}
+ virtual Status add_columns(const vectorized::Block* block, const std::vector<uint32_t>& col_ids,
+ bool is_key, uint32_t max_rows_per_segment) {
+ return Status::OLAPInternalError(OLAP_ERR_FUNC_NOT_IMPLEMENTED);
+ }
// Precondition: the input `rowset` should have the same type of the rowset we're building
virtual Status add_rowset(RowsetSharedPtr rowset) = 0;
@@ -55,6 +59,12 @@ public:
// explicit flush all buffered rows into segment file.
// note that `add_row` could also trigger flush when certain conditions are met
virtual Status flush() = 0;
+ virtual Status flush_columns() {
+ return Status::OLAPInternalError(OLAP_ERR_FUNC_NOT_IMPLEMENTED);
+ }
+ virtual Status final_flush() {
+ return Status::OLAPInternalError(OLAP_ERR_FUNC_NOT_IMPLEMENTED);
+ }
virtual Status flush_single_memtable(MemTable* memtable, int64_t* flush_size) {
return Status::OLAPInternalError(OLAP_ERR_FUNC_NOT_IMPLEMENTED);
@@ -72,6 +82,9 @@ public:
// real build will be called in DeltaWriter close_wait.
virtual RowsetSharedPtr build_tmp() = 0;
+ // For ordered rowset compaction, manually build the rowset
+ virtual RowsetSharedPtr manual_build(const RowsetMetaSharedPtr& rowset_meta) = 0;
+
virtual Version version() = 0;
virtual int64_t num_rows() const = 0;
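These three hooks define the vertical write protocol: for each column group, add_columns is called once per block (is_key is true only for the first group, which fixes segment boundaries via max_rows_per_segment), then flush_columns closes the group; final_flush seals all segments once every group has been written. A hedged sketch of the call order (hypothetical driver mirroring Merger::vertical_merge_rowsets; blocks_of() stands in for the per-group reader loop):

    Status write_vertically(RowsetWriter* writer,
                            const std::vector<std::vector<uint32_t>>& groups,
                            uint32_t max_rows_per_segment) {
        for (size_t i = 0; i < groups.size(); ++i) {
            const bool is_key = (i == 0);  // only the key group drives segment splits
            for (vectorized::Block& block : blocks_of(groups[i])) {  // hypothetical
                RETURN_NOT_OK(writer->add_columns(&block, groups[i], is_key,
                                                  max_rows_per_segment));
            }
            RETURN_NOT_OK(writer->flush_columns());  // close this column group
        }
        return writer->final_flush();  // seal segments after the last group
    }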
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index f644ae87b8..1efe66f97a 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -49,14 +49,11 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id,
_opts(opts),
_file_writer(file_writer),
_mem_tracker(std::make_unique<MemTracker>("SegmentWriter:Segment-" +
- std::to_string(segment_id))),
- _olap_data_convertor(tablet_schema.get()) {
+ std::to_string(segment_id))) {
CHECK_NOTNULL(file_writer);
- if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
- _num_key_columns = _tablet_schema->num_key_columns();
- } else {
- _num_key_columns = _tablet_schema->num_short_key_columns();
- }
+ _num_key_columns = _tablet_schema->num_key_columns();
+ _num_short_key_columns = _tablet_schema->num_short_key_columns();
+ DCHECK(_num_key_columns >= _num_short_key_columns);
for (size_t cid = 0; cid < _num_key_columns; ++cid) {
const auto& column = _tablet_schema->column(cid);
_key_coders.push_back(get_key_coder(column.type()));
@@ -74,10 +71,9 @@ SegmentWriter::~SegmentWriter() {
_mem_tracker->release(_mem_tracker->consumption());
}
-void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t* column_id,
+void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t column_id,
const TabletColumn& column, TabletSchemaSPtr tablet_schema) {
- // TODO(zc): Do we need this column_id??
- meta->set_column_id((*column_id)++);
+ meta->set_column_id(column_id);
meta->set_unique_id(column.unique_id());
meta->set_type(column.type());
meta->set_length(column.length());
@@ -91,13 +87,25 @@ void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t* column_id,
}
Status SegmentWriter::init() {
- uint32_t column_id = 0;
+ std::vector<uint32_t> column_ids;
+ for (uint32_t i = 0; i < _tablet_schema->num_columns(); ++i) {
+ column_ids.emplace_back(i);
+ }
+ return init(column_ids, true);
+}
+
+Status SegmentWriter::init(const std::vector<uint32_t>& col_ids, bool has_key) {
+ DCHECK(_column_writers.empty());
+ DCHECK(_column_ids.empty());
+ _has_key = has_key;
_column_writers.reserve(_tablet_schema->columns().size());
- for (auto& column : _tablet_schema->columns()) {
+ _column_ids.insert(_column_ids.end(), col_ids.begin(), col_ids.end());
+ for (auto& cid : col_ids) {
+ const auto& column = _tablet_schema->column(cid);
ColumnWriterOptions opts;
opts.meta = _footer.add_columns();
- init_column_meta(opts.meta, &column_id, column, _tablet_schema);
+ init_column_meta(opts.meta, cid, column, _tablet_schema);
// now we create zone map for key columns in AGG_KEYS or all column in UNIQUE_KEYS or DUP_KEYS
// and not support zone map for array type and jsonb type.
@@ -130,18 +138,24 @@ Status SegmentWriter::init() {
}
// we don't need the short key index for unique key merge on write table.
- if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
- size_t seq_col_length = 0;
- if (_tablet_schema->has_sequence_col()) {
- seq_col_length =
- _tablet_schema->column(_tablet_schema->sequence_col_idx()).length() + 1;
+ if (_has_key) {
+ if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
+ size_t seq_col_length = 0;
+ if (_tablet_schema->has_sequence_col()) {
+ seq_col_length =
+ _tablet_schema->column(_tablet_schema->sequence_col_idx()).length() + 1;
+ }
+ _primary_key_index_builder.reset(
+ new PrimaryKeyIndexBuilder(_file_writer, seq_col_length));
+ RETURN_IF_ERROR(_primary_key_index_builder->init());
+ } else {
+ _short_key_index_builder.reset(
+ new ShortKeyIndexBuilder(_segment_id, _opts.num_rows_per_block));
}
- _primary_key_index_builder.reset(new PrimaryKeyIndexBuilder(_file_writer, seq_col_length));
- RETURN_IF_ERROR(_primary_key_index_builder->init());
- } else {
- _short_key_index_builder.reset(
- new ShortKeyIndexBuilder(_segment_id, _opts.num_rows_per_block));
}
+ // init the olap data convertor
+ _olap_data_convertor =
+ std::make_unique<vectorized::OlapBlockDataConvertor>(_tablet_schema.get(), _column_ids);
return Status::OK();
}
@@ -149,78 +163,88 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po
size_t num_rows) {
assert(block && num_rows > 0 && row_pos + num_rows <= block->rows() &&
block->columns() == _column_writers.size());
- _olap_data_convertor.set_source_content(block, row_pos, num_rows);
+ _olap_data_convertor->set_source_content(block, row_pos, num_rows);
// find all row pos for short key indexes
std::vector<size_t> short_key_pos;
- // We build a short key index every `_opts.num_rows_per_block` rows. Specifically, we
- // build a short key index using 1st rows for first block and `_short_key_row_pos - _row_count`
- // for next blocks.
- // Ensure we build a short key index using 1st rows only for the first block (ISSUE-9766).
- if (UNLIKELY(_short_key_row_pos == 0 && _row_count == 0)) {
- short_key_pos.push_back(0);
- }
- while (_short_key_row_pos + _opts.num_rows_per_block < _row_count + num_rows) {
- _short_key_row_pos += _opts.num_rows_per_block;
- short_key_pos.push_back(_short_key_row_pos - _row_count);
+ if (_has_key) {
+ // We build a short key index entry every `_opts.num_rows_per_block` rows. Specifically, we
+ // use the 1st row of the first block, and `_short_key_row_pos - _num_rows_written`
+ // for subsequent blocks.
+ // Ensure we build a short key index using the 1st row only for the first block (ISSUE-9766).
+ if (UNLIKELY(_short_key_row_pos == 0 && _num_rows_written == 0)) {
+ short_key_pos.push_back(0);
+ }
+ while (_short_key_row_pos + _opts.num_rows_per_block < _num_rows_written + num_rows) {
+ _short_key_row_pos += _opts.num_rows_per_block;
+ short_key_pos.push_back(_short_key_row_pos - _num_rows_written);
+ }
}
// convert column data from engine format to storage layer format
std::vector<vectorized::IOlapColumnDataAccessor*> key_columns;
- for (size_t cid = 0; cid < _column_writers.size(); ++cid) {
- auto converted_result = _olap_data_convertor.convert_column_data(cid);
+ for (size_t id = 0; id < _column_writers.size(); ++id) {
+ // the olap data convertor is always indexed from 0, independent of the schema cid
+ auto converted_result = _olap_data_convertor->convert_column_data(id);
if (converted_result.first != Status::OK()) {
return converted_result.first;
}
- if (cid < _num_key_columns ||
- (_tablet_schema->has_sequence_col() && _tablet_schema->keys_type() == UNIQUE_KEYS &&
- _opts.enable_unique_key_merge_on_write && cid == _tablet_schema->sequence_col_idx())) {
+ auto cid = _column_ids[id];
+ if (_has_key && (cid < _num_key_columns || (_tablet_schema->has_sequence_col() &&
+ _tablet_schema->keys_type() == UNIQUE_KEYS &&
+ _opts.enable_unique_key_merge_on_write &&
+ cid == _tablet_schema->sequence_col_idx()))) {
key_columns.push_back(converted_result.second);
}
- RETURN_IF_ERROR(_column_writers[cid]->append(converted_result.second->get_nullmap(),
- converted_result.second->get_data(),
- num_rows));
+ RETURN_IF_ERROR(_column_writers[id]->append(converted_result.second->get_nullmap(),
+ converted_result.second->get_data(), num_rows));
}
-
- if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
- // create primary indexes
- for (size_t pos = 0; pos < num_rows; pos++) {
- RETURN_IF_ERROR(_primary_key_index_builder->add_item(_encode_keys(key_columns, pos)));
- }
- } else {
- // create short key indexes
- for (const auto pos : short_key_pos) {
- RETURN_IF_ERROR(_short_key_index_builder->add_item(_encode_keys(key_columns, pos)));
+ if (_has_key) {
+ if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
+ // create primary indexes
+ for (size_t pos = 0; pos < num_rows; pos++) {
+ RETURN_IF_ERROR(
+ _primary_key_index_builder->add_item(_full_encode_keys(key_columns, pos)));
+ }
+ } else {
+ // create short key indexes
+ // record min/max key for the segment
+ set_min_key(_full_encode_keys(key_columns, 0));
+ set_max_key(_full_encode_keys(key_columns, num_rows - 1));
+
+ key_columns.resize(_num_short_key_columns);
+ for (const auto pos : short_key_pos) {
+ RETURN_IF_ERROR(_short_key_index_builder->add_item(_encode_keys(key_columns, pos)));
+ }
}
}
- _row_count += num_rows;
- _olap_data_convertor.clear_source_content();
+ _num_rows_written += num_rows;
+ _olap_data_convertor->clear_source_content();
return Status::OK();
}
int64_t SegmentWriter::max_row_to_add(size_t row_avg_size_in_bytes) {
auto segment_size = estimate_segment_size();
- if (PREDICT_FALSE(segment_size >= MAX_SEGMENT_SIZE || _row_count >= _max_row_per_segment)) {
+ if (PREDICT_FALSE(segment_size >= MAX_SEGMENT_SIZE ||
+ _num_rows_written >= _max_row_per_segment)) {
return 0;
}
int64_t size_rows = ((int64_t)MAX_SEGMENT_SIZE - (int64_t)segment_size) / row_avg_size_in_bytes;
- int64_t count_rows = (int64_t)_max_row_per_segment - _row_count;
+ int64_t count_rows = (int64_t)_max_row_per_segment - _num_rows_written;
return std::min(size_rows, count_rows);
}
-std::string SegmentWriter::_encode_keys(
+std::string SegmentWriter::_full_encode_keys(
const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns, size_t pos,
bool null_first) {
- if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write &&
- _tablet_schema->has_sequence_col()) {
+ assert(_key_index_size.size() == _num_key_columns);
+ if (_tablet_schema->has_sequence_col() && _opts.enable_unique_key_merge_on_write) {
assert(key_columns.size() == _num_key_columns + 1 &&
- _key_coders.size() == _num_key_columns + 1 &&
- _key_index_size.size() == _num_key_columns);
+ _key_coders.size() == _num_key_columns + 1);
} else {
- assert(key_columns.size() == _num_key_columns && _key_coders.size() == _num_key_columns &&
- _key_index_size.size() == _num_key_columns);
+ assert(key_columns.size() == _num_key_columns && _key_coders.size() == _num_key_columns);
}
std::string encoded_keys;
@@ -237,11 +261,32 @@ std::string SegmentWriter::_encode_keys(
continue;
}
encoded_keys.push_back(KEY_NORMAL_MARKER);
- if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
- _key_coders[cid]->full_encode_ascending(field, &encoded_keys);
- } else {
- _key_coders[cid]->encode_ascending(field, _key_index_size[cid], &encoded_keys);
+ _key_coders[cid]->full_encode_ascending(field, &encoded_keys);
+ ++cid;
+ }
+ return encoded_keys;
+}
+
+std::string SegmentWriter::_encode_keys(
+ const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns, size_t pos,
+ bool null_first) {
+ assert(key_columns.size() == _num_short_key_columns);
+
+ std::string encoded_keys;
+ size_t cid = 0;
+ for (const auto& column : key_columns) {
+ auto field = column->get_data_at(pos);
+ if (UNLIKELY(!field)) {
+ if (null_first) {
+ encoded_keys.push_back(KEY_NULL_FIRST_MARKER);
+ } else {
+ encoded_keys.push_back(KEY_NULL_LAST_MARKER);
+ }
+ ++cid;
+ continue;
}
+ encoded_keys.push_back(KEY_NORMAL_MARKER);
+ _key_coders[cid]->encode_ascending(field, _key_index_size[cid], &encoded_keys);
++cid;
}
return encoded_keys;
@@ -253,26 +298,27 @@ Status SegmentWriter::append_row(const RowType& row) {
auto cell = row.cell(cid);
RETURN_IF_ERROR(_column_writers[cid]->append(cell));
}
+ std::string full_encoded_key;
+ encode_key<RowType, true, true>(&full_encoded_key, row, _num_key_columns);
+ if (_tablet_schema->has_sequence_col()) {
+ full_encoded_key.push_back(KEY_NORMAL_MARKER);
+ auto cid = _tablet_schema->sequence_col_idx();
+ auto cell = row.cell(cid);
+ row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), &full_encoded_key);
+ }
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
- std::string encoded_key;
- encode_key<RowType, true, true>(&encoded_key, row, _num_key_columns);
- if (_tablet_schema->has_sequence_col()) {
- encoded_key.push_back(KEY_NORMAL_MARKER);
- auto cid = _tablet_schema->sequence_col_idx();
- auto cell = row.cell(cid);
- row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), &encoded_key);
- }
- RETURN_IF_ERROR(_primary_key_index_builder->add_item(encoded_key));
+ RETURN_IF_ERROR(_primary_key_index_builder->add_item(full_encoded_key));
} else {
// At the beginning of one block, so add a short key index entry
- if ((_row_count % _opts.num_rows_per_block) == 0) {
+ if ((_num_rows_written % _opts.num_rows_per_block) == 0) {
std::string encoded_key;
- encode_key(&encoded_key, row, _num_key_columns);
+ encode_key(&encoded_key, row, _num_short_key_columns);
RETURN_IF_ERROR(_short_key_index_builder->add_item(encoded_key));
}
+ set_min_max_key(full_encoded_key);
}
- ++_row_count;
+ ++_num_rows_written;
return Status::OK();
}
@@ -300,11 +346,14 @@ uint64_t SegmentWriter::estimate_segment_size() {
return size;
}
-Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size) {
- // check disk capacity
- if (_data_dir != nullptr && _data_dir->reach_capacity_limit((int64_t)estimate_segment_size())) {
- return Status::InternalError("disk {} exceed capacity limit.", _data_dir->path_hash());
+Status SegmentWriter::finalize_columns(uint64_t* index_size) {
+ if (_has_key) {
+ _row_count = _num_rows_written;
+ } else {
+ CHECK_EQ(_row_count, _num_rows_written);
}
+ _num_rows_written = 0;
+
for (auto& column_writer : _column_writers) {
RETURN_IF_ERROR(column_writer->finish());
}
@@ -314,18 +363,50 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size
RETURN_IF_ERROR(_write_zone_map());
RETURN_IF_ERROR(_write_bitmap_index());
RETURN_IF_ERROR(_write_bloom_filter_index());
- if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
- RETURN_IF_ERROR(_write_primary_key_index());
- } else {
- RETURN_IF_ERROR(_write_short_key_index());
- }
+
*index_size = _file_writer->bytes_appended() - index_offset;
+ if (_has_key) {
+ if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
+ RETURN_IF_ERROR(_write_primary_key_index());
+ } else {
+ RETURN_IF_ERROR(_write_short_key_index());
+ }
+ *index_size = _file_writer->bytes_appended() - index_offset;
+ }
+ // reset all column writers and data_conveter
+ _reset_column_writers();
+ _column_ids.clear();
+ _olap_data_convertor.reset();
+ return Status::OK();
+}
+
+Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) {
RETURN_IF_ERROR(_write_footer());
RETURN_IF_ERROR(_file_writer->finalize());
*segment_file_size = _file_writer->bytes_appended();
return Status::OK();
}
+Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size) {
+ // check disk capacity
+ if (_data_dir != nullptr && _data_dir->reach_capacity_limit((int64_t)estimate_segment_size())) {
+ return Status::InternalError("disk {} exceed capacity limit.", _data_dir->path_hash());
+ }
+
+ RETURN_IF_ERROR(finalize_columns(index_size));
+
+ // write footer
+ RETURN_IF_ERROR(finalize_footer(segment_file_size));
+ return Status::OK();
+}
+
+void SegmentWriter::_reset_column_writers() {
+ for (auto& column_writer : _column_writers) {
+ column_writer.reset();
+ }
+ _column_writers.clear();
+}
+
// write column data to file one by one
Status SegmentWriter::_write_data() {
for (auto& column_writer : _column_writers) {
@@ -408,13 +489,36 @@ Status SegmentWriter::_write_raw_data(const std::vector<Slice>& slices) {
}
Slice SegmentWriter::min_encoded_key() {
- return (_primary_key_index_builder == nullptr) ? Slice()
+ return (_primary_key_index_builder == nullptr) ? Slice(_min_key.data(), _min_key.size())
: _primary_key_index_builder->min_key();
}
Slice SegmentWriter::max_encoded_key() {
- return (_primary_key_index_builder == nullptr) ? Slice()
+ return (_primary_key_index_builder == nullptr) ? Slice(_max_key.data(), _max_key.size())
: _primary_key_index_builder->max_key();
}
+void SegmentWriter::set_min_max_key(const Slice& key) {
+ if (UNLIKELY(_is_first_row)) {
+ _min_key.append(key.get_data(), key.get_size());
+ _is_first_row = false;
+ }
+ if (key.compare(_max_key) > 0) {
+ _max_key.clear();
+ _max_key.append(key.get_data(), key.get_size());
+ }
+}
+
+void SegmentWriter::set_min_key(const Slice& key) {
+ if (UNLIKELY(_is_first_row)) {
+ _min_key.append(key.get_data(), key.get_size());
+ _is_first_row = false;
+ }
+}
+
+void SegmentWriter::set_max_key(const Slice& key) {
+ _max_key.clear();
+ _max_key.append(key.get_data(), key.get_size());
+}
+
} // namespace segment_v2
} // namespace doris
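
The finalize() split above is the heart of the vertical write path: finalize_columns() can now be called once per column group, and finalize_footer() is deferred until every group has been flushed. A minimal sketch (not part of this patch) of the intended call sequence for a single segment, assuming the blocks and column id vectors already exist:

    Status write_segment_vertically(segment_v2::SegmentWriter* writer,
                                    const vectorized::Block* key_block,
                                    const vectorized::Block* value_block,
                                    const std::vector<uint32_t>& key_ids,
                                    const std::vector<uint32_t>& value_ids) {
        uint64_t index_size = 0;
        uint64_t segment_file_size = 0;
        // Pass 1: key column group; builds the short key or primary key index.
        RETURN_IF_ERROR(writer->init(key_ids, /*has_key=*/true));
        RETURN_IF_ERROR(writer->append_block(key_block, 0, key_block->rows()));
        RETURN_IF_ERROR(writer->finalize_columns(&index_size));
        // Pass 2: value column group; its row count must match the key group's.
        RETURN_IF_ERROR(writer->init(value_ids, /*has_key=*/false));
        RETURN_IF_ERROR(writer->append_block(value_block, 0, value_block->rows()));
        RETURN_IF_ERROR(writer->finalize_columns(&index_size));
        // The footer is written once, after every column group is flushed.
        return writer->finalize_footer(&segment_file_size);
    }
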
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h
index 4d81dbb2a9..9e15356ff0 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -26,6 +26,7 @@
#include "gen_cpp/segment_v2.pb.h"
#include "gutil/macros.h"
#include "olap/tablet_schema.h"
+#include "util/faststring.h"
#include "vec/core/block.h"
#include "vec/olap/olap_data_convertor.h"
@@ -69,6 +70,9 @@ public:
Status init();
+ // for vertical compaction
+ Status init(const std::vector<uint32_t>& col_ids, bool has_key);
+
template <typename RowType>
Status append_row(const RowType& row);
@@ -78,15 +82,18 @@ public:
uint64_t estimate_segment_size();
- uint32_t num_rows_written() const { return _row_count; }
+ uint32_t num_rows_written() const { return _num_rows_written; }
+ uint32_t row_count() const { return _row_count; }
Status finalize(uint64_t* segment_file_size, uint64_t* index_size);
- static void init_column_meta(ColumnMetaPB* meta, uint32_t* column_id,
- const TabletColumn& column, TabletSchemaSPtr tablet_schema);
+ uint32_t get_segment_id() { return _segment_id; }
- uint32_t get_segment_id() const { return _segment_id; }
+ Status finalize_columns(uint64_t* index_size);
+ Status finalize_footer(uint64_t* segment_file_size);
+ static void init_column_meta(ColumnMetaPB* meta, uint32_t column_id, const TabletColumn& column,
+ TabletSchemaSPtr tablet_schema);
Slice min_encoded_key();
Slice max_encoded_key();
@@ -106,6 +113,15 @@ private:
Status _write_raw_data(const std::vector<Slice>& slices);
std::string _encode_keys(const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns,
size_t pos, bool null_first = true);
+ // for unique-key merge on write and segment min_max key
+ std::string _full_encode_keys(
+ const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns, size_t pos,
+ bool null_first = true);
+ void set_min_max_key(const Slice& key);
+ void set_min_key(const Slice& key);
+ void set_max_key(const Slice& key);
+
+ void _reset_column_writers();
private:
uint32_t _segment_id;
@@ -119,17 +135,30 @@ private:
SegmentFooterPB _footer;
size_t _num_key_columns;
+ size_t _num_short_key_columns;
std::unique_ptr<ShortKeyIndexBuilder> _short_key_index_builder;
std::unique_ptr<PrimaryKeyIndexBuilder> _primary_key_index_builder;
std::vector<std::unique_ptr<ColumnWriter>> _column_writers;
std::unique_ptr<MemTracker> _mem_tracker;
- uint32_t _row_count = 0;
- vectorized::OlapBlockDataConvertor _olap_data_convertor;
+ std::unique_ptr<vectorized::OlapBlockDataConvertor> _olap_data_convertor;
// used for building short key index or primary key index during vectorized write.
std::vector<const KeyCoder*> _key_coders;
std::vector<uint16_t> _key_index_size;
size_t _short_key_row_pos = 0;
+
+ std::vector<uint32_t> _column_ids;
+ bool _has_key = true;
+ // _num_rows_written is the row count already written in the current column group
+ uint32_t _num_rows_written = 0;
+ // _row_count is the total row count of this segment.
+ // In vertical compaction it is recorded when the key column group finishes,
+ // and _num_rows_written is then reset and reused by each value column group.
+ uint32_t _row_count = 0;
+
+ bool _is_first_row = true;
+ faststring _min_key;
+ faststring _max_key;
};
} // namespace segment_v2
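
The init(col_ids, has_key) overload assumes the caller has already partitioned the schema into a key group followed by value groups. A hypothetical helper (not in this patch) showing one way to build that partition, with the value group width as a parameter:

    std::vector<std::vector<uint32_t>> split_column_groups(const TabletSchema& schema,
                                                           size_t value_group_width) {
        std::vector<std::vector<uint32_t>> groups;
        // The first group always carries all key columns.
        std::vector<uint32_t> key_group;
        for (uint32_t cid = 0; cid < schema.num_key_columns(); ++cid) {
            key_group.push_back(cid);
        }
        groups.push_back(std::move(key_group));
        // Remaining columns are chunked into fixed-width value groups.
        std::vector<uint32_t> value_group;
        for (uint32_t cid = schema.num_key_columns(); cid < schema.num_columns(); ++cid) {
            value_group.push_back(cid);
            if (value_group.size() == value_group_width) {
                groups.push_back(std::move(value_group));
                value_group.clear();
            }
        }
        if (!value_group.empty()) {
            groups.push_back(std::move(value_group));
        }
        return groups;
    }
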
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
new file mode 100644
index 0000000000..50324c8dd3
--- /dev/null
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/vertical_beta_rowset_writer.h"
+
+#include "olap/rowset/beta_rowset.h"
+
+namespace doris {
+
+VerticalBetaRowsetWriter::~VerticalBetaRowsetWriter() {
+ if (!_already_built) {
+ auto fs = _rowset_meta->fs();
+ if (!fs) {
+ return;
+ }
+ for (auto& segment_writer : _segment_writers) {
+ segment_writer.reset();
+ }
+ for (int i = 0; i < _num_segment; ++i) {
+ auto path = BetaRowset::segment_file_path(_context.rowset_dir, _context.rowset_id, i);
+ // Even if an error is encountered here, any files that were not cleaned up
+ // will be removed by the background GC, so we only log the error message.
+ WARN_IF_ERROR(fs->delete_file(path),
+ strings::Substitute("Failed to delete file=$0", path));
+ }
+ }
+}
+
+Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block,
+ const std::vector<uint32_t>& col_ids, bool is_key,
+ uint32_t max_rows_per_segment) {
+ VLOG_NOTICE << "VerticalBetaRowsetWriter::add_columns, columns: " << block->columns();
+ size_t num_rows = block->rows();
+ if (num_rows == 0) {
+ return Status::OK();
+ }
+ if (UNLIKELY(max_rows_per_segment > _context.max_rows_per_segment)) {
+ max_rows_per_segment = _context.max_rows_per_segment;
+ }
+ if (_segment_writers.empty()) {
+ // the first group written must be the key columns
+ DCHECK(is_key);
+ std::unique_ptr<segment_v2::SegmentWriter> writer;
+ RETURN_IF_ERROR(_create_segment_writer(col_ids, is_key, &writer));
+ _segment_writers.emplace_back(std::move(writer));
+ _cur_writer_idx = 0;
+ RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows));
+ } else if (is_key) {
+ if (_segment_writers[_cur_writer_idx]->num_rows_written() > max_rows_per_segment) {
+ // the segment is full; flush its columns and create a new segment writer
+ RETURN_IF_ERROR(_flush_columns(&_segment_writers[_cur_writer_idx], true));
+ std::unique_ptr<segment_v2::SegmentWriter> writer;
+ RETURN_IF_ERROR(_create_segment_writer(col_ids, is_key, &writer));
+ _segment_writers.emplace_back(std::move(writer));
+ ++_cur_writer_idx;
+ }
+ RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows));
+ } else {
+ // value columns
+ uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written();
+ VLOG_NOTICE << "num_rows_written: " << num_rows_written
+ << ", _cur_writer_idx: " << _cur_writer_idx;
+ // init when writing the first value column group of the current segment
+ if (_cur_writer_idx == 0 && num_rows_written == 0) {
+ VLOG_NOTICE << "init first value column segment writer";
+ RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key));
+ }
+ if (num_rows_written > max_rows_per_segment) {
+ RETURN_IF_ERROR(_flush_columns(&_segment_writers[_cur_writer_idx]));
+ // switch to next writer
+ ++_cur_writer_idx;
+ VLOG_NOTICE << "init next value column segment writer: " << _cur_writer_idx;
+ RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key));
+ }
+ RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows));
+ }
+ if (is_key) {
+ _num_rows_written += num_rows;
+ }
+ return Status::OK();
+}
+
+Status VerticalBetaRowsetWriter::_flush_columns(
+ std::unique_ptr<segment_v2::SegmentWriter>* segment_writer, bool is_key) {
+ uint64_t index_size = 0;
+ VLOG_NOTICE << "flush columns index: " << _cur_writer_idx;
+ RETURN_IF_ERROR((*segment_writer)->finalize_columns(&index_size));
+ if (is_key) {
+ // record the segment's key bounds
+ KeyBoundsPB key_bounds;
+ Slice min_key = (*segment_writer)->min_encoded_key();
+ Slice max_key = (*segment_writer)->max_encoded_key();
+ DCHECK_LE(min_key.compare(max_key), 0);
+ key_bounds.set_min_key(min_key.to_string());
+ key_bounds.set_max_key(max_key.to_string());
+ _segments_encoded_key_bounds.emplace_back(key_bounds);
+ }
+ _total_index_size += static_cast<int64_t>(index_size);
+ return Status::OK();
+}
+
+Status VerticalBetaRowsetWriter::flush_columns() {
+ if (_segment_writers.empty()) {
+ return Status::OK();
+ }
+
+ DCHECK(_segment_writers[_cur_writer_idx]);
+ RETURN_IF_ERROR(_flush_columns(&_segment_writers[_cur_writer_idx]));
+ _cur_writer_idx = 0;
+ return Status::OK();
+}
+
+Status VerticalBetaRowsetWriter::_create_segment_writer(
+ const std::vector<uint32_t>& column_ids, bool is_key,
+ std::unique_ptr<segment_v2::SegmentWriter>* writer) {
+ auto path =
+ BetaRowset::segment_file_path(_context.rowset_dir, _context.rowset_id, _num_segment++);
+ auto fs = _rowset_meta->fs();
+ if (!fs) {
+ return Status::OLAPInternalError(OLAP_ERR_INIT_FAILED);
+ }
+ io::FileWriterPtr file_writer;
+ Status st = fs->create_file(path, &file_writer);
+ if (!st.ok()) {
+ LOG(WARNING) << "failed to create writable file. path=" << path
+ << ", err: " << st.get_error_msg();
+ return st;
+ }
+
+ DCHECK(file_writer != nullptr);
+ segment_v2::SegmentWriterOptions writer_options;
+ writer_options.enable_unique_key_merge_on_write = _context.enable_unique_key_merge_on_write;
+ writer->reset(new segment_v2::SegmentWriter(file_writer.get(), _num_segment,
+ _context.tablet_schema, _context.data_dir,
+ _context.max_rows_per_segment, writer_options));
+ {
+ std::lock_guard<SpinLock> l(_lock);
+ _file_writers.push_back(std::move(file_writer));
+ }
+
+ auto s = (*writer)->init(column_ids, is_key);
+ if (!s.ok()) {
+ LOG(WARNING) << "failed to init segment writer: " << s.to_string();
+ writer->reset(nullptr);
+ return s;
+ }
+ return Status::OK();
+}
+
+Status VerticalBetaRowsetWriter::final_flush() {
+ for (auto& segment_writer : _segment_writers) {
+ uint64_t segment_size = 0;
+ auto st = segment_writer->finalize_footer(&segment_size);
+ if (!st.ok()) {
+ LOG(WARNING) << "Fail to finalize segment footer, " << st;
+ return st;
+ }
+ _total_data_size += segment_size;
+ segment_writer.reset();
+ }
+ return Status::OK();
+}
+
+} // namespace doris
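
Putting the new writer methods together, the expected driving pattern is: feed each column group through add_columns(), close the group with flush_columns(), and write all footers once with final_flush(). A simplified sketch; read_next_block() is a hypothetical stand-in for the vertical block reader feeding one column group at a time:

    Status compact_vertically(VerticalBetaRowsetWriter* writer,
                              const std::vector<std::vector<uint32_t>>& groups,
                              uint32_t max_rows_per_segment) {
        for (size_t i = 0; i < groups.size(); ++i) {
            const bool is_key = (i == 0); // the first group holds the key columns
            while (const vectorized::Block* block = read_next_block(groups[i], is_key)) {
                RETURN_IF_ERROR(writer->add_columns(block, groups[i], is_key,
                                                    max_rows_per_segment));
            }
            RETURN_IF_ERROR(writer->flush_columns()); // finalize this group
        }
        return writer->final_flush(); // write footers for all segments
    }
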
diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.h b/be/src/olap/rowset/vertical_beta_rowset_writer.h
new file mode 100644
index 0000000000..2c055ecb41
--- /dev/null
+++ b/be/src/olap/rowset/vertical_beta_rowset_writer.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "olap/rowset/beta_rowset_writer.h"
+#include "olap/rowset/segment_v2/segment_writer.h"
+
+namespace doris {
+
+// for vertical compaction
+class VerticalBetaRowsetWriter : public BetaRowsetWriter {
+public:
+ VerticalBetaRowsetWriter() : BetaRowsetWriter() {}
+ ~VerticalBetaRowsetWriter();
+
+ Status add_columns(const vectorized::Block* block, const std::vector<uint32_t>& col_ids,
+ bool is_key, uint32_t max_rows_per_segment);
+
+ // flush the last segment writer's columns for the current column group
+ Status flush_columns();
+
+ // called after all column groups are finished; writes each segment's footer
+ Status final_flush();
+
+private:
+ // only the key column group creates segment writers
+ Status _create_segment_writer(const std::vector<uint32_t>& column_ids, bool is_key,
+ std::unique_ptr<segment_v2::SegmentWriter>* writer);
+
+ Status _flush_columns(std::unique_ptr<segment_v2::SegmentWriter>* segment_writer,
+ bool is_key = false);
+
+private:
+ std::vector<std::unique_ptr<segment_v2::SegmentWriter>> _segment_writers;
+ size_t _cur_writer_idx = 0;
+};
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index 4cbb0d394a..7a4093e912 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -266,7 +266,7 @@ Status SnapshotManager::_rename_rowset_id(const RowsetMetaPB& rs_meta_pb,
context.segments_overlap = rowset_meta->segments_overlap();
std::unique_ptr<RowsetWriter> rs_writer;
- RETURN_NOT_OK(RowsetFactory::create_rowset_writer(context, &rs_writer));
+ RETURN_NOT_OK(RowsetFactory::create_rowset_writer(context, false, &rs_writer));
res = rs_writer->add_rowset(org_rowset);
if (!res.ok()) {
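
create_rowset_writer() now takes a bool selecting the vertical writer. The factory body is not part of this hunk; a plausible dispatch, consistent with the call sites changed here, would look like:

    // Sketch of the assumed factory dispatch: `is_vertical == true` selects
    // the VerticalBetaRowsetWriter introduced by this patch.
    Status RowsetFactory::create_rowset_writer(const RowsetWriterContext& context,
                                               bool is_vertical,
                                               std::unique_ptr<RowsetWriter>* output) {
        if (is_vertical) {
            output->reset(new VerticalBetaRowsetWriter());
        } else {
            output->reset(new BetaRowsetWriter());
        }
        return (*output)->init(context);
    }
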
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 11d125035e..1c8e1a0905 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1661,6 +1661,22 @@ Status Tablet::create_initial_rowset(const int64_t req_version) {
return res;
}
+Status Tablet::create_vertical_rowset_writer(
+ const Version& version, const RowsetStatePB& rowset_state, const SegmentsOverlapPB& overlap,
+ TabletSchemaSPtr tablet_schema, int64_t oldest_write_timestamp,
+ int64_t newest_write_timestamp, std::unique_ptr<RowsetWriter>* rowset_writer) {
+ RowsetWriterContext context;
+ context.version = version;
+ context.rowset_state = rowset_state;
+ context.segments_overlap = overlap;
+ context.oldest_write_timestamp = oldest_write_timestamp;
+ context.newest_write_timestamp = newest_write_timestamp;
+ context.tablet_schema = tablet_schema;
+ context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write();
+ _init_context_common_fields(context);
+ return RowsetFactory::create_rowset_writer(context, true, rowset_writer);
+}
+
Status Tablet::create_rowset_writer(const Version& version, const RowsetStatePB& rowset_state,
const SegmentsOverlapPB& overlap,
TabletSchemaSPtr tablet_schema, int64_t oldest_write_timestamp,
@@ -1686,7 +1702,7 @@ Status Tablet::create_rowset_writer(const Version& version, const RowsetStatePB&
context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write();
context.fs = fs;
_init_context_common_fields(context);
- return RowsetFactory::create_rowset_writer(context, rowset_writer);
+ return RowsetFactory::create_rowset_writer(context, false, rowset_writer);
}
Status Tablet::create_rowset_writer(const int64_t& txn_id, const PUniqueId& load_id,
@@ -1704,7 +1720,7 @@ Status Tablet::create_rowset_writer(const int64_t& txn_id, const PUniqueId& load
context.tablet_schema = tablet_schema;
context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write();
_init_context_common_fields(context);
- return RowsetFactory::create_rowset_writer(context, rowset_writer);
+ return RowsetFactory::create_rowset_writer(context, false, rowset_writer);
}
void Tablet::_init_context_common_fields(RowsetWriterContext& context) {
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index 318fdc8748..4a66e8824c 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -312,6 +312,13 @@ public:
TabletSchemaSPtr tablet_schema,
std::unique_ptr<RowsetWriter>* rowset_writer);
+ Status create_vertical_rowset_writer(const Version& version, const RowsetStatePB& rowset_state,
+ const SegmentsOverlapPB& overlap,
+ TabletSchemaSPtr tablet_schema,
+ int64_t oldest_write_timestamp,
+ int64_t newest_write_timestamp,
+ std::unique_ptr<RowsetWriter>* rowset_writer);
+
Status create_rowset(RowsetMetaSharedPtr rowset_meta, RowsetSharedPtr* rowset);
// Cooldown to remote fs.
Status cooldown();
diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp
index e040ffcece..8bb6eb1f9e 100644
--- a/be/src/olap/task/engine_checksum_task.cpp
+++ b/be/src/olap/task/engine_checksum_task.cpp
@@ -53,9 +53,13 @@ Status EngineChecksumTask::_compute_checksum() {
TupleReader reader;
TabletReader::ReaderParams reader_params;
reader_params.tablet = tablet;
+ reader_params.tablet_schema = tablet->tablet_schema();
reader_params.reader_type = READER_CHECKSUM;
reader_params.version = Version(0, _version);
-
+ auto& delete_preds = tablet->delete_predicates();
+ std::copy(delete_preds.cbegin(), delete_preds.cend(),
+ std::inserter(reader_params.delete_predicates,
+ reader_params.delete_predicates.begin()));
{
std::shared_lock rdlock(tablet->get_header_lock());
const RowsetSharedPtr message = tablet->rowset_with_max_version();
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index 0e83f98167..0cc83168ac 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -231,6 +231,8 @@ set(VEC_FILES
olap/vcollect_iterator.cpp
olap/block_reader.cpp
olap/olap_data_convertor.cpp
+ olap/vertical_merge_iterator.cpp
+ olap/vertical_block_reader.cpp
sink/vmysql_result_writer.cpp
sink/vresult_sink.cpp
sink/vdata_stream_sender.cpp
diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp
index 977fc3d1c7..e9b4c8fd24 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -33,6 +33,15 @@ OlapBlockDataConvertor::OlapBlockDataConvertor(const TabletSchema* tablet_schema
}
}
+OlapBlockDataConvertor::OlapBlockDataConvertor(const TabletSchema* tablet_schema,
+ const std::vector<uint32_t>& col_ids) {
+ assert(tablet_schema);
+ for (const auto& id : col_ids) {
+ const auto& col = tablet_schema->column(id);
+ _convertors.emplace_back(create_olap_column_data_convertor(col));
+ }
+}
+
OlapBlockDataConvertor::OlapColumnDataConvertorBaseUPtr
OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& column) {
switch (column.type()) {
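
With the new col_ids constructor, convertor slots are indexed by position within col_ids rather than by schema column id, which is why SegmentWriter::append_block() maps a local index back through _column_ids. A small usage sketch (the column ids and `tablet_schema` pointer are hypothetical):

    std::vector<uint32_t> col_ids = {4, 7}; // a value column group
    vectorized::OlapBlockDataConvertor convertor(tablet_schema, col_ids);
    convertor.set_source_content(&block, /*row_pos=*/0, block.rows());
    // Position 0 converts schema column 4, not schema column 0.
    auto res = convertor.convert_column_data(/*cid=*/0);
    // res.first is the Status, res.second the IOlapColumnDataAccessor*.
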
diff --git a/be/src/vec/olap/olap_data_convertor.h b/be/src/vec/olap/olap_data_convertor.h
index 577a60bc42..6898b44a9d 100644
--- a/be/src/vec/olap/olap_data_convertor.h
+++ b/be/src/vec/olap/olap_data_convertor.h
@@ -47,6 +47,7 @@ public:
class OlapBlockDataConvertor {
public:
OlapBlockDataConvertor(const TabletSchema* tablet_schema);
+ OlapBlockDataConvertor(const TabletSchema* tablet_schema, const std::vector<uint32_t>& col_ids);
void set_source_content(const vectorized::Block* block, size_t row_pos, size_t num_rows);
void clear_source_content();
std::pair<Status, IOlapColumnDataAccessor*> convert_column_data(size_t cid);
diff --git a/be/src/vec/olap/vertical_block_reader.cpp b/be/src/vec/olap/vertical_block_reader.cpp
new file mode 100644
index 0000000000..a5c02fb176
--- /dev/null
+++ b/be/src/vec/olap/vertical_block_reader.cpp
@@ -0,0 +1,397 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/olap/vertical_block_reader.h"
+
+#include "common/status.h"
+#include "olap/like_column_predicate.h"
+#include "olap/olap_common.h"
+#include "runtime/mem_pool.h"
+#include "vec/aggregate_functions/aggregate_function_reader.h"
+#include "vec/olap/block_reader.h"
+#include "vec/olap/vcollect_iterator.h"
+#include "vec/olap/vertical_merge_iterator.h"
+
+namespace doris::vectorized {
+
+VerticalBlockReader::~VerticalBlockReader() {
+ for (int i = 0; i < _agg_functions.size(); ++i) {
+ _agg_functions[i]->destroy(_agg_places[i]);
+ delete[] _agg_places[i];
+ }
+}
+
+Status VerticalBlockReader::_get_segment_iterators(const ReaderParams& read_params,
+ std::vector<RowwiseIterator*>* segment_iters) {
+ std::vector<RowsetReaderSharedPtr> rs_readers;
+ auto res = _capture_rs_readers(read_params, &rs_readers);
+ if (!res.ok()) {
+ LOG(WARNING) << "fail to init reader when _capture_rs_readers. res:" << res
+ << ", tablet_id:" << read_params.tablet->tablet_id()
+ << ", schema_hash:" << read_params.tablet->schema_hash()
+ << ", reader_type:" << read_params.reader_type
+ << ", version:" << read_params.version;
+ return res;
+ }
+ _reader_context.batch_size = _batch_size;
+ _reader_context.is_vec = true;
+ _reader_context.is_vertical_compaction = true;
+ for (auto& rs_reader : rs_readers) {
+ // segment iterators will be initialized here
+ RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, segment_iters));
+ rs_reader->reset_read_options();
+ }
+ return Status::OK();
+}
+
+Status VerticalBlockReader::_init_collect_iter(const ReaderParams& read_params) {
+ // get segment iterators
+ std::vector<RowwiseIterator*> segment_iters;
+ RETURN_IF_ERROR(_get_segment_iterators(read_params, &segment_iters));
+
+ // build a heap merge iterator for the key column group, or a mask merge iterator for value column groups
+ auto ori_return_col_size = _return_columns.size();
+ if (read_params.is_key_column_group) {
+ uint32_t seq_col_idx = -1;
+ if (read_params.tablet->tablet_schema()->has_sequence_col()) {
+ seq_col_idx = read_params.tablet->tablet_schema()->sequence_col_idx();
+ }
+ _vcollect_iter = new_vertical_heap_merge_iterator(segment_iters, ori_return_col_size,
+ read_params.tablet->keys_type(),
+ seq_col_idx, _row_sources_buffer);
+ } else {
+ _vcollect_iter = new_vertical_mask_merge_iterator(segment_iters, ori_return_col_size,
+ _row_sources_buffer);
+ }
+ // init collect iterator
+ StorageReadOptions opts;
+ RETURN_IF_ERROR(_vcollect_iter->init(opts));
+
+ // In agg keys value column compaction, fetch the first row for _init_agg_state
+ if (!read_params.is_key_column_group && read_params.tablet->keys_type() == KeysType::AGG_KEYS) {
+ auto st = _vcollect_iter->next_row(&_next_row);
+ _eof = st.is_end_of_file();
+ }
+
+ return Status::OK();
+}
+
+void VerticalBlockReader::_init_agg_state(const ReaderParams& read_params) {
+ if (_eof) {
+ return;
+ }
+ DCHECK(_return_columns.size() == _next_row.block->columns());
+ _stored_data_columns = _next_row.block->create_same_struct_block(_batch_size)->mutate_columns();
+
+ _stored_has_null_tag.resize(_stored_data_columns.size());
+ _stored_has_string_tag.resize(_stored_data_columns.size());
+
+ auto& tablet_schema = *_tablet_schema;
+ for (size_t idx = 0; idx < _return_columns.size(); ++idx) {
+ AggregateFunctionPtr function =
+ tablet_schema.column(_return_columns.at(idx))
+ .get_aggregate_function({_next_row.block->get_data_type(idx)},
+ vectorized::AGG_READER_SUFFIX);
+ DCHECK(function != nullptr);
+ _agg_functions.push_back(function);
+ // create aggregate data
+ AggregateDataPtr place = new char[function->size_of_data()];
+ function->create(place);
+ _agg_places.push_back(place);
+
+ // calculate `has_string` tag.
+ _stored_has_string_tag[idx] =
+ _stored_data_columns[idx]->is_column_string() ||
+ (_stored_data_columns[idx]->is_nullable() &&
+ reinterpret_cast<ColumnNullable*>(_stored_data_columns[idx].get())
+ ->get_nested_column_ptr()
+ ->is_column_string());
+ }
+}
+
+Status VerticalBlockReader::init(const ReaderParams& read_params) {
+ StorageReadOptions opts;
+ _batch_size = opts.block_row_max;
+ RETURN_NOT_OK(TabletReader::init(read_params));
+
+ std::vector<RowsetReaderSharedPtr> rs_readers;
+ auto status = _init_collect_iter(read_params);
+ if (!status.ok()) {
+ return status;
+ }
+
+ switch (tablet()->keys_type()) {
+ case KeysType::DUP_KEYS:
+ _next_block_func = &VerticalBlockReader::_direct_next_block;
+ break;
+ case KeysType::UNIQUE_KEYS:
+ _next_block_func = &VerticalBlockReader::_unique_key_next_block;
+ if (_filter_delete) {
+ _delete_filter_column = ColumnUInt8::create();
+ }
+ break;
+ case KeysType::AGG_KEYS:
+ _next_block_func = &VerticalBlockReader::_agg_key_next_block;
+ if (!read_params.is_key_column_group) {
+ _init_agg_state(read_params);
+ }
+ break;
+ default:
+ DCHECK(false) << "No next row function for type:" << tablet()->keys_type();
+ break;
+ }
+ return Status::OK();
+}
+
+Status VerticalBlockReader::_direct_next_block(Block* block, MemPool* mem_pool,
+ ObjectPool* agg_pool, bool* eof) {
+ auto res = _vcollect_iter->next_batch(block);
+ if (UNLIKELY(!res.ok() && !res.is_end_of_file())) {
+ return res;
+ }
+ *eof = (res.is_end_of_file());
+ _eof = *eof;
+ return Status::OK();
+}
+
+void VerticalBlockReader::_append_agg_data(MutableColumns& columns) {
+ _stored_row_ref.push_back(_next_row);
+ _last_agg_data_counter++;
+
+ // execute aggregation once `batch_size` rows are buffered or the referenced block is about to be invalidated
+ bool is_last = (_next_row.block->rows() == _next_row.row_pos + 1);
+ if (is_last || _stored_row_ref.size() == _batch_size) {
+ _update_agg_data(columns);
+ }
+}
+
+void VerticalBlockReader::_update_agg_data(MutableColumns& columns) {
+ // copy data to stored block
+ size_t copy_size = _copy_agg_data();
+
+ // calculate has_null_tag
+ for (size_t idx = 0; idx < _return_columns.size(); ++idx) {
+ _stored_has_null_tag[idx] = _stored_data_columns[idx]->has_null(copy_size);
+ }
+
+ // calculate aggregate and insert
+ int counter_sum = 0;
+ for (int counter : _agg_data_counters) {
+ _update_agg_value(columns, counter_sum, counter_sum + counter - 1);
+ counter_sum += counter;
+ }
+
+ // the last key may have more values in the next block, so do not insert its result yet
+ if (_last_agg_data_counter) {
+ _update_agg_value(columns, counter_sum, counter_sum + _last_agg_data_counter - 1, false);
+ _last_agg_data_counter = 0;
+ }
+
+ _agg_data_counters.clear();
+}
+
+void VerticalBlockReader::_update_agg_value(MutableColumns& columns, int begin, int end,
+ bool is_close) {
+ for (size_t idx = 0; idx < _return_columns.size(); ++idx) {
+ AggregateFunctionPtr function = _agg_functions[idx];
+ AggregateDataPtr place = _agg_places[idx];
+ auto column_ptr = _stored_data_columns[idx].get();
+
+ if (begin <= end) {
+ function->add_batch_range(begin, end, place, const_cast<const IColumn**>(&column_ptr),
+ nullptr, _stored_has_null_tag[idx]);
+ }
+
+ if (is_close) {
+ function->insert_result_into(place, *columns[idx]);
+ // reset aggregate data
+ function->destroy(place);
+ function->create(place);
+ }
+ }
+}
+
+size_t VerticalBlockReader::_copy_agg_data() {
+ size_t copy_size = _stored_row_ref.size();
+
+ for (size_t i = 0; i < copy_size; i++) {
+ auto& ref = _stored_row_ref[i];
+ _temp_ref_map[ref.block.get()].emplace_back(ref.row_pos, i);
+ }
+ for (size_t idx = 0; idx < _return_columns.size(); ++idx) {
+ auto& dst_column = _stored_data_columns[idx];
+ if (_stored_has_string_tag[idx]) {
+ // string columns must be replaced in row order
+ for (size_t i = 0; i < copy_size; i++) {
+ auto& ref = _stored_row_ref[i];
+ dst_column->replace_column_data(*ref.block->get_by_position(idx).column,
+ ref.row_pos, i);
+ }
+ } else {
+ for (auto& it : _temp_ref_map) {
+ if (!it.second.empty()) {
+ auto& src_column = *it.first->get_by_position(idx).column;
+ for (auto& pos : it.second) {
+ dst_column->replace_column_data(src_column, pos.first, pos.second);
+ }
+ }
+ }
+ }
+ }
+
+ for (auto& it : _temp_ref_map) {
+ it.second.clear();
+ }
+ _stored_row_ref.clear();
+
+ return copy_size;
+}
+
+Status VerticalBlockReader::_agg_key_next_block(Block* block, MemPool* mem_pool,
+ ObjectPool* agg_pool, bool* eof) {
+ if (_reader_context.is_key_column_group) {
+ // collect_iter will filter agg keys
+ auto res = _vcollect_iter->next_batch(block);
+ if (UNLIKELY(!res.ok() && !res.is_end_of_file())) {
+ return res;
+ }
+ *eof = (res.is_end_of_file());
+ _eof = *eof;
+ return Status::OK();
+ }
+ // handle value agg
+ if (UNLIKELY(_eof)) {
+ *eof = true;
+ return Status::OK();
+ }
+ int target_block_row = 0;
+ auto target_columns = block->mutate_columns();
+
+ // copy the first row fetched from the collect iterator during init
+ _append_agg_data(target_columns);
+ target_block_row++;
+
+ do {
+ Status res = _vcollect_iter->next_row(&_next_row);
+ if (UNLIKELY(!res.ok())) {
+ if (UNLIKELY(res.is_end_of_file())) {
+ *eof = true;
+ _eof = true;
+ break;
+ }
+ LOG(WARNING) << "next failed: " << res;
+ return res;
+ }
+ DCHECK(_next_row.block->columns() == block->columns());
+ if (!_next_row.is_same) {
+ if (target_block_row == _batch_size) {
+ break;
+ }
+ _agg_data_counters.push_back(_last_agg_data_counter);
+ _last_agg_data_counter = 0;
+ target_block_row++;
+ }
+ _append_agg_data(target_columns);
+ } while (true);
+
+ _agg_data_counters.push_back(_last_agg_data_counter);
+ _last_agg_data_counter = 0;
+ _update_agg_data(target_columns);
+
+ return Status::OK();
+}
+
+Status VerticalBlockReader::_unique_key_next_block(Block* block, MemPool* mem_pool,
+ ObjectPool* agg_pool, bool* eof) {
+ if (_reader_context.is_key_column_group) {
+ // Record the current size of the row source buffer before fetching the batch:
+ // _vcollect_iter->next_batch(block) fills the row source buffer but ignores the
+ // delete sign, so for base compaction we evaluate the delete sign column after
+ // the block is fetched and update the matching agg flags in the row source buffer.
+ auto row_source_idx = _row_sources_buffer->buffered_size();
+
+ auto res = _vcollect_iter->next_batch(block);
+ if (UNLIKELY(!res.ok() && !res.is_end_of_file())) {
+ return res;
+ }
+ auto block_rows = block->rows();
+ if (_filter_delete && block_rows > 0) {
+ int ori_delete_sign_idx = _reader_context.tablet_schema->field_index(DELETE_SIGN);
+ if (ori_delete_sign_idx < 0) {
+ *eof = (res.is_end_of_file());
+ _eof = *eof;
+ return Status::OK();
+ }
+ // the delete sign column must be stored as the last column of the block
+ int delete_sign_idx = block->columns() - 1;
+ DCHECK(delete_sign_idx > 0);
+ auto target_columns = block->mutate_columns();
+ MutableColumnPtr delete_filter_column = (*std::move(_delete_filter_column)).mutate();
+ reinterpret_cast<ColumnUInt8*>(delete_filter_column.get())->resize(block_rows);
+
+ auto* __restrict filter_data =
+ reinterpret_cast<ColumnUInt8*>(delete_filter_column.get())->get_data().data();
+ auto* __restrict delete_data =
+ reinterpret_cast<ColumnInt8*>(target_columns[delete_sign_idx].get())
+ ->get_data()
+ .data();
+ for (int i = 0; i < block_rows; ++i) {
+ bool sign = (delete_data[i] == 0);
+ filter_data[i] = sign;
+ if (UNLIKELY(!sign)) {
+ _row_sources_buffer->set_agg_flag(row_source_idx + i, true);
+ }
+ }
+
+ ColumnWithTypeAndName column_with_type_and_name {_delete_filter_column,
+ std::make_shared<DataTypeUInt8>(),
+ "__DORIS_COMPACTION_FILTER__"};
+ block->insert(column_with_type_and_name);
+ Block::filter_block(block, target_columns.size(), target_columns.size());
+ _stats.rows_del_filtered += block_rows - block->rows();
+ DCHECK(block->try_get_by_name("__DORIS_COMPACTION_FILTER__") == nullptr);
+ }
+ *eof = (res.is_end_of_file());
+ _eof = *eof;
+ return Status::OK();
+ }
+ int target_block_row = 0;
+ auto target_columns = block->mutate_columns();
+ size_t column_count = block->columns();
+ do {
+ Status res = _vcollect_iter->unique_key_next_row(&_next_row);
+ if (UNLIKELY(!res.ok())) {
+ if (UNLIKELY(res.is_end_of_file())) {
+ *eof = true;
+ _eof = true;
+ break;
+ }
+ LOG(WARNING) << "next failed: " << res;
+ return res;
+ }
+ const auto& src_block = _next_row.block;
+ assert(src_block->columns() == column_count);
+ for (size_t i = 0; i < column_count; ++i) {
+ target_columns[i]->insert_from(*(src_block->get_by_position(i).column),
+ _next_row.row_pos);
+ }
+ ++target_block_row;
+ } while (target_block_row < _batch_size);
+ return Status::OK();
+}
+
+} // namespace doris::vectorized
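
The reader above is constructed twice per compaction: once for the key column group and once for each value column group, sharing one RowSourcesBuffer so that value rows are stitched together in exactly the key merge order. An assumed driver shape (not in this file; the buffer's constructor arguments are inferred from its members):

    RowSourcesBuffer row_sources(tablet_id, tablet_path, reader_type);
    {
        VerticalBlockReader key_reader(&row_sources);
        // read_params.is_key_column_group = true; drain all key blocks here.
    }
    row_sources.flush();          // persist any in-memory tail
    row_sources.seek_to_begin();  // rewind before each value-group pass
    {
        VerticalBlockReader value_reader(&row_sources);
        // read_params.is_key_column_group = false; drain value blocks here.
    }
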
diff --git a/be/src/vec/olap/vertical_block_reader.h b/be/src/vec/olap/vertical_block_reader.h
new file mode 100644
index 0000000000..7c2e99eacf
--- /dev/null
+++ b/be/src/vec/olap/vertical_block_reader.h
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <parallel_hashmap/phmap.h>
+
+#include "olap/reader.h"
+#include "olap/rowset/rowset_reader.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/olap/vcollect_iterator.h"
+#include "vec/olap/vertical_merge_iterator.h"
+
+namespace doris {
+
+namespace vectorized {
+
+class VerticalBlockReader final : public TabletReader {
+public:
+ VerticalBlockReader(RowSourcesBuffer* row_sources_buffer)
+ : _row_sources_buffer(row_sources_buffer) {}
+
+ ~VerticalBlockReader() override;
+
+ // Initialize VerticalBlockReader with tablet, data version and fetch range.
+ Status init(const ReaderParams& read_params) override;
+
+ Status next_block_with_aggregation(Block* block, MemPool* mem_pool, ObjectPool* agg_pool,
+ bool* eof) override {
+ return (this->*_next_block_func)(block, mem_pool, agg_pool, eof);
+ }
+
+ Status next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool,
+ bool* eof) override {
+ return Status::OK();
+ }
+
+ uint64_t merged_rows() const override {
+ DCHECK(_vcollect_iter);
+ return _vcollect_iter->merged_rows();
+ }
+
+private:
+ // Directly read rows from the rowset and pass them to the caller; no aggregation is needed.
+ // This is usually used for DUPLICATE KEY tables.
+ Status _direct_next_block(Block* block, MemPool* mem_pool, ObjectPool* agg_pool, bool* eof);
+ // For normal AGGREGATE KEY tables, read data by a merge heap.
+ Status _agg_key_next_block(Block* block, MemPool* mem_pool, ObjectPool* agg_pool, bool* eof);
+ // For UNIQUE KEY tables, read data by a merge heap.
+ // The difference from _agg_key_next_block is that it will read the data from high version to low version,
+ // to minimize the comparison time in merge heap.
+ Status _unique_key_next_block(Block* block, MemPool* mem_pool, ObjectPool* agg_pool, bool* eof);
+
+ Status _init_collect_iter(const ReaderParams& read_params);
+
+ Status _get_segment_iterators(const ReaderParams& read_params,
+ std::vector<RowwiseIterator*>* segment_iters);
+
+ void _init_agg_state(const ReaderParams& read_params);
+ void _append_agg_data(MutableColumns& columns);
+ void _update_agg_data(MutableColumns& columns);
+ size_t _copy_agg_data();
+ void _update_agg_value(MutableColumns& columns, int begin, int end, bool is_close = true);
+
+private:
+ std::shared_ptr<RowwiseIterator> _vcollect_iter;
+ IteratorRowRef _next_row {{}, -1, false};
+
+ bool _eof = false;
+
+ Status (VerticalBlockReader::*_next_block_func)(Block* block, MemPool* mem_pool,
+ ObjectPool* agg_pool, bool* eof) = nullptr;
+
+ RowSourcesBuffer* _row_sources_buffer;
+ ColumnPtr _delete_filter_column;
+
+ // for agg mode
+ std::vector<AggregateFunctionPtr> _agg_functions;
+ std::vector<AggregateDataPtr> _agg_places;
+
+ std::vector<int> _normal_columns_idx;
+ std::vector<int> _agg_columns_idx;
+
+ std::vector<int> _agg_data_counters;
+ int _last_agg_data_counter = 0;
+
+ MutableColumns _stored_data_columns;
+ std::vector<IteratorRowRef> _stored_row_ref;
+
+ std::vector<bool> _stored_has_null_tag;
+ std::vector<bool> _stored_has_string_tag;
+
+ phmap::flat_hash_map<const Block*, std::vector<std::pair<int, int>>> _temp_ref_map;
+};
+
+} // namespace vectorized
+} // namespace doris
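
The RowSource implementation that follows packs each merged row's provenance into 16 bits. The mask constants live in the header (not shown in this hunk); the top bit is assumed to be AGG_FLAG and the low 15 bits SOURCE_FLAG. Equivalent packing logic:

    constexpr uint16_t kAggFlag = 0x8000;    // assumed value of AGG_FLAG
    constexpr uint16_t kSourceMask = 0x7FFF; // assumed value of SOURCE_FLAG

    uint16_t pack_row_source(uint16_t source_num, bool agg) {
        uint16_t data = source_num & kSourceMask; // which input iterator
        return agg ? static_cast<uint16_t>(data | kAggFlag) : data;
    }
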
diff --git a/be/src/vec/olap/vertical_merge_iterator.cpp b/be/src/vec/olap/vertical_merge_iterator.cpp
new file mode 100644
index 0000000000..1dd430b8ca
--- /dev/null
+++ b/be/src/vec/olap/vertical_merge_iterator.cpp
@@ -0,0 +1,546 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/olap/vertical_merge_iterator.h"
+
+namespace doris {
+
+namespace vectorized {
+
+// -------------- row source ---------------//
+RowSource::RowSource(uint16_t source_num, bool agg_flag) {
+ _data = (source_num & SOURCE_FLAG) | (source_num & AGG_FLAG);
+ _data = agg_flag ? (_data | AGG_FLAG) : (_data & SOURCE_FLAG);
+}
+
+uint16_t RowSource::get_source_num() {
+ return _data & SOURCE_FLAG;
+}
+
+bool RowSource::agg_flag() {
+ return (_data & AGG_FLAG) != 0;
+}
+
+void RowSource::set_agg_flag(bool agg_flag) {
+ _data = agg_flag ? (_data | AGG_FLAG) : (_data & SOURCE_FLAG);
+}
+
+uint16_t RowSource::data() const {
+ return _data;
+}
+
+/* -------------- row source buffer ------------- */
+
+// the current row sources must stay in memory so the key column group can update agg flags
+Status RowSourcesBuffer::append(const std::vector<RowSource>& row_sources) {
+ if (_buffer->allocated_bytes() + row_sources.size() * sizeof(UInt16) >
+ config::vertical_compaction_max_row_source_memory_mb * 1024 * 1024) {
+ // serialize current buffer
+ RETURN_IF_ERROR(_create_buffer_file());
+ RETURN_IF_ERROR(_serialize());
+ _reset_buffer();
+ }
+ for (const auto& source : row_sources) {
+ _buffer->insert_value(source.data());
+ }
+ _total_size += row_sources.size();
+ return Status::OK();
+}
+
+Status RowSourcesBuffer::seek_to_begin() {
+ _buf_idx = 0;
+ if (_fd > 0) {
+ auto offset = lseek(_fd, 0, SEEK_SET);
+ if (offset != 0) {
+ LOG(WARNING) << "failed to seek to 0";
+ return Status::InternalError("failed to seek to 0");
+ }
+ _reset_buffer();
+ }
+ return Status::OK();
+}
+
+Status RowSourcesBuffer::has_remaining() {
+ if (_buf_idx < _buffer->size()) {
+ return Status::OK();
+ }
+ DCHECK(_buf_idx == _buffer->size());
+ if (_fd > 0) {
+ _reset_buffer();
+ auto st = _deserialize();
+ if (!st.ok()) {
+ return st;
+ }
+ return Status::OK();
+ }
+ return Status::EndOfFile("end of row source buffer");
+}
+
+void RowSourcesBuffer::set_agg_flag(uint64_t index, bool agg) {
+ DCHECK(index < _buffer->size());
+ RowSource ori(_buffer->get_data()[index]);
+ ori.set_agg_flag(agg);
+ _buffer->get_data()[index] = ori.data();
+}
+
+size_t RowSourcesBuffer::same_source_count(uint16_t source, size_t limit) {
+ int result = 1;
+ int start = _buf_idx + 1;
+ int end = _buffer->size();
+ while (result < limit && start < end) {
+ RowSource next(_buffer->get_element(start++));
+ if (source != next.get_source_num()) {
+ break;
+ }
+ ++result;
+ }
+ return result;
+}
+
+Status RowSourcesBuffer::_create_buffer_file() {
+ if (_fd != -1) {
+ return Status::OK();
+ }
+ std::stringstream file_path_ss;
+ file_path_ss << _tablet_path << "/compaction_row_source_" << _tablet_id;
+ if (_reader_type == READER_BASE_COMPACTION) {
+ file_path_ss << "_base";
+ } else if (_reader_type == READER_CUMULATIVE_COMPACTION) {
+ file_path_ss << "_cumu";
+ } else {
+ DCHECK(false);
+ return Status::InternalError("unknown reader type");
+ }
+ file_path_ss << ".XXXXXX";
+ std::string file_path = file_path_ss.str();
+ LOG(INFO) << "Vertical compaction row sources buffer path: " << file_path;
+ _fd = mkstemp(file_path.data());
+ if (_fd < 0) {
+ LOG(WARNING) << "failed to create tmp file, file_path=" << file_path;
+ return Status::InternalError("failed to create tmp file");
+ }
+ // the file will be released once the fd is closed
+ unlink(file_path.data());
+ return Status::OK();
+}
+
+Status RowSourcesBuffer::flush() {
+ if (_fd > 0 && !_buffer->empty()) {
+ RETURN_IF_ERROR(_serialize());
+ _reset_buffer();
+ }
+ return Status::OK();
+}
+
+Status RowSourcesBuffer::_serialize() {
+ size_t rows = _buffer->size();
+ if (rows == 0) {
+ return Status::OK();
+ }
+ // write size
+ ssize_t bytes_written = ::write(_fd, &rows, sizeof(rows));
+ if (bytes_written != sizeof(size_t)) {
+ LOG(WARNING) << "failed to write buffer size to file, bytes_written=" << bytes_written;
+ return Status::InternalError("fail to write buffer size to file");
+ }
+ // write data
+ StringRef ref = _buffer->get_raw_data();
+ bytes_written = ::write(_fd, ref.data, ref.size * sizeof(UInt16));
+ if (bytes_written != _buffer->byte_size()) {
+ LOG(WARNING) << "failed to write buffer data to file, bytes_written=" << bytes_written
+ << " buffer size=" << _buffer->byte_size();
+ return Status::InternalError("fail to write buffer size to file");
+ }
+ return Status::OK();
+}
+
+Status RowSourcesBuffer::_deserialize() {
+ size_t rows = 0;
+ ssize_t bytes_read = ::read(_fd, &rows, sizeof(rows));
+ if (bytes_read == 0) {
+ LOG(WARNING) << "end of row source buffer file";
+ return Status::EndOfFile("end of row source buffer file");
+ } else if (bytes_read != sizeof(size_t)) {
+ LOG(WARNING) << "failed to read buffer size from file, bytes_read=" << bytes_read;
+ return Status::InternalError("failed to read buffer size from file");
+ }
+ _buffer->resize(rows);
+ auto& internal_data = _buffer->get_data();
+ bytes_read = ::read(_fd, internal_data.data(), rows * sizeof(UInt16));
+ if (bytes_read != rows * sizeof(UInt16)) {
+ LOG(WARNING) << "failed to read buffer data from file, bytes_read=" << bytes_read
+ << ", expect bytes=" << rows * sizeof(UInt16);
+ return Status::InternalError("failed to read buffer data from file");
+ }
+ return Status::OK();
+}
+
+// ---------- vertical merge iterator context ----------//
+Status VerticalMergeIteratorContext::block_reset(const std::shared_ptr<Block>& block) {
+ if (!*block) {
+ const Schema& schema = _iter->schema();
+ const auto& column_ids = schema.column_ids();
+ for (size_t i = 0; i < schema.num_column_ids(); ++i) {
+ auto column_desc = schema.column(column_ids[i]);
+ auto data_type = Schema::get_data_type_ptr(*column_desc);
+ if (data_type == nullptr) {
+ return Status::RuntimeError("invalid data type");
+ }
+ auto column = data_type->create_column();
+ column->reserve(_block_row_max);
+ block->insert(ColumnWithTypeAndName(std::move(column), data_type, column_desc->name()));
+ }
+ } else {
+ block->clear_column_data();
+ }
+ return Status::OK();
+}
+
+bool VerticalMergeIteratorContext::compare(const VerticalMergeIteratorContext& rhs) const {
+ int cmp_res = _block->compare_at(_index_in_block, rhs._index_in_block, _num_key_columns,
+ *rhs._block, -1);
+ if (cmp_res != 0) {
+ return cmp_res > 0;
+ }
+ auto col_cmp_res = 0;
+ if (_seq_col_idx != -1) {
+ DCHECK(_block->columns() >= _num_key_columns);
+ auto real_seq_idx = _num_key_columns;
+ col_cmp_res = _block->compare_column_at(_index_in_block, rhs._index_in_block, real_seq_idx,
+ *rhs._block, -1);
+ }
+ auto result = (col_cmp_res == 0) ? (_order < rhs.order()) : (col_cmp_res < 0);
+ result ? set_is_same(true) : rhs.set_is_same(true);
+ return result;
+}
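
A note on the inverted comparison: std::priority_queue is a max-heap, so compare() returning true when the left key is greater makes the heap top the context holding the smallest key, with ties falling back to the sequence column and then segment order. A minimal sketch of the same inversion in isolation:

    #include <queue>
    #include <vector>

    struct GreaterFirst {
        // Returning true when lhs > rhs turns the max-heap into a min-heap.
        bool operator()(int lhs, int rhs) const { return lhs > rhs; }
    };
    // min_heap.top() now yields the smallest pushed value.
    std::priority_queue<int, std::vector<int>, GreaterFirst> min_heap;
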
+
+void VerticalMergeIteratorContext::copy_rows(Block* block, size_t count) {
+ Block& src = *_block;
+ Block& dst = *block;
+ DCHECK(count > 0);
+
+ auto start = _index_in_block;
+ _index_in_block += count - 1;
+
+ for (size_t i = 0; i < _ori_return_cols; ++i) {
+ auto& s_col = src.get_by_position(i);
+ auto& d_col = dst.get_by_position(i);
+
+ ColumnPtr& s_cp = s_col.column;
+ ColumnPtr& d_cp = d_col.column;
+
+ d_cp->assume_mutable()->insert_range_from(*s_cp, start, count);
+ }
+}
+// `advanced = false` means the current block has finished and the context did not advance past it
+void VerticalMergeIteratorContext::copy_rows(Block* block, bool advanced) {
+ Block& src = *_block;
+ Block& dst = *block;
+ if (_cur_batch_num == 0) {
+ return;
+ }
+
+ // copy the batched rows to the dst block, column by column
+ size_t start = _index_in_block - _cur_batch_num + 1 - advanced;
+ DCHECK(start >= 0);
+
+ for (size_t i = 0; i < _ori_return_cols; ++i) {
+ auto& s_col = src.get_by_position(i);
+ auto& d_col = dst.get_by_position(i);
+
+ ColumnPtr& s_cp = s_col.column;
+ ColumnPtr& d_cp = d_col.column;
+
+ d_cp->assume_mutable()->insert_range_from(*s_cp, start, _cur_batch_num);
+ }
+ _cur_batch_num = 0;
+}
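
To make the start-index arithmetic concrete: with _cur_batch_num = 3 and _index_in_block = 9, the advanced = true case gives start = 9 - 3 + 1 - 1 = 6 and copies rows 6..8 (the context has already advanced one row past the batch), while the advanced = false case gives start = 9 - 3 + 1 = 7 and copies rows 7..9 (the batch ends exactly at the current row).
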
+
+Status VerticalMergeIteratorContext::init(const StorageReadOptions& opts) {
+ _block_row_max = opts.block_row_max;
+ RETURN_IF_ERROR(_load_next_block());
+ if (valid()) {
+ RETURN_IF_ERROR(advance());
+ }
+ return Status::OK();
+}
+
+Status VerticalMergeIteratorContext::advance() {
+ // NOTE: we increase _index_in_block directly to skip a separate validity check
+ do {
+ _index_in_block++;
+ if (LIKELY(_index_in_block < _block->rows())) {
+ return Status::OK();
+ }
+ // current batch has no data, load next batch
+ RETURN_IF_ERROR(_load_next_block());
+ } while (_valid);
+ return Status::OK();
+}
+
+Status VerticalMergeIteratorContext::_load_next_block() {
+ do {
+ if (_block != nullptr) {
+ _block_list.push_back(_block);
+ _block = nullptr;
+ }
+ for (auto it = _block_list.begin(); it != _block_list.end(); it++) {
+ if (it->use_count() == 1) {
+ block_reset(*it);
+ _block = *it;
+ _block_list.erase(it);
+ break;
+ }
+ }
+ if (_block == nullptr) {
+ _block = std::make_shared<Block>();
+ block_reset(_block);
+ }
+ Status st = _iter->next_batch(_block.get());
+ if (!st.ok()) {
+ _valid = false;
+ if (st.is_end_of_file()) {
+ return Status::OK();
+ } else {
+ return st;
+ }
+ }
+ // erase delete handler columns
+ if (_block->columns() > _ori_return_cols) {
+ for (auto i = _block->columns() - 1; i >= _ori_return_cols; --i) {
+ _block->erase(i);
+ }
+ }
+ } while (_block->rows() == 0);
+ _index_in_block = -1;
+ _valid = true;
+ return Status::OK();
+}
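
_load_next_block() doubles as a small block pool: blocks handed out earlier are parked in _block_list and reused only once no outside reader still holds a reference, detected via use_count() == 1. A hedged sketch of the recycling rule on its own (template and names are illustrative):

    #include <list>
    #include <memory>

    template <typename T>
    std::shared_ptr<T> reuse_or_create(std::list<std::shared_ptr<T>>& pool) {
        for (auto it = pool.begin(); it != pool.end(); ++it) {
            if (it->use_count() == 1) { // only the pool holds it: safe to reuse
                auto obj = *it;
                pool.erase(it);
                return obj;
            }
        }
        return std::make_shared<T>(); // nothing reusable yet
    }
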
+
+// ---------------- VerticalHeapMergeIterator ------------- //
+Status VerticalHeapMergeIterator::next_batch(Block* block) {
+ size_t row_idx = 0;
+ VerticalMergeIteratorContext* pre_ctx = nullptr;
+ std::vector<RowSource> tmp_row_sources;
+ while (_get_size(block) < _block_row_max) {
+ if (_merge_heap.empty()) {
+ LOG(INFO) << "_merge_heap empty";
+ break;
+ }
+
+ auto ctx = _merge_heap.top();
+ _merge_heap.pop();
+ if (ctx->is_same()) {
+ tmp_row_sources.emplace_back(ctx->order(), true);
+ } else {
+ tmp_row_sources.emplace_back(ctx->order(), false);
+ }
+ if (ctx->is_same() &&
+ (_keys_type == KeysType::UNIQUE_KEYS || _keys_type == KeysType::AGG_KEYS)) {
+ // skip the current row; flush the rows batched in pre_ctx
+ ++_merged_rows;
+ if (pre_ctx) {
+ pre_ctx->copy_rows(block);
+ pre_ctx = nullptr;
+ }
+ } else {
+ ctx->add_cur_batch();
+ if (pre_ctx != ctx) {
+ if (pre_ctx) {
+ pre_ctx->copy_rows(block);
+ }
+ pre_ctx = ctx;
+ }
+ row_idx++;
+ if (ctx->is_cur_block_finished() || row_idx >= _block_row_max) {
+ // current block finished and ctx has not advanced,
+ // so copy from start_idx = (_index_in_block - _cur_batch_num + 1)
+ ctx->copy_rows(block, false);
+ pre_ctx = nullptr;
+ }
+ }
+
+ RETURN_IF_ERROR(ctx->advance());
+ if (ctx->valid()) {
+ _merge_heap.push(ctx);
+ } else {
+ // Release ctx early to reduce resource consumption
+ delete ctx;
+ }
+ }
+ RETURN_IF_ERROR(_row_sources_buf->append(tmp_row_sources));
+ if (!_merge_heap.empty()) {
+ return Status::OK();
+ }
+ return Status::EndOfFile("no more data in segment");
+}
+
+Status VerticalHeapMergeIterator::init(const StorageReadOptions& opts) {
+ if (_origin_iters.empty()) {
+ return Status::OK();
+ }
+ _schema = &(*_origin_iters.begin())->schema();
+
+ auto seg_order = 0;
+ for (auto iter : _origin_iters) {
+ auto ctx = std::make_unique<VerticalMergeIteratorContext>(iter, _ori_return_cols, seg_order,
+ _seq_col_idx);
+ RETURN_IF_ERROR(ctx->init(opts));
+ if (!ctx->valid()) {
+ continue;
+ }
+ _merge_heap.push(ctx.release());
+ ++seg_order;
+ }
+ _origin_iters.clear();
+
+ _block_row_max = opts.block_row_max;
+ return Status::OK();
+}
+
+// ---------------- VerticalMaskMergeIterator ------------- //
+Status VerticalMaskMergeIterator::next_row(vectorized::IteratorRowRef* ref) {
+ DCHECK(_row_sources_buf);
+ auto st = _row_sources_buf->has_remaining();
+ if (!st.ok()) {
+ if (st.is_end_of_file()) {
+ for (auto iter : _origin_iter_ctx) {
+ RETURN_IF_ERROR(iter->advance());
+ DCHECK(!iter->valid());
+ }
+ }
+ return st;
+ }
+ auto row_source = _row_sources_buf->current();
+ uint16_t order = row_source.get_source_num();
+ auto& ctx = _origin_iter_ctx[order];
+ if (UNLIKELY(ctx->is_first_row())) {
+ // first row in block, don't call ctx->advance
+ // Except for the first row, we call advance first and then get the current row
+ ctx->set_cur_row_ref(ref);
+ ref->is_same = row_source.agg_flag();
+
+ ctx->set_is_first_row(false);
+ _row_sources_buf->advance();
+ return Status::OK();
+ }
+ RETURN_IF_ERROR(ctx->advance());
+ ctx->set_cur_row_ref(ref);
+ ref->is_same = row_source.agg_flag();
+
+ _row_sources_buf->advance();
+ return Status::OK();
+}
+
+Status VerticalMaskMergeIterator::unique_key_next_row(vectorized::IteratorRowRef* ref) {
+ DCHECK(_row_sources_buf);
+ auto st = _row_sources_buf->has_remaining();
+ while (st.ok()) {
+ auto row_source = _row_sources_buf->current();
+ uint16_t order = row_source.get_source_num();
+ auto& ctx = _origin_iter_ctx[order];
+ if (UNLIKELY(ctx->is_first_row()) && !row_source.agg_flag()) {
+ // first row in block, don't call ctx->advance
+ // Except for the first row, we call advance first and then get the current row
+ ctx->set_cur_row_ref(ref);
+ ctx->set_is_first_row(false);
+ _row_sources_buf->advance();
+ return Status::OK();
+ }
+ RETURN_IF_ERROR(ctx->advance());
+ _row_sources_buf->advance();
+ if (!row_source.agg_flag()) {
+ ctx->set_cur_row_ref(ref);
+ return Status::OK();
+ }
+ st = _row_sources_buf->has_remaining();
+ }
+ if (st.is_end_of_file()) {
+ for (auto iter : _origin_iter_ctx) {
+ RETURN_IF_ERROR(iter->advance());
+ DCHECK(!iter->valid());
+ }
+ }
+ return st;
+}
+
+Status VerticalMaskMergeIterator::next_batch(Block* block) {
+ DCHECK(_row_sources_buf);
+ size_t rows = 0;
+ auto st = _row_sources_buf->has_remaining();
+ while (rows < _block_row_max && st.ok()) {
+ uint16_t order = _row_sources_buf->current().get_source_num();
+ DCHECK(order < _origin_iter_ctx.size());
+ auto& ctx = _origin_iter_ctx[order];
+
+ // find the longest run of rows from the same source within the current ctx
+ size_t limit = std::min(ctx->remain_rows(), _block_row_max - rows);
+ auto same_source_cnt = _row_sources_buf->same_source_count(order, limit);
+ _row_sources_buf->advance(same_source_cnt);
+ // copy rows to block
+ ctx->copy_rows(block, same_source_cnt);
+ RETURN_IF_ERROR(ctx->advance());
+ rows += same_source_cnt;
+ st = _row_sources_buf->has_remaining();
+ }
+ if (st.is_end_of_file()) {
+ for (auto iter : _origin_iter_ctx) {
+ RETURN_IF_ERROR(iter->advance());
+ DCHECK(!iter->valid());
+ }
+ }
+ return st;
+}
+
+Status VerticalMaskMergeIterator::init(const StorageReadOptions& opts) {
+ if (_origin_iters.empty()) {
+ return Status::OK();
+ }
+ _schema = &(*_origin_iters.begin())->schema();
+
+ for (auto iter : _origin_iters) {
+ auto ctx = std::make_unique<VerticalMergeIteratorContext>(iter, _ori_return_cols, -1, -1);
+ RETURN_IF_ERROR(ctx->init(opts));
+ if (!ctx->valid()) {
+ continue;
+ }
+ _origin_iter_ctx.emplace_back(ctx.release());
+ }
+
+ _origin_iters.clear();
+
+ _block_row_max = opts.block_row_max;
+ return Status::OK();
+}
+
+// interfaces to create vertical merge iterator
+std::shared_ptr<RowwiseIterator> new_vertical_heap_merge_iterator(
+ const std::vector<RowwiseIterator*>& inputs, size_t ori_return_cols, KeysType keys_type,
+ uint32_t seq_col_idx, RowSourcesBuffer* row_sources) {
+ return std::make_shared<VerticalHeapMergeIterator>(std::move(inputs), ori_return_cols,
+ keys_type, seq_col_idx, row_sources);
+}
+
+std::shared_ptr<RowwiseIterator> new_vertical_mask_merge_iterator(
+ const std::vector<RowwiseIterator*>& inputs, size_t ori_return_cols,
+ RowSourcesBuffer* row_sources) {
+ return std::make_shared<VerticalMaskMergeIterator>(std::move(inputs), ori_return_cols,
+ row_sources);
+}
+
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
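
Putting the two iterators together: vertical compaction first heap-merges the key columns while recording one RowSource per output row, then replays that recording to stitch each value-column group together without comparing keys again. A hedged sketch of the flow (variable names and option values are illustrative; the real orchestration lives in the merger changes earlier in this patch):

    RowSourcesBuffer buffer(tablet_id, tablet_path, READER_CUMULATIVE_COMPACTION);

    // Pass 1: merge key columns, recording row sources as a side effect.
    auto key_iter = new_vertical_heap_merge_iterator(
            key_col_iters, /*ori_return_cols=*/2, KeysType::DUP_KEYS,
            /*seq_col_idx=*/-1, &buffer);
    RETURN_IF_ERROR(key_iter->init(read_opts));
    Block block;
    while (key_iter->next_batch(&block).ok()) {
        // write key columns of the output segments, then clear the block
    }
    RETURN_IF_ERROR(buffer.flush());
    RETURN_IF_ERROR(buffer.seek_to_begin());

    // Pass 2..N: replay the row sources for each value-column group.
    auto value_iter = new_vertical_mask_merge_iterator(
            value_col_iters, /*ori_return_cols=*/1, &buffer);
    RETURN_IF_ERROR(value_iter->init(read_opts));
    while (value_iter->next_batch(&block).ok()) {
        // write this value-column group of the output segments
    }
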
diff --git a/be/src/vec/olap/vertical_merge_iterator.h b/be/src/vec/olap/vertical_merge_iterator.h
new file mode 100644
index 0000000000..d8ce2b516c
--- /dev/null
+++ b/be/src/vec/olap/vertical_merge_iterator.h
@@ -0,0 +1,312 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "common/status.h"
+#include "olap/iterators.h"
+#include "olap/schema.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/block.h"
+
+#pragma once
+
+namespace doris {
+
+namespace vectorized {
+
+// RowSource represents a row's location across multiple segments.
+// A uint16_t stores the info: the lower 15 bits hold the segment id in the
+// segment pool, and the highest bit holds the agg flag.
+// For unique keys, the agg flag means this key should be deleted; it can come
+// from two sources: an older version of the key, or the delete sign.
+class RowSource {
+public:
+ RowSource(uint16_t data) : _data(data) {}
+ RowSource(uint16_t source_num, bool agg_flag);
+
+ uint16_t get_source_num();
+ bool agg_flag();
+ void set_agg_flag(bool agg_flag);
+ uint16_t data() const;
+
+private:
+ uint16_t _data;
+ static const uint16_t SOURCE_FLAG = 0x7FFF;
+ static const uint16_t AGG_FLAG = 0x8000;
+};
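
A minimal sketch of the packing these masks imply (free functions for illustration; the class's real accessors follow the same layout):

    #include <cstdint>

    uint16_t pack_row_source(uint16_t source_num, bool agg_flag) {
        return (source_num & 0x7FFF) | (agg_flag ? 0x8000 : 0); // 15 bits + flag
    }
    uint16_t source_of(uint16_t data) { return data & 0x7FFF; }          // lower 15 bits
    bool agg_flag_of(uint16_t data) { return (data & 0x8000) != 0; }     // highest bit
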
+
+/* row sources buffer
+This buffer should have a memory limit; once the limit is reached, the
+buffered data is written to a tmp file.
+usage:
+ RowSourcesBuffer buffer(tablet_id, tablet_storage_path, reader_type);
+ buffer.append()
+ buffer.append()
+ buffer.flush()
+ buffer.seek_to_begin()
+ while (buffer.has_remaining().ok()) {
+ auto cur = buffer.current().get_source_num();
+ auto same = buffer.same_source_count(cur, limit);
+ // do copy block data
+ buffer.advance(same);
+ }
+*/
+class RowSourcesBuffer {
+public:
+ RowSourcesBuffer(int64_t tablet_id, const std::string& tablet_path, ReaderType reader_type)
+ : _tablet_id(tablet_id),
+ _tablet_path(tablet_path),
+ _reader_type(reader_type),
+ _buffer(ColumnUInt16::create()) {}
+
+ ~RowSourcesBuffer() {
+ _reset_buffer();
+ if (_fd > 0) {
+ ::close(_fd);
+ }
+ }
+
+ // append a batch of row sources
+ Status append(const std::vector<RowSource>& row_sources);
+ Status flush();
+
+ RowSource current() {
+ DCHECK(_buf_idx < _buffer->size());
+ return RowSource(_buffer->get_element(_buf_idx));
+ }
+ void advance(int32_t step = 1) {
+ DCHECK(_buf_idx + step <= _buffer->size());
+ _buf_idx += step;
+ }
+
+ uint64_t buf_idx() { return _buf_idx; }
+ uint64_t total_size() { return _total_size; }
+ uint64_t buffered_size() { return _buffer->size(); }
+ void set_agg_flag(uint64_t index, bool agg);
+
+ Status has_remaining();
+
+ Status seek_to_begin();
+
+ size_t same_source_count(uint16_t source, size_t limit);
+
+private:
+ Status _create_buffer_file();
+ Status _serialize();
+ Status _deserialize();
+ void _reset_buffer() {
+ _buffer->clear();
+ _buf_idx = 0;
+ }
+
+private:
+ int64_t _tablet_id;
+ std::string _tablet_path;
+ ReaderType _reader_type;
+ uint64_t _buf_idx = 0;
+ int _fd = -1;
+ ColumnUInt16::MutablePtr _buffer;
+ uint64_t _total_size = 0;
+};
+
+// --------------- VerticalMergeIteratorContext ------------- //
+// takes ownership of rowwise iterator
+class VerticalMergeIteratorContext {
+public:
+ VerticalMergeIteratorContext(RowwiseIterator* iter, size_t ori_return_cols, uint32_t order,
+ uint32_t seq_col_idx)
+ : _iter(iter),
+ _ori_return_cols(ori_return_cols),
+ _order(order),
+ _seq_col_idx(seq_col_idx),
+ _num_key_columns(iter->schema().num_key_columns()) {}
+
+ VerticalMergeIteratorContext(const VerticalMergeIteratorContext&) = delete;
+ VerticalMergeIteratorContext(VerticalMergeIteratorContext&&) = delete;
+ VerticalMergeIteratorContext& operator=(const VerticalMergeIteratorContext&) = delete;
+ VerticalMergeIteratorContext& operator=(VerticalMergeIteratorContext&&) = delete;
+
+ ~VerticalMergeIteratorContext() {
+ delete _iter;
+ _iter = nullptr;
+ }
+ Status block_reset(const std::shared_ptr<Block>& block);
+ Status init(const StorageReadOptions& opts);
+ bool compare(const VerticalMergeIteratorContext& rhs) const;
+ void copy_rows(Block* block, bool advanced = true);
+ void copy_rows(Block* block, size_t count);
+
+ Status advance();
+
+ // Returns whether this context still has remaining data.
+ // Only when this function returns true will set_cur_row_ref()
+ // produce a valid row.
+ bool valid() const { return _valid; }
+
+ uint32_t order() const { return _order; }
+
+ void set_is_same(bool is_same) const { _is_same = is_same; }
+
+ bool is_same() { return _is_same; }
+
+ void add_cur_batch() { _cur_batch_num++; }
+
+ bool is_cur_block_finished() { return _index_in_block == _block->rows() - 1; }
+
+ size_t remain_rows() { return _block->rows() - _index_in_block; }
+
+ bool is_first_row() { return _is_first_row; }
+ void set_is_first_row(bool is_first_row) { _is_first_row = is_first_row; }
+ void set_cur_row_ref(vectorized::IteratorRowRef* ref) {
+ ref->block = _block;
+ ref->row_pos = _index_in_block;
+ }
+
+private:
+ // Load next block into _block
+ Status _load_next_block();
+
+ RowwiseIterator* _iter;
+ size_t _ori_return_cols = 0;
+
+ // segment order, used to break ties when keys compare equal
+ uint32_t _order = -1;
+
+ uint32_t _seq_col_idx = -1;
+
+ bool _valid = false;
+ mutable bool _is_same = false;
+ size_t _index_in_block = -1;
+ size_t _block_row_max = 0;
+ int _num_key_columns;
+ size_t _cur_batch_num = 0;
+
+ // used to store data loaded from iterator->next_batch(Block*)
+ std::shared_ptr<Block> _block;
+ // used to store blocks that may still be referenced by external row refs
+ std::list<std::shared_ptr<Block>> _block_list;
+ // used to identify whether this is the first row loaded from the RowwiseIterator
+ bool _is_first_row = true;
+};
+
+// --------------- VerticalHeapMergeIterator ------------- //
+class VerticalHeapMergeIterator : public RowwiseIterator {
+public:
+ // VerticalHeapMergeIterator takes ownership of the input iterators
+ VerticalHeapMergeIterator(std::vector<RowwiseIterator*> iters, size_t ori_return_cols,
+ KeysType keys_type, int32_t seq_col_idx,
+ RowSourcesBuffer* row_sources_buf)
+ : _origin_iters(std::move(iters)),
+ _ori_return_cols(ori_return_cols),
+ _keys_type(keys_type),
+ _seq_col_idx(seq_col_idx),
+ _row_sources_buf(row_sources_buf) {}
+
+ ~VerticalHeapMergeIterator() override {
+ while (!_merge_heap.empty()) {
+ auto ctx = _merge_heap.top();
+ _merge_heap.pop();
+ delete ctx;
+ }
+ }
+
+ Status init(const StorageReadOptions& opts) override;
+ Status next_batch(Block* block) override;
+ const Schema& schema() const override { return *_schema; }
+ uint64_t merged_rows() const override { return _merged_rows; }
+
+private:
+ int _get_size(Block* block) { return block->rows(); }
+
+private:
+ // It will be released after '_merge_heap' has been built.
+ std::vector<RowwiseIterator*> _origin_iters;
+ size_t _ori_return_cols;
+
+ const Schema* _schema = nullptr;
+
+ struct VerticalMergeContextComparator {
+ bool operator()(const VerticalMergeIteratorContext* lhs,
+ const VerticalMergeIteratorContext* rhs) const {
+ return lhs->compare(*rhs);
+ }
+ };
+
+ using VMergeHeap = std::priority_queue<VerticalMergeIteratorContext*,
+ std::vector<VerticalMergeIteratorContext*>,
+ VerticalMergeContextComparator>;
+
+ VMergeHeap _merge_heap;
+ int _block_row_max = 0;
+ KeysType _keys_type;
+ int32_t _seq_col_idx = -1;
+ RowSourcesBuffer* _row_sources_buf;
+ uint32_t _merged_rows = 0;
+};
+
+// --------------- VerticalMaskMergeIterator ------------- //
+class VerticalMaskMergeIterator : public RowwiseIterator {
+public:
+ // VerticalMaskMergeIterator takes ownership of the input iterators
+ VerticalMaskMergeIterator(std::vector<RowwiseIterator*> iters, size_t ori_return_cols,
+ RowSourcesBuffer* row_sources_buf)
+ : _origin_iters(std::move(iters)),
+ _ori_return_cols(ori_return_cols),
+ _row_sources_buf(row_sources_buf) {}
+
+ ~VerticalMaskMergeIterator() override {
+ for (auto iter : _origin_iter_ctx) {
+ delete iter;
+ }
+ }
+
+ Status init(const StorageReadOptions& opts) override;
+
+ Status next_batch(Block* block) override;
+
+ const Schema& schema() const override { return *_schema; }
+
+ Status next_row(IteratorRowRef* ref) override;
+
+ Status unique_key_next_row(IteratorRowRef* ref) override;
+
+private:
+ int _get_size(Block* block) { return block->rows(); }
+
+private:
+ // released after the iterator contexts are built
+ std::vector<RowwiseIterator*> _origin_iters;
+ size_t _ori_return_cols = 0;
+
+ std::vector<VerticalMergeIteratorContext*> _origin_iter_ctx;
+
+ const Schema* _schema = nullptr;
+
+ int _block_row_max = 0;
+ RowSourcesBuffer* _row_sources_buf;
+};
+
+// interfaces to create vertical merge iterators
+std::shared_ptr<RowwiseIterator> new_vertical_heap_merge_iterator(
+ const std::vector<RowwiseIterator*>& inputs, size_t _ori_return_cols, KeysType key_type,
+ uint32_t seq_col_idx, RowSourcesBuffer* row_sources_buf);
+
+std::shared_ptr<RowwiseIterator> new_vertical_mask_merge_iterator(
+ const std::vector<RowwiseIterator*>& inputs, size_t ori_return_cols,
+ RowSourcesBuffer* row_sources_buf);
+
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/olap/vgeneric_iterators.cpp b/be/src/vec/olap/vgeneric_iterators.cpp
index 20b4479e2e..5939c05d8d 100644
--- a/be/src/vec/olap/vgeneric_iterators.cpp
+++ b/be/src/vec/olap/vgeneric_iterators.cpp
@@ -250,7 +250,6 @@ Status VAutoIncrementIterator::init(const StorageReadOptions& opts) {
Status VMergeIteratorContext::init(const StorageReadOptions& opts) {
_block_row_max = opts.block_row_max;
_record_rowids = opts.record_rowids;
- RETURN_IF_ERROR(_iter->init(opts));
RETURN_IF_ERROR(_load_next_block());
if (valid()) {
RETURN_IF_ERROR(advance());
diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt
index ec4a252872..0d6a6e6332 100644
--- a/be/test/CMakeLists.txt
+++ b/be/test/CMakeLists.txt
@@ -204,6 +204,7 @@ set(OLAP_TEST_FILES
olap/rowid_conversion_test.cpp
olap/remote_rowset_gc_test.cpp
olap/segcompaction_test.cpp
+ olap/ordered_data_compaction_test.cpp
)
set(RUNTIME_TEST_FILES
@@ -375,6 +376,7 @@ set(VEC_TEST_FILES
vec/runtime/vdatetime_value_test.cpp
vec/utils/arrow_column_to_doris_column_test.cpp
vec/olap/char_type_padding_test.cpp
+ vec/olap/vertical_compaction_test.cpp
)
add_executable(doris_be_test
${AGENT_TEST_FILES}
diff --git a/be/test/io/cache/remote_file_cache_test.cpp b/be/test/io/cache/remote_file_cache_test.cpp
index 5de0a5f955..813c8807f5 100644
--- a/be/test/io/cache/remote_file_cache_test.cpp
+++ b/be/test/io/cache/remote_file_cache_test.cpp
@@ -139,8 +139,8 @@ protected:
EXPECT_TRUE(st.ok());
EXPECT_TRUE(file_writer->close().ok());
- EXPECT_EQ("", writer.min_encoded_key().to_string());
- EXPECT_EQ("", writer.max_encoded_key().to_string());
+ EXPECT_NE("", writer.min_encoded_key().to_string());
+ EXPECT_NE("", writer.max_encoded_key().to_string());
st = segment_v2::Segment::open(fs, path, "", 0, {}, query_schema, res);
EXPECT_TRUE(st.ok());
diff --git a/be/test/olap/rowid_conversion_test.cpp b/be/test/olap/ordered_data_compaction_test.cpp
similarity index 52%
copy from be/test/olap/rowid_conversion_test.cpp
copy to be/test/olap/ordered_data_compaction_test.cpp
index 27b43fec3c..06b9ecf2ad 100644
--- a/be/test/olap/rowid_conversion_test.cpp
+++ b/be/test/olap/ordered_data_compaction_test.cpp
@@ -1,3 +1,4 @@
+
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -15,15 +16,12 @@
// specific language governing permissions and limitations
// under the License.
-#include "olap/rowid_conversion.h"
-
#include <gtest/gtest.h>
-#include "common/logging.h"
-#include "olap/data_dir.h"
-#include "olap/delete_handler.h"
+#include <vector>
+
+#include "olap/cumulative_compaction.h"
#include "olap/merger.h"
-#include "olap/row_cursor.h"
#include "olap/rowset/beta_rowset.h"
#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_factory.h"
@@ -35,13 +33,16 @@
#include "olap/tablet_schema.h"
#include "olap/tablet_schema_helper.h"
#include "util/file_utils.h"
+#include "vec/olap/vertical_block_reader.h"
+#include "vec/olap/vertical_merge_iterator.h"
namespace doris {
+namespace vectorized {
static const uint32_t MAX_PATH_LEN = 1024;
static StorageEngine* k_engine = nullptr;
-class TestRowIdConversion : public testing::TestWithParam<std::tuple<KeysType, bool, bool>> {
+class OrderedDataCompactionTest : public ::testing::Test {
protected:
void SetUp() override {
char buffer[MAX_PATH_LEN];
@@ -58,8 +59,10 @@ protected:
doris::EngineOptions options;
k_engine = new StorageEngine(options);
StorageEngine::_s_instance = k_engine;
- }
+ config::enable_ordered_data_compaction = true;
+ config::ordered_data_compaction_min_segment_size = 10;
+ }
void TearDown() override {
if (FileUtils::check_exist(absolute_dir)) {
EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok());
@@ -75,7 +78,7 @@ protected:
TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
TabletSchemaPB tablet_schema_pb;
tablet_schema_pb.set_keys_type(keys_type);
- tablet_schema_pb.set_num_short_key_columns(2);
+ tablet_schema_pb.set_num_short_key_columns(1);
tablet_schema_pb.set_num_rows_per_row_block(1024);
tablet_schema_pb.set_compress_kind(COMPRESS_NONE);
tablet_schema_pb.set_next_column_unique_id(4);
@@ -119,11 +122,46 @@ protected:
return tablet_schema;
}
- void create_rowset_writer_context(TabletSchemaSPtr tablet_schema,
+ TabletSchemaSPtr create_agg_schema() {
+ TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+ TabletSchemaPB tablet_schema_pb;
+ tablet_schema_pb.set_keys_type(KeysType::AGG_KEYS);
+ tablet_schema_pb.set_num_short_key_columns(1);
+ tablet_schema_pb.set_num_rows_per_row_block(1024);
+ tablet_schema_pb.set_compress_kind(COMPRESS_NONE);
+ tablet_schema_pb.set_next_column_unique_id(4);
+
+ ColumnPB* column_1 = tablet_schema_pb.add_column();
+ column_1->set_unique_id(1);
+ column_1->set_name("c1");
+ column_1->set_type("INT");
+ column_1->set_is_key(true);
+ column_1->set_length(4);
+ column_1->set_index_length(4);
+ column_1->set_is_nullable(false);
+ column_1->set_is_bf_column(false);
+
+ ColumnPB* column_2 = tablet_schema_pb.add_column();
+ column_2->set_unique_id(2);
+ column_2->set_name("c2");
+ column_2->set_type("INT");
+ column_2->set_length(4);
+ column_2->set_index_length(4);
+ column_2->set_is_nullable(true);
+ column_2->set_is_key(false);
+ column_2->set_is_nullable(false);
+ column_2->set_is_bf_column(false);
+ column_2->set_aggregation("SUM");
+
+ tablet_schema->init_from_pb(tablet_schema_pb);
+ return tablet_schema;
+ }
+
+ void create_rowset_writer_context(TabletSchemaSPtr tablet_schema, const std::string& rowset_dir,
const SegmentsOverlapPB& overlap,
uint32_t max_rows_per_segment,
RowsetWriterContext* rowset_writer_context) {
- static int64_t inc_id = 0;
+ static int64_t inc_id = 1000;
RowsetId rowset_id;
rowset_id.init(inc_id);
rowset_writer_context->rowset_id = rowset_id;
@@ -131,7 +169,7 @@ protected:
rowset_writer_context->data_dir = _data_dir.get();
rowset_writer_context->rowset_state = VISIBLE;
rowset_writer_context->tablet_schema = tablet_schema;
- rowset_writer_context->rowset_dir = "tablet_path";
+ rowset_writer_context->rowset_dir = rowset_dir;
rowset_writer_context->version = Version(inc_id, inc_id);
rowset_writer_context->segments_overlap = overlap;
rowset_writer_context->max_rows_per_segment = max_rows_per_segment;
@@ -149,7 +187,8 @@ protected:
}
RowsetSharedPtr create_rowset(
- TabletSchemaSPtr tablet_schema, const SegmentsOverlapPB& overlap,
+ TabletSchemaSPtr tablet_schema, TabletSharedPtr tablet,
+ const SegmentsOverlapPB& overlap,
std::vector<std::vector<std::tuple<int64_t, int64_t>>> rowset_data) {
RowsetWriterContext writer_context;
if (overlap == NONOVERLAPPING) {
@@ -161,10 +200,11 @@ protected:
EXPECT_LT(last_seg_max, cur_seg_min);
}
}
- create_rowset_writer_context(tablet_schema, overlap, UINT32_MAX, &writer_context);
+ create_rowset_writer_context(tablet_schema, tablet->tablet_path(), overlap, UINT32_MAX,
+ &writer_context);
std::unique_ptr<RowsetWriter> rowset_writer;
- Status s = RowsetFactory::create_rowset_writer(writer_context, &rowset_writer);
+ Status s = RowsetFactory::create_rowset_writer(writer_context, true, &rowset_writer);
EXPECT_TRUE(s.ok());
RowCursor input_row;
@@ -200,10 +240,10 @@ protected:
void init_rs_meta(RowsetMetaSharedPtr& pb1, int64_t start, int64_t end) {
std::string json_rowset_meta = R"({
- "rowset_id": 540081,
- "tablet_id": 15673,
- "txn_id": 4042,
- "tablet_schema_hash": 567997577,
+ "rowset_id": 540085,
+ "tablet_id": 15674,
+ "txn_id": 4045,
+ "tablet_schema_hash": 567997588,
"rowset_type": "BETA_ROWSET",
"rowset_state": "VISIBLE",
"start_version": 2,
@@ -288,15 +328,17 @@ protected:
t_tablet_schema.__set_keys_type(TKeysType::UNIQUE_KEYS);
} else if (tablet_schema.keys_type() == DUP_KEYS) {
t_tablet_schema.__set_keys_type(TKeysType::DUP_KEYS);
+ } else if (tablet_schema.keys_type() == AGG_KEYS) {
+ t_tablet_schema.__set_keys_type(TKeysType::AGG_KEYS);
}
t_tablet_schema.__set_storage_type(TStorageType::COLUMN);
t_tablet_schema.__set_columns(cols);
TabletMetaSharedPtr tablet_meta(
- new TabletMeta(1, 1, 1, 1, 1, 1, t_tablet_schema, 1, col_ordinal_to_unique_id,
+ new TabletMeta(2, 2, 2, 2, 2, 2, t_tablet_schema, 2, col_ordinal_to_unique_id,
UniqueId(1, 2), TTabletType::TABLET_TYPE_DISK,
TCompressionType::LZ4F, "", enable_unique_key_merge_on_write));
- TabletSharedPtr tablet(new Tablet(tablet_meta, nullptr));
+ TabletSharedPtr tablet(new Tablet(tablet_meta, _data_dir.get()));
tablet->init();
if (has_delete_handler) {
// delete data with key < 1000
@@ -305,7 +347,7 @@ protected:
condition.column_name = tablet_schema.column(0).name();
condition.condition_op = "<";
condition.condition_values.clear();
- condition.condition_values.push_back("1000");
+ condition.condition_values.push_back("100");
conditions.push_back(condition);
DeletePredicatePB del_pred;
@@ -317,167 +359,19 @@ protected:
return tablet;
}
- void block_create(TabletSchemaSPtr tablet_schema, vectorized::Block* block) {
- block->clear();
- Schema schema(tablet_schema);
- const auto& column_ids = schema.column_ids();
- for (size_t i = 0; i < schema.num_column_ids(); ++i) {
- auto column_desc = schema.column(column_ids[i]);
- auto data_type = Schema::get_data_type_ptr(*column_desc);
- EXPECT_TRUE(data_type != nullptr);
- auto column = data_type->create_column();
- block->insert(vectorized::ColumnWithTypeAndName(std::move(column), data_type,
- column_desc->name()));
- }
- }
-
- void check_rowid_conversion(KeysType keys_type, bool enable_unique_key_merge_on_write,
- uint32_t num_input_rowset, uint32_t num_segments,
- uint32_t rows_per_segment, const SegmentsOverlapPB& overlap,
- bool has_delete_handler) {
- // generate input data
- std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
- generate_input_data(num_input_rowset, num_segments, rows_per_segment, overlap, input_data);
-
- TabletSchemaSPtr tablet_schema = create_schema(keys_type);
- // create input rowset
- vector<RowsetSharedPtr> input_rowsets;
- SegmentsOverlapPB new_overlap = overlap;
- for (auto i = 0; i < num_input_rowset; i++) {
- if (overlap == OVERLAP_UNKNOWN) {
- if (i == 0) {
- new_overlap = NONOVERLAPPING;
- } else {
- new_overlap = OVERLAPPING;
- }
- }
- RowsetSharedPtr rowset = create_rowset(tablet_schema, new_overlap, input_data[i]);
- input_rowsets.push_back(rowset);
- }
-
- // create input rowset reader
- vector<RowsetReaderSharedPtr> input_rs_readers;
- for (auto& rowset : input_rowsets) {
- RowsetReaderSharedPtr rs_reader;
- EXPECT_TRUE(rowset->create_reader(&rs_reader).ok());
- input_rs_readers.push_back(std::move(rs_reader));
- }
-
- // create output rowset writer
- RowsetWriterContext writer_context;
- create_rowset_writer_context(tablet_schema, NONOVERLAPPING, 3456, &writer_context);
- std::unique_ptr<RowsetWriter> output_rs_writer;
- Status s = RowsetFactory::create_rowset_writer(writer_context, &output_rs_writer);
- EXPECT_TRUE(s.ok());
-
- // merge input rowset
- TabletSharedPtr tablet =
- create_tablet(*tablet_schema, enable_unique_key_merge_on_write,
- output_rs_writer->version().first - 1, has_delete_handler);
- Merger::Statistics stats;
- RowIdConversion rowid_conversion;
- stats.rowid_conversion = &rowid_conversion;
- s = Merger::vmerge_rowsets(tablet, READER_BASE_COMPACTION, tablet_schema, input_rs_readers,
- output_rs_writer.get(), &stats);
- EXPECT_TRUE(s.ok());
- RowsetSharedPtr out_rowset = output_rs_writer->build();
-
- // create output rowset reader
- RowsetReaderContext reader_context;
- reader_context.tablet_schema = tablet_schema;
- reader_context.need_ordered_result = false;
- std::vector<uint32_t> return_columns = {0, 1};
- reader_context.return_columns = &return_columns;
- reader_context.is_vec = true;
- RowsetReaderSharedPtr output_rs_reader;
- create_and_init_rowset_reader(out_rowset.get(), reader_context, &output_rs_reader);
-
- // read output rowset data
- vectorized::Block output_block;
- std::vector<std::tuple<int64_t, int64_t>> output_data;
- do {
- block_create(tablet_schema, &output_block);
- s = output_rs_reader->next_block(&output_block);
- auto columns = output_block.get_columns_with_type_and_name();
- EXPECT_EQ(columns.size(), 2);
- for (auto i = 0; i < output_block.rows(); i++) {
- output_data.emplace_back(columns[0].column->get_int(i),
- columns[1].column->get_int(i));
- }
- } while (s == Status::OK());
- EXPECT_EQ(Status::OLAPInternalError(OLAP_ERR_DATA_EOF), s);
- EXPECT_EQ(out_rowset->rowset_meta()->num_rows(), output_data.size());
- std::vector<uint32_t> segment_num_rows;
- EXPECT_TRUE(output_rs_reader->get_segment_num_rows(&segment_num_rows).ok());
- if (has_delete_handler) {
- // All keys less than 1000 are deleted by delete handler
- for (auto& item : output_data) {
- EXPECT_GE(std::get<0>(item), 1000);
- }
- }
-
- // check rowid conversion
- uint64_t count = 0;
- for (auto rs_id = 0; rs_id < input_data.size(); rs_id++) {
- for (auto s_id = 0; s_id < input_data[rs_id].size(); s_id++) {
- for (auto row_id = 0; row_id < input_data[rs_id][s_id].size(); row_id++) {
- RowLocation src(input_rowsets[rs_id]->rowset_id(), s_id, row_id);
- RowLocation dst;
- int res = rowid_conversion.get(src, &dst);
- if (res < 0) {
- continue;
- }
- size_t rowid_in_output_data = dst.row_id;
- for (auto n = 1; n <= dst.segment_id; n++) {
- rowid_in_output_data += segment_num_rows[n - 1];
- }
- EXPECT_EQ(std::get<0>(output_data[rowid_in_output_data]),
- std::get<0>(input_data[rs_id][s_id][row_id]));
- EXPECT_EQ(std::get<1>(output_data[rowid_in_output_data]),
- std::get<1>(input_data[rs_id][s_id][row_id]));
- count++;
- }
- }
- }
- EXPECT_EQ(count, output_data.size());
- }
- // if overlap == NONOVERLAPPING, all rowsets are non overlapping;
- // if overlap == OVERLAPPING, all rowsets are overlapping;
- // if overlap == OVERLAP_UNKNOWN, the first rowset is non overlapping, the
- // others are overlaping.
+ // all rowsets' data are non-overlapping
void generate_input_data(
uint32_t num_input_rowset, uint32_t num_segments, uint32_t rows_per_segment,
- const SegmentsOverlapPB& overlap,
std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>>& input_data) {
- EXPECT_GE(rows_per_segment, 10);
- EXPECT_GE(num_segments * rows_per_segment, 500);
- bool is_overlap = false;
+ static int data = 0;
for (auto i = 0; i < num_input_rowset; i++) {
- if (overlap == OVERLAPPING) {
- is_overlap = true;
- } else if (overlap == NONOVERLAPPING) {
- is_overlap = false;
- } else {
- if (i == 0) {
- is_overlap = false;
- } else {
- is_overlap = true;
- }
- }
std::vector<std::vector<std::tuple<int64_t, int64_t>>> rowset_data;
for (auto j = 0; j < num_segments; j++) {
std::vector<std::tuple<int64_t, int64_t>> segment_data;
for (auto n = 0; n < rows_per_segment; n++) {
- int64_t c1 = j * rows_per_segment + n;
- // There are 500 rows of data overlap between rowsets
- if (i > 0) {
- c1 += i * num_segments * rows_per_segment - 500;
- }
- if (is_overlap && j > 0) {
- // There are 10 rows of data overlap between segments
- c1 += j * rows_per_segment - 10;
- }
- int64_t c2 = c1 + 1;
+ int64_t c1 = data;
+ int64_t c2 = data + 1;
+ ++data;
segment_data.emplace_back(c1, c2);
}
rowset_data.emplace_back(segment_data);
@@ -486,136 +380,110 @@ protected:
}
}
+ void block_create(TabletSchemaSPtr tablet_schema, vectorized::Block* block) {
+ block->clear();
+ Schema schema(tablet_schema);
+ const auto& column_ids = schema.column_ids();
+ for (size_t i = 0; i < schema.num_column_ids(); ++i) {
+ auto column_desc = schema.column(column_ids[i]);
+ auto data_type = Schema::get_data_type_ptr(*column_desc);
+ EXPECT_TRUE(data_type != nullptr);
+ auto column = data_type->create_column();
+ block->insert(vectorized::ColumnWithTypeAndName(std::move(column), data_type,
+ column_desc->name()));
+ }
+ }
+
private:
- const std::string kTestDir = "/ut_dir/rowid_conversion_test";
+ const std::string kTestDir = "/ut_dir/vertical_compaction_test";
string absolute_dir;
std::unique_ptr<DataDir> _data_dir;
};
-TEST_F(TestRowIdConversion, Basic) {
- // rowset_id, segment_id, row_id
- int input_data[11][3] = {{0, 0, 0}, {0, 0, 1}, {0, 0, 2}, {0, 0, 3}, {0, 1, 0}, {0, 1, 1},
- {0, 1, 2}, {1, 0, 0}, {1, 0, 1}, {1, 0, 2}, {1, 0, 3}};
-
- RowsetId src_rowset;
- RowsetId dst_rowset;
- dst_rowset.init(3);
+TEST_F(OrderedDataCompactionTest, test_01) {
+ auto num_input_rowset = 5;
+ auto num_segments = 2;
+ auto rows_per_segment = 100;
+ std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
+ generate_input_data(num_input_rowset, num_segments, rows_per_segment, input_data);
+ for (auto rs_id = 0; rs_id < input_data.size(); rs_id++) {
+ for (auto s_id = 0; s_id < input_data[rs_id].size(); s_id++) {
+ for (auto row_id = 0; row_id < input_data[rs_id][s_id].size(); row_id++) {
+ LOG(INFO) << "input data: " << std::get<0>(input_data[rs_id][s_id][row_id]) << " "
+ << std::get<1>(input_data[rs_id][s_id][row_id]);
+ }
+ }
+ }
- std::vector<RowLocation> rss_row_ids;
- for (auto i = 0; i < 11; i++) {
- src_rowset.init(input_data[i][0]);
- RowLocation rss_row_id(src_rowset, input_data[i][1], input_data[i][2]);
- rss_row_ids.push_back(rss_row_id);
+ TabletSchemaSPtr tablet_schema = create_schema();
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false, 10000, false);
+ EXPECT_TRUE(FileUtils::create_dir(tablet->tablet_path()).ok());
+ // create input rowset
+ vector<RowsetSharedPtr> input_rowsets;
+ SegmentsOverlapPB new_overlap = NONOVERLAPPING;
+ for (auto i = 0; i < num_input_rowset; i++) {
+ RowsetSharedPtr rowset = create_rowset(tablet_schema, tablet, new_overlap, input_data[i]);
+ input_rowsets.push_back(rowset);
+ }
+ //auto end_version = input_rowsets.back()->end_version();
+ CumulativeCompaction cu_compaction(tablet);
+ cu_compaction.set_input_rowset(input_rowsets);
+ EXPECT_EQ(cu_compaction.handle_ordered_data_compaction(), true);
+ for (int i = 0; i < 100; ++i) {
+ LOG(INFO) << "stop";
}
- RowIdConversion rowid_conversion;
- src_rowset.init(0);
- std::vector<uint32_t> rs0_segment_num_rows = {4, 3};
- rowid_conversion.init_segment_map(src_rowset, rs0_segment_num_rows);
- src_rowset.init(1);
- std::vector<uint32_t> rs1_segment_num_rows = {4};
- rowid_conversion.init_segment_map(src_rowset, rs1_segment_num_rows);
- rowid_conversion.set_dst_rowset_id(dst_rowset);
-
- std::vector<uint32_t> dst_segment_num_rows = {4, 3, 4};
- rowid_conversion.add(rss_row_ids, dst_segment_num_rows);
-
- int res = 0;
- src_rowset.init(0);
- RowLocation src0(src_rowset, 0, 0);
- RowLocation dst0;
- res = rowid_conversion.get(src0, &dst0);
-
- EXPECT_EQ(dst0.rowset_id, dst_rowset);
- EXPECT_EQ(dst0.segment_id, 0);
- EXPECT_EQ(dst0.row_id, 0);
- EXPECT_EQ(res, 0);
-
- src_rowset.init(0);
- RowLocation src1(src_rowset, 1, 2);
- RowLocation dst1;
- res = rowid_conversion.get(src1, &dst1);
-
- EXPECT_EQ(dst1.rowset_id, dst_rowset);
- EXPECT_EQ(dst1.segment_id, 1);
- EXPECT_EQ(dst1.row_id, 2);
- EXPECT_EQ(res, 0);
-
- src_rowset.init(1);
- RowLocation src2(src_rowset, 0, 3);
- RowLocation dst2;
- res = rowid_conversion.get(src2, &dst2);
-
- EXPECT_EQ(dst2.rowset_id, dst_rowset);
- EXPECT_EQ(dst2.segment_id, 2);
- EXPECT_EQ(dst2.row_id, 3);
- EXPECT_EQ(res, 0);
-
- src_rowset.init(1);
- RowLocation src3(src_rowset, 0, 4);
- RowLocation dst3;
- res = rowid_conversion.get(src3, &dst3);
- EXPECT_EQ(res, -1);
-
- src_rowset.init(100);
- RowLocation src4(src_rowset, 5, 4);
- RowLocation dst4;
- res = rowid_conversion.get(src4, &dst4);
- EXPECT_EQ(res, -1);
-}
-INSTANTIATE_TEST_SUITE_P(
- Parameters, TestRowIdConversion,
- ::testing::ValuesIn(std::vector<std::tuple<KeysType, bool, bool>> {
- // Parameters: data_type, enable_unique_key_merge_on_write, has_delete_handler
- {DUP_KEYS, false, false},
- {UNIQUE_KEYS, false, false},
- {UNIQUE_KEYS, true, false},
- {DUP_KEYS, false, true},
- {UNIQUE_KEYS, false, true},
- {UNIQUE_KEYS, true, true}}));
-
-TEST_P(TestRowIdConversion, Conversion) {
- KeysType keys_type = std::get<0>(GetParam());
- bool enable_unique_key_merge_on_write = std::get<1>(GetParam());
- bool has_delete_handler = std::get<2>(GetParam());
-
- // if num_input_rowset = 2, VCollectIterator::Level1Iterator::_merge = flase
- // if num_input_rowset = 3, VCollectIterator::Level1Iterator::_merge = true
- for (auto num_input_rowset = 2; num_input_rowset <= 3; num_input_rowset++) {
- uint32_t rows_per_segment = 4567;
- // RowsetReader: SegmentIterator
- {
- uint32_t num_segments = 1;
- SegmentsOverlapPB overlap = NONOVERLAPPING;
- std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
- check_rowid_conversion(keys_type, enable_unique_key_merge_on_write, num_input_rowset,
- num_segments, rows_per_segment, overlap, has_delete_handler);
- }
- // RowsetReader: VMergeIterator
- {
- uint32_t num_segments = 2;
- SegmentsOverlapPB overlap = OVERLAPPING;
- std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
- check_rowid_conversion(keys_type, enable_unique_key_merge_on_write, num_input_rowset,
- num_segments, rows_per_segment, overlap, has_delete_handler);
+ RowsetSharedPtr out_rowset = cu_compaction.output_rowset();
+
+ // create output rowset reader
+ RowsetReaderContext reader_context;
+ reader_context.tablet_schema = tablet_schema;
+ reader_context.need_ordered_result = false;
+ std::vector<uint32_t> return_columns = {0, 1};
+ reader_context.return_columns = &return_columns;
+ reader_context.is_vec = true;
+ RowsetReaderSharedPtr output_rs_reader;
+ LOG(INFO) << "create rowset reader in test";
+ create_and_init_rowset_reader(out_rowset.get(), reader_context, &output_rs_reader);
+
+ // read output rowset data
+ vectorized::Block output_block;
+ std::vector<std::tuple<int64_t, int64_t>> output_data;
+ Status s = Status::OK();
+ do {
+ block_create(tablet_schema, &output_block);
+ s = output_rs_reader->next_block(&output_block);
+ auto columns = output_block.get_columns_with_type_and_name();
+ EXPECT_EQ(columns.size(), 2);
+ for (auto i = 0; i < output_block.rows(); i++) {
+ output_data.emplace_back(columns[0].column->get_int(i), columns[1].column->get_int(i));
}
- // RowsetReader: VUnionIterator
- {
- uint32_t num_segments = 2;
- SegmentsOverlapPB overlap = NONOVERLAPPING;
- std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
- check_rowid_conversion(keys_type, enable_unique_key_merge_on_write, num_input_rowset,
- num_segments, rows_per_segment, overlap, has_delete_handler);
- }
- // RowsetReader: VUnionIterator + VMergeIterator
- {
- uint32_t num_segments = 2;
- SegmentsOverlapPB overlap = OVERLAP_UNKNOWN;
- std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
- check_rowid_conversion(keys_type, enable_unique_key_merge_on_write, num_input_rowset,
- num_segments, rows_per_segment, overlap, has_delete_handler);
+ } while (s == Status::OK());
+ EXPECT_EQ(Status::OLAPInternalError(OLAP_ERR_DATA_EOF), s);
+ EXPECT_EQ(out_rowset->rowset_meta()->num_rows(), output_data.size());
+ EXPECT_EQ(output_data.size(), num_input_rowset * num_segments * rows_per_segment);
+ std::vector<uint32_t> segment_num_rows;
+ EXPECT_TRUE(output_rs_reader->get_segment_num_rows(&segment_num_rows).ok());
+ // check vertical compaction result
+ for (auto id = 0; id < output_data.size(); id++) {
+ LOG(INFO) << "output data: " << std::get<0>(output_data[id]) << " "
+ << std::get<1>(output_data[id]);
+ }
+ int dst_id = 0;
+ for (auto rs_id = 0; rs_id < input_data.size(); rs_id++) {
+ for (auto s_id = 0; s_id < input_data[rs_id].size(); s_id++) {
+ for (auto row_id = 0; row_id < input_data[rs_id][s_id].size(); row_id++) {
+ LOG(INFO) << "input data: " << std::get<0>(input_data[rs_id][s_id][row_id]) << " "
+ << std::get<1>(input_data[rs_id][s_id][row_id]);
+ EXPECT_EQ(std::get<0>(input_data[rs_id][s_id][row_id]),
+ std::get<0>(output_data[dst_id]));
+ EXPECT_EQ(std::get<1>(input_data[rs_id][s_id][row_id]),
+ std::get<1>(output_data[dst_id]));
+ dst_id++;
+ }
}
}
}
-} // namespace doris
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/test/olap/rowid_conversion_test.cpp b/be/test/olap/rowid_conversion_test.cpp
index 27b43fec3c..a08f17f4c4 100644
--- a/be/test/olap/rowid_conversion_test.cpp
+++ b/be/test/olap/rowid_conversion_test.cpp
@@ -75,7 +75,7 @@ protected:
TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
TabletSchemaPB tablet_schema_pb;
tablet_schema_pb.set_keys_type(keys_type);
- tablet_schema_pb.set_num_short_key_columns(2);
+ tablet_schema_pb.set_num_short_key_columns(1);
tablet_schema_pb.set_num_rows_per_row_block(1024);
tablet_schema_pb.set_compress_kind(COMPRESS_NONE);
tablet_schema_pb.set_next_column_unique_id(4);
@@ -164,7 +164,7 @@ protected:
create_rowset_writer_context(tablet_schema, overlap, UINT32_MAX, &writer_context);
std::unique_ptr<RowsetWriter> rowset_writer;
- Status s = RowsetFactory::create_rowset_writer(writer_context, &rowset_writer);
+ Status s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
EXPECT_TRUE(s.ok());
RowCursor input_row;
@@ -367,7 +367,7 @@ protected:
RowsetWriterContext writer_context;
create_rowset_writer_context(tablet_schema, NONOVERLAPPING, 3456, &writer_context);
std::unique_ptr<RowsetWriter> output_rs_writer;
- Status s = RowsetFactory::create_rowset_writer(writer_context, &output_rs_writer);
+ Status s = RowsetFactory::create_rowset_writer(writer_context, false, &output_rs_writer);
EXPECT_TRUE(s.ok());
// merge input rowset
diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp
index 2df689fc04..ea8a3485fa 100644
--- a/be/test/olap/rowset/beta_rowset_test.cpp
+++ b/be/test/olap/rowset/beta_rowset_test.cpp
@@ -182,7 +182,7 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) {
create_rowset_writer_context(tablet_schema, &writer_context);
std::unique_ptr<RowsetWriter> rowset_writer;
- s = RowsetFactory::create_rowset_writer(writer_context, &rowset_writer);
+ s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
EXPECT_EQ(Status::OK(), s);
RowCursor input_row;
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp
index 068d5b3ee3..5f9ff06d2b 100644
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@@ -146,29 +146,22 @@ protected:
EXPECT_TRUE(st.ok());
EXPECT_TRUE(file_writer->close().ok());
// Check min/max key generation
- if (build_schema->keys_type() == UNIQUE_KEYS && opts.enable_unique_key_merge_on_write) {
- // Create min row
- for (int cid = 0; cid < build_schema->num_key_columns(); ++cid) {
- RowCursorCell cell = row.cell(cid);
- generator(0, cid, 0 / opts.num_rows_per_block, cell);
- }
- std::string min_encoded_key;
- encode_key<RowCursor, true, true>(&min_encoded_key, row,
- build_schema->num_key_columns());
- EXPECT_EQ(min_encoded_key, writer.min_encoded_key().to_string());
- // Create max row
- for (int cid = 0; cid < build_schema->num_key_columns(); ++cid) {
- RowCursorCell cell = row.cell(cid);
- generator(nrows - 1, cid, (nrows - 1) / opts.num_rows_per_block, cell);
- }
- std::string max_encoded_key;
- encode_key<RowCursor, true, true>(&max_encoded_key, row,
- build_schema->num_key_columns());
- EXPECT_EQ(max_encoded_key, writer.max_encoded_key().to_string());
- } else {
- EXPECT_EQ("", writer.min_encoded_key().to_string());
- EXPECT_EQ("", writer.max_encoded_key().to_string());
+ // Create min row
+ for (int cid = 0; cid < build_schema->num_key_columns(); ++cid) {
+ RowCursorCell cell = row.cell(cid);
+ generator(0, cid, 0 / opts.num_rows_per_block, cell);
+ }
+ std::string min_encoded_key;
+ encode_key<RowCursor, true, true>(&min_encoded_key, row, build_schema->num_key_columns());
+ EXPECT_EQ(min_encoded_key, writer.min_encoded_key().to_string());
+ // Create max row
+ for (int cid = 0; cid < build_schema->num_key_columns(); ++cid) {
+ RowCursorCell cell = row.cell(cid);
+ generator(nrows - 1, cid, (nrows - 1) / opts.num_rows_per_block, cell);
}
+ std::string max_encoded_key;
+ encode_key<RowCursor, true, true>(&max_encoded_key, row, build_schema->num_key_columns());
+ EXPECT_EQ(max_encoded_key, writer.max_encoded_key().to_string());
st = Segment::open(fs, path, "", 0, {}, query_schema, res);
EXPECT_TRUE(st.ok());
diff --git a/be/test/olap/segcompaction_test.cpp b/be/test/olap/segcompaction_test.cpp
index 205fa0cbbd..a93ad79c4d 100644
--- a/be/test/olap/segcompaction_test.cpp
+++ b/be/test/olap/segcompaction_test.cpp
@@ -218,7 +218,7 @@ TEST_F(SegCompactionTest, SegCompactionThenRead) {
create_rowset_writer_context(10047, tablet_schema, &writer_context);
std::unique_ptr<RowsetWriter> rowset_writer;
- s = RowsetFactory::create_rowset_writer(writer_context, &rowset_writer);
+ s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
EXPECT_EQ(Status::OK(), s);
RowCursor input_row;
@@ -324,7 +324,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
create_rowset_writer_context(10048, tablet_schema, &writer_context);
std::unique_ptr<RowsetWriter> rowset_writer;
- s = RowsetFactory::create_rowset_writer(writer_context, &rowset_writer);
+ s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
EXPECT_EQ(Status::OK(), s);
RowCursor input_row;
@@ -467,7 +467,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) {
create_rowset_writer_context(10049, tablet_schema, &writer_context);
std::unique_ptr<RowsetWriter> rowset_writer;
- s = RowsetFactory::create_rowset_writer(writer_context, &rowset_writer);
+ s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
EXPECT_EQ(Status::OK(), s);
RowCursor input_row;
diff --git a/be/test/testutil/mock_rowset.h b/be/test/testutil/mock_rowset.h
index 17865ce3d8..234a64eeab 100644
--- a/be/test/testutil/mock_rowset.h
+++ b/be/test/testutil/mock_rowset.h
@@ -37,7 +37,8 @@ class MockRowset : public Rowset {
return Status::NotSupported("MockRowset not support this method.");
}
- virtual Status link_files_to(const std::string& dir, RowsetId new_rowset_id) override {
+ virtual Status link_files_to(const std::string& dir, RowsetId new_rowset_id,
+ size_t start_seg_id) override {
return Status::NotSupported("MockRowset not support this method.");
}
diff --git a/be/test/vec/olap/vertical_compaction_test.cpp b/be/test/vec/olap/vertical_compaction_test.cpp
new file mode 100644
index 0000000000..28e605ef8f
--- /dev/null
+++ b/be/test/vec/olap/vertical_compaction_test.cpp
@@ -0,0 +1,862 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "olap/merger.h"
+#include "olap/rowset/beta_rowset.h"
+#include "olap/rowset/rowset.h"
+#include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/rowset_reader.h"
+#include "olap/rowset/rowset_reader_context.h"
+#include "olap/rowset/rowset_writer.h"
+#include "olap/rowset/rowset_writer_context.h"
+#include "olap/schema.h"
+#include "olap/tablet_schema.h"
+#include "olap/tablet_schema_helper.h"
+#include "util/file_utils.h"
+#include "vec/olap/vertical_block_reader.h"
+#include "vec/olap/vertical_merge_iterator.h"
+
+namespace doris {
+namespace vectorized {
+
+static const uint32_t MAX_PATH_LEN = 1024;
+static StorageEngine* k_engine = nullptr;
+
+class VerticalCompactionTest : public ::testing::Test {
+protected:
+ void SetUp() override {
+ char buffer[MAX_PATH_LEN];
+ EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
+ absolute_dir = std::string(buffer) + kTestDir;
+
+ if (FileUtils::check_exist(absolute_dir)) {
+ EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok());
+ }
+ EXPECT_TRUE(FileUtils::create_dir(absolute_dir).ok());
+ EXPECT_TRUE(FileUtils::create_dir(absolute_dir + "/tablet_path").ok());
+ _data_dir = std::make_unique<DataDir>(absolute_dir);
+ _data_dir->update_capacity();
+ doris::EngineOptions options;
+ k_engine = new StorageEngine(options);
+ StorageEngine::_s_instance = k_engine;
+ }
+ void TearDown() override {
+ if (FileUtils::check_exist(absolute_dir)) {
+ EXPECT_TRUE(FileUtils::remove_all(absolute_dir).ok());
+ }
+ if (k_engine != nullptr) {
+ k_engine->stop();
+ delete k_engine;
+ k_engine = nullptr;
+ }
+ }
+
+ TabletSchemaSPtr create_schema(KeysType keys_type = DUP_KEYS) {
+ TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+ TabletSchemaPB tablet_schema_pb;
+ tablet_schema_pb.set_keys_type(keys_type);
+ tablet_schema_pb.set_num_short_key_columns(1);
+ tablet_schema_pb.set_num_rows_per_row_block(1024);
+ tablet_schema_pb.set_compress_kind(COMPRESS_NONE);
+ tablet_schema_pb.set_next_column_unique_id(4);
+
+ ColumnPB* column_1 = tablet_schema_pb.add_column();
+ column_1->set_unique_id(1);
+ column_1->set_name("c1");
+ column_1->set_type("INT");
+ column_1->set_is_key(true);
+ column_1->set_length(4);
+ column_1->set_index_length(4);
+ column_1->set_is_nullable(false);
+ column_1->set_is_bf_column(false);
+
+ ColumnPB* column_2 = tablet_schema_pb.add_column();
+ column_2->set_unique_id(2);
+ column_2->set_name("c2");
+ column_2->set_type("INT");
+ column_2->set_length(4);
+ column_2->set_index_length(4);
+ column_2->set_is_nullable(true);
+ column_2->set_is_key(false);
+ column_2->set_is_nullable(false);
+ column_2->set_is_bf_column(false);
+
+ // unique table must contain the DELETE_SIGN column
+ if (keys_type == UNIQUE_KEYS) {
+ ColumnPB* column_3 = tablet_schema_pb.add_column();
+ column_3->set_unique_id(3);
+ column_3->set_name(DELETE_SIGN);
+ column_3->set_type("TINYINT");
+ column_3->set_length(1);
+ column_3->set_index_length(1);
+ column_3->set_is_nullable(false);
+ column_3->set_is_key(false);
+ column_3->set_is_nullable(false);
+ column_3->set_is_bf_column(false);
+ }
+
+ tablet_schema->init_from_pb(tablet_schema_pb);
+ return tablet_schema;
+ }
+
+ TabletSchemaSPtr create_agg_schema() {
+ TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+ TabletSchemaPB tablet_schema_pb;
+ tablet_schema_pb.set_keys_type(KeysType::AGG_KEYS);
+ tablet_schema_pb.set_num_short_key_columns(1);
+ tablet_schema_pb.set_num_rows_per_row_block(1024);
+ tablet_schema_pb.set_compress_kind(COMPRESS_NONE);
+ tablet_schema_pb.set_next_column_unique_id(4);
+
+ ColumnPB* column_1 = tablet_schema_pb.add_column();
+ column_1->set_unique_id(1);
+ column_1->set_name("c1");
+ column_1->set_type("INT");
+ column_1->set_is_key(true);
+ column_1->set_length(4);
+ column_1->set_index_length(4);
+ column_1->set_is_nullable(false);
+ column_1->set_is_bf_column(false);
+
+ ColumnPB* column_2 = tablet_schema_pb.add_column();
+ column_2->set_unique_id(2);
+ column_2->set_name("c2");
+ column_2->set_type("INT");
+ column_2->set_length(4);
+ column_2->set_index_length(4);
+ column_2->set_is_nullable(true);
+ column_2->set_is_key(false);
+ column_2->set_is_nullable(false);
+ column_2->set_is_bf_column(false);
+ column_2->set_aggregation("SUM");
+
+ tablet_schema->init_from_pb(tablet_schema_pb);
+ return tablet_schema;
+ }
+
+ void create_rowset_writer_context(TabletSchemaSPtr tablet_schema,
+ const SegmentsOverlapPB& overlap,
+ uint32_t max_rows_per_segment,
+ RowsetWriterContext* rowset_writer_context) {
+ static int64_t inc_id = 1000;
+ RowsetId rowset_id;
+ rowset_id.init(inc_id);
+ rowset_writer_context->rowset_id = rowset_id;
+ rowset_writer_context->rowset_type = BETA_ROWSET;
+ rowset_writer_context->data_dir = _data_dir.get();
+ rowset_writer_context->rowset_state = VISIBLE;
+ rowset_writer_context->tablet_schema = tablet_schema;
+ rowset_writer_context->rowset_dir = "tablet_path";
+ rowset_writer_context->version = Version(inc_id, inc_id);
+ rowset_writer_context->segments_overlap = overlap;
+ rowset_writer_context->max_rows_per_segment = max_rows_per_segment;
+ inc_id++;
+ }
+
+ void create_and_init_rowset_reader(Rowset* rowset, RowsetReaderContext& context,
+ RowsetReaderSharedPtr* result) {
+ auto s = rowset->create_reader(result);
+ EXPECT_TRUE(s.ok());
+ EXPECT_TRUE(*result != nullptr);
+
+ s = (*result)->init(&context);
+ EXPECT_TRUE(s.ok());
+ }
+
+ RowsetSharedPtr create_rowset(
+ TabletSchemaSPtr tablet_schema, const SegmentsOverlapPB& overlap,
+ std::vector<std::vector<std::tuple<int64_t, int64_t>>> rowset_data) {
+ RowsetWriterContext writer_context;
+ if (overlap == NONOVERLAPPING) {
+ for (auto i = 1; i < rowset_data.size(); i++) {
+ auto& last_seg_data = rowset_data[i - 1];
+ auto& cur_seg_data = rowset_data[i];
+ int64_t last_seg_max = std::get<0>(last_seg_data[last_seg_data.size() - 1]);
+ int64_t cur_seg_min = std::get<0>(cur_seg_data[0]);
+ EXPECT_LT(last_seg_max, cur_seg_min);
+ }
+ }
+ create_rowset_writer_context(tablet_schema, overlap, UINT32_MAX, &writer_context);
+
+ std::unique_ptr<RowsetWriter> rowset_writer;
+ Status s = RowsetFactory::create_rowset_writer(writer_context, true, &rowset_writer);
+ EXPECT_TRUE(s.ok());
+
+ RowCursor input_row;
+ input_row.init(tablet_schema);
+
+ uint32_t num_rows = 0;
+ for (int i = 0; i < rowset_data.size(); ++i) {
+ MemPool mem_pool;
+ for (int rid = 0; rid < rowset_data[i].size(); ++rid) {
+ uint32_t c1 = std::get<0>(rowset_data[i][rid]);
+ uint32_t c2 = std::get<1>(rowset_data[i][rid]);
+ input_row.set_field_content(0, reinterpret_cast<char*>(&c1), &mem_pool);
+ input_row.set_field_content(1, reinterpret_cast<char*>(&c2), &mem_pool);
+ if (tablet_schema->keys_type() == UNIQUE_KEYS) {
+ uint8_t num = 0;
+ input_row.set_field_content(2, reinterpret_cast<char*>(&num), &mem_pool);
+ }
+ s = rowset_writer->add_row(input_row);
+ EXPECT_TRUE(s.ok());
+ num_rows++;
+ }
+ s = rowset_writer->flush();
+ EXPECT_TRUE(s.ok());
+ }
+
+ RowsetSharedPtr rowset;
+ rowset = rowset_writer->build();
+ EXPECT_TRUE(rowset != nullptr);
+ EXPECT_EQ(rowset_data.size(), rowset->rowset_meta()->num_segments());
+ EXPECT_EQ(num_rows, rowset->rowset_meta()->num_rows());
+ return rowset;
+ }
+
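+ // Initializes a rowset meta from a fixed JSON template, then overrides
+ // its version range and creation time.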
+ void init_rs_meta(RowsetMetaSharedPtr& pb1, int64_t start, int64_t end) {
+ std::string json_rowset_meta = R"({
+ "rowset_id": 540085,
+ "tablet_id": 15674,
+ "txn_id": 4045,
+ "tablet_schema_hash": 567997588,
+ "rowset_type": "BETA_ROWSET",
+ "rowset_state": "VISIBLE",
+ "start_version": 2,
+ "end_version": 2,
+ "num_rows": 3929,
+ "total_disk_size": 84699,
+ "data_disk_size": 84464,
+ "index_disk_size": 235,
+ "empty": false,
+ "load_id": {
+ "hi": -5350970832824939812,
+ "lo": -6717994719194512122
+ },
+ "creation_time": 1553765670,
+ "alpha_rowset_extra_meta_pb": {
+ "segment_groups": [
+ {
+ "segment_group_id": 0,
+ "num_segments": 2,
+ "index_size": 132,
+ "data_size": 576,
+ "num_rows": 5,
+ "zone_maps": [
+ {
+ "min": "MQ==",
+ "max": "NQ==",
+ "null_flag": false
+ },
+ {
+ "min": "MQ==",
+ "max": "Mw==",
+ "null_flag": false
+ },
+ {
+ "min": "J2J1c2gn",
+ "max": "J3RvbSc=",
+ "null_flag": false
+ }
+ ],
+ "empty": false
+ }]
+ }
+ })";
+ pb1->init_from_json(json_rowset_meta);
+ pb1->set_start_version(start);
+ pb1->set_end_version(end);
+ pb1->set_creation_time(10000);
+ }
+
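+ // Wraps the delete predicate in a synthetic rowset meta at the given
+ // version and registers that rowset on the tablet.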
+ void add_delete_predicate(TabletSharedPtr tablet, DeletePredicatePB& del_pred,
+ int64_t version) {
+ RowsetMetaSharedPtr rsm(new RowsetMeta());
+ init_rs_meta(rsm, version, version);
+ RowsetId id;
+ id.init(version * 1000);
+ rsm->set_rowset_id(id);
+ rsm->set_delete_predicate(del_pred);
+ rsm->set_tablet_schema(tablet->tablet_schema());
+ RowsetSharedPtr rowset = std::make_shared<BetaRowset>(tablet->tablet_schema(), "", rsm);
+ tablet->add_rowset(rowset);
+ }
+
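+ // Builds a tablet whose thrift schema mirrors tablet_schema; when
+ // has_delete_handler is true, attaches a delete predicate on the first
+ // key column (value < 100) at the given version.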
+ TabletSharedPtr create_tablet(const TabletSchema& tablet_schema,
+ bool enable_unique_key_merge_on_write, int64_t version,
+ bool has_delete_handler) {
+ std::vector<TColumn> cols;
+ std::unordered_map<uint32_t, uint32_t> col_ordinal_to_unique_id;
+ for (auto i = 0; i < tablet_schema.num_columns(); i++) {
+ const TabletColumn& column = tablet_schema.column(i);
+ TColumn col;
+ col.column_type.type = TPrimitiveType::INT;
+ col.__set_column_name(column.name());
+ col.__set_is_key(column.is_key());
+ cols.push_back(col);
+ col_ordinal_to_unique_id[i] = column.unique_id();
+ }
+
+ TTabletSchema t_tablet_schema;
+ t_tablet_schema.__set_short_key_column_count(tablet_schema.num_short_key_columns());
+ t_tablet_schema.__set_schema_hash(3333);
+ if (tablet_schema.keys_type() == UNIQUE_KEYS) {
+ t_tablet_schema.__set_keys_type(TKeysType::UNIQUE_KEYS);
+ } else if (tablet_schema.keys_type() == DUP_KEYS) {
+ t_tablet_schema.__set_keys_type(TKeysType::DUP_KEYS);
+ } else if (tablet_schema.keys_type() == AGG_KEYS) {
+ t_tablet_schema.__set_keys_type(TKeysType::AGG_KEYS);
+ }
+ t_tablet_schema.__set_storage_type(TStorageType::COLUMN);
+ t_tablet_schema.__set_columns(cols);
+ TabletMetaSharedPtr tablet_meta(
+ new TabletMeta(2, 2, 2, 2, 2, 2, t_tablet_schema, 2, col_ordinal_to_unique_id,
+ UniqueId(1, 2), TTabletType::TABLET_TYPE_DISK,
+ TCompressionType::LZ4F, "", enable_unique_key_merge_on_write));
+
+ TabletSharedPtr tablet(new Tablet(tablet_meta, nullptr));
+ tablet->init();
+ if (has_delete_handler) {
+ // delete rows whose first key column value is < 100
+ std::vector<TCondition> conditions;
+ TCondition condition;
+ condition.column_name = tablet_schema.column(0).name();
+ condition.condition_op = "<";
+ condition.condition_values.clear();
+ condition.condition_values.push_back("100");
+ conditions.push_back(condition);
+
+ DeletePredicatePB del_pred;
+ Status st =
+ DeleteHandler::generate_delete_predicate(tablet_schema, conditions, &del_pred);
+ EXPECT_EQ(Status::OK(), st);
+ add_delete_predicate(tablet, del_pred, version);
+ }
+ return tablet;
+ }
+
+ // Every generated rowset holds identical data: in segment j, key c1
+ // runs from j * rows_per_segment to (j + 1) * rows_per_segment - 1,
+ // with c2 = c1 + 1.
+ void generate_input_data(
+ uint32_t num_input_rowset, uint32_t num_segments, uint32_t rows_per_segment,
+ const SegmentsOverlapPB& overlap,
+ std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>>& input_data) {
+ for (auto i = 0; i < num_input_rowset; i++) {
+ std::vector<std::vector<std::tuple<int64_t, int64_t>>> rowset_data;
+ for (auto j = 0; j < num_segments; j++) {
+ std::vector<std::tuple<int64_t, int64_t>> segment_data;
+ for (auto n = 0; n < rows_per_segment; n++) {
+ int64_t c1 = j * rows_per_segment + n;
+ int64_t c2 = c1 + 1;
+ segment_data.emplace_back(c1, c2);
+ }
+ rowset_data.emplace_back(segment_data);
+ }
+ input_data.emplace_back(rowset_data);
+ }
+ }
+
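+ // Resets the block and inserts one empty, correctly typed column for
+ // each column id of the schema.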
+ void block_create(TabletSchemaSPtr tablet_schema, vectorized::Block* block) {
+ block->clear();
+ Schema schema(tablet_schema);
+ const auto& column_ids = schema.column_ids();
+ for (size_t i = 0; i < schema.num_column_ids(); ++i) {
+ auto column_desc = schema.column(column_ids[i]);
+ auto data_type = Schema::get_data_type_ptr(*column_desc);
+ EXPECT_TRUE(data_type != nullptr);
+ auto column = data_type->create_column();
+ block->insert(vectorized::ColumnWithTypeAndName(std::move(column), data_type,
+ column_desc->name()));
+ }
+ }
+
+private:
+ const std::string kTestDir = "/ut_dir/vertical_compaction_test";
+ std::string absolute_dir;
+ std::unique_ptr<DataDir> _data_dir;
+};
+
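+// Exercises RowSourcesBuffer directly: append/flush/seek round-trip,
+// agg-flag updates, and same_source_count over runs of identical sources.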
+TEST_F(VerticalCompactionTest, TestRowSourcesBuffer) {
+ RowSourcesBuffer buffer(100, absolute_dir, READER_CUMULATIVE_COMPACTION);
+ RowSource s1(0, 0);
+ RowSource s2(0, 0);
+ RowSource s3(1, 1);
+ RowSource s4(1, 0);
+ RowSource s5(2, 0);
+ RowSource s6(2, 0);
+ std::vector<RowSource> tmp_row_source;
+ tmp_row_source.emplace_back(s1);
+ tmp_row_source.emplace_back(s2);
+ tmp_row_source.emplace_back(s3);
+ tmp_row_source.emplace_back(s4);
+ tmp_row_source.emplace_back(s5);
+ tmp_row_source.emplace_back(s6);
+
+ EXPECT_TRUE(buffer.append(tmp_row_source).ok());
+ EXPECT_EQ(buffer.total_size(), 6);
+ size_t limit = 10;
+ buffer.flush();
+ buffer.seek_to_begin();
+
+ int idx = -1;
+ while (buffer.has_remaining().ok()) {
+ if (++idx == 1) {
+ EXPECT_TRUE(buffer.current().agg_flag());
+ }
+ auto cur = buffer.current().get_source_num();
+ auto same = buffer.same_source_count(cur, limit);
+ EXPECT_EQ(same, 2);
+ buffer.advance(same);
+ }
+
+ RowSourcesBuffer buffer1(101, absolute_dir, READER_CUMULATIVE_COMPACTION);
+ EXPECT_TRUE(buffer1.append(tmp_row_source).ok());
+ EXPECT_TRUE(buffer1.append(tmp_row_source).ok());
+ buffer1.set_agg_flag(2, false);
+ buffer1.set_agg_flag(4, true);
+ buffer1.flush();
+ buffer1.seek_to_begin();
+ EXPECT_EQ(buffer1.total_size(), 12);
+ idx = -1;
+ while (buffer1.has_remaining().ok()) {
+ ++idx;
+ if (idx == 1) {
+ // row 2 had its agg flag cleared via set_agg_flag(2, false)
+ EXPECT_FALSE(buffer1.current().agg_flag());
+ }
+ if (idx == 2) {
+ // row 4 had its agg flag set via set_agg_flag(4, true)
+ EXPECT_TRUE(buffer1.current().agg_flag());
+ }
+ LOG(INFO) << "buf_idx: " << buffer1.buf_idx();
+ auto cur = buffer1.current().get_source_num();
+ auto same = buffer1.same_source_count(cur, limit);
+ EXPECT_EQ(same, 2);
+ buffer1.advance(same);
+ }
+}
+
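+// DUP_KEYS merge of two identical rowsets: every input row must appear
+// twice in the output, so input row k maps to output rows 2k and 2k + 1.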
+TEST_F(VerticalCompactionTest, TestDupKeyVerticalMerge) {
+ auto num_input_rowset = 2;
+ auto num_segments = 2;
+ auto rows_per_segment = 100;
+ SegmentsOverlapPB overlap = NONOVERLAPPING;
+ std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
+ generate_input_data(num_input_rowset, num_segments, rows_per_segment, overlap, input_data);
+ for (auto rs_id = 0; rs_id < input_data.size(); rs_id++) {
+ for (auto s_id = 0; s_id < input_data[rs_id].size(); s_id++) {
+ for (auto row_id = 0; row_id < input_data[rs_id][s_id].size(); row_id++) {
+ LOG(INFO) << "input data: " << std::get<0>(input_data[rs_id][s_id][row_id]) << " "
+ << std::get<1>(input_data[rs_id][s_id][row_id]);
+ }
+ }
+ }
+
+ TabletSchemaSPtr tablet_schema = create_schema();
+ // create input rowset
+ vector<RowsetSharedPtr> input_rowsets;
+ SegmentsOverlapPB new_overlap = overlap;
+ for (auto i = 0; i < num_input_rowset; i++) {
+ if (overlap == OVERLAP_UNKNOWN) {
+ if (i == 0) {
+ new_overlap = NONOVERLAPPING;
+ } else {
+ new_overlap = OVERLAPPING;
+ }
+ }
+ RowsetSharedPtr rowset = create_rowset(tablet_schema, new_overlap, input_data[i]);
+ input_rowsets.push_back(rowset);
+ }
+ // create input rowset reader
+ vector<RowsetReaderSharedPtr> input_rs_readers;
+ for (auto& rowset : input_rowsets) {
+ RowsetReaderSharedPtr rs_reader;
+ EXPECT_TRUE(rowset->create_reader(&rs_reader).ok());
+ input_rs_readers.push_back(std::move(rs_reader));
+ }
+
+ // create output rowset writer
+ RowsetWriterContext writer_context;
+ create_rowset_writer_context(tablet_schema, NONOVERLAPPING, 3456, &writer_context);
+ std::unique_ptr<RowsetWriter> output_rs_writer;
+ Status s = RowsetFactory::create_rowset_writer(writer_context, true, &output_rs_writer);
+ EXPECT_TRUE(s.ok());
+
+ // merge input rowset
+ bool has_delete_handler = false;
+ TabletSharedPtr tablet = create_tablet(
+ *tablet_schema, false, output_rs_writer->version().first - 1, has_delete_handler);
+ Merger::Statistics stats;
+ RowIdConversion rowid_conversion;
+ stats.rowid_conversion = &rowid_conversion;
+ s = Merger::vertical_merge_rowsets(tablet, READER_BASE_COMPACTION, tablet_schema,
+ input_rs_readers, output_rs_writer.get(), 100, &stats);
+ EXPECT_TRUE(s.ok());
+ RowsetSharedPtr out_rowset = output_rs_writer->build();
+
+ // create output rowset reader
+ RowsetReaderContext reader_context;
+ reader_context.tablet_schema = tablet_schema;
+ reader_context.need_ordered_result = false;
+ std::vector<uint32_t> return_columns = {0, 1};
+ reader_context.return_columns = &return_columns;
+ reader_context.is_vec = true;
+ RowsetReaderSharedPtr output_rs_reader;
+ LOG(INFO) << "create rowset reader in test";
+ create_and_init_rowset_reader(out_rowset.get(), reader_context, &output_rs_reader);
+
+ // read output rowset data
+ vectorized::Block output_block;
+ std::vector<std::tuple<int64_t, int64_t>> output_data;
+ do {
+ block_create(tablet_schema, &output_block);
+ s = output_rs_reader->next_block(&output_block);
+ auto columns = output_block.get_columns_with_type_and_name();
+ EXPECT_EQ(columns.size(), 2);
+ for (auto i = 0; i < output_block.rows(); i++) {
+ output_data.emplace_back(columns[0].column->get_int(i), columns[1].column->get_int(i));
+ }
+ } while (s == Status::OK());
+ EXPECT_EQ(Status::OLAPInternalError(OLAP_ERR_DATA_EOF), s);
+ EXPECT_EQ(out_rowset->rowset_meta()->num_rows(), output_data.size());
+ EXPECT_EQ(output_data.size(), num_input_rowset * num_segments * rows_per_segment);
+ std::vector<uint32_t> segment_num_rows;
+ EXPECT_TRUE(output_rs_reader->get_segment_num_rows(&segment_num_rows).ok());
+ // check vertical compaction result
+ for (auto id = 0; id < output_data.size(); id++) {
+ LOG(INFO) << "output data: " << std::get<0>(output_data[id]) << " "
+ << std::get<1>(output_data[id]);
+ }
+ int dst_id = 0;
+ for (auto rs_id = 0; rs_id < input_data.size(); rs_id++) {
+ dst_id = 0;
+ for (auto s_id = 0; s_id < input_data[rs_id].size(); s_id++) {
+ for (auto row_id = 0; row_id < input_data[rs_id][s_id].size(); row_id++) {
+ LOG(INFO) << "input data: " << std::get<0>(input_data[rs_id][s_id][row_id]) << " "
+ << std::get<1>(input_data[rs_id][s_id][row_id]);
+ EXPECT_EQ(std::get<0>(input_data[rs_id][s_id][row_id]),
+ std::get<0>(output_data[dst_id]));
+ EXPECT_EQ(std::get<1>(input_data[rs_id][s_id][row_id]),
+ std::get<1>(output_data[dst_id]));
+ dst_id += 2;
+ }
+ }
+ }
+}
+
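+// UNIQUE_KEYS merge of two identical rowsets: duplicate keys collapse,
+// so the output must equal the contents of a single input rowset.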
+TEST_F(VerticalCompactionTest, TestUniqueKeyVerticalMerge) {
+ auto num_input_rowset = 2;
+ auto num_segments = 2;
+ auto rows_per_segment = 100;
+ SegmentsOverlapPB overlap = NONOVERLAPPING;
+ std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
+ generate_input_data(num_input_rowset, num_segments, rows_per_segment, overlap, input_data);
+ for (auto rs_id = 0; rs_id < input_data.size(); rs_id++) {
+ for (auto s_id = 0; s_id < input_data[rs_id].size(); s_id++) {
+ for (auto row_id = 0; row_id < input_data[rs_id][s_id].size(); row_id++) {
+ LOG(INFO) << "input data: " << std::get<0>(input_data[rs_id][s_id][row_id]) << " "
+ << std::get<1>(input_data[rs_id][s_id][row_id]);
+ }
+ }
+ }
+
+ TabletSchemaSPtr tablet_schema = create_schema(UNIQUE_KEYS);
+ // create input rowset
+ vector<RowsetSharedPtr> input_rowsets;
+ SegmentsOverlapPB new_overlap = overlap;
+ for (auto i = 0; i < num_input_rowset; i++) {
+ if (overlap == OVERLAP_UNKNOWN) {
+ if (i == 0) {
+ new_overlap = NONOVERLAPPING;
+ } else {
+ new_overlap = OVERLAPPING;
+ }
+ }
+ RowsetSharedPtr rowset = create_rowset(tablet_schema, new_overlap, input_data[i]);
+ input_rowsets.push_back(rowset);
+ }
+ // create input rowset reader
+ vector<RowsetReaderSharedPtr> input_rs_readers;
+ for (auto& rowset : input_rowsets) {
+ RowsetReaderSharedPtr rs_reader;
+ EXPECT_TRUE(rowset->create_reader(&rs_reader).ok());
+ input_rs_readers.push_back(std::move(rs_reader));
+ }
+
+ // create output rowset writer
+ RowsetWriterContext writer_context;
+ create_rowset_writer_context(tablet_schema, NONOVERLAPPING, 3456, &writer_context);
+ std::unique_ptr<RowsetWriter> output_rs_writer;
+ Status s = RowsetFactory::create_rowset_writer(writer_context, true, &output_rs_writer);
+ EXPECT_TRUE(s.ok());
+
+ // merge input rowset
+ bool has_delete_handler = false;
+ TabletSharedPtr tablet = create_tablet(
+ *tablet_schema, false, output_rs_writer->version().first - 1, has_delete_handler);
+ Merger::Statistics stats;
+ RowIdConversion rowid_conversion;
+ stats.rowid_conversion = &rowid_conversion;
+ s = Merger::vertical_merge_rowsets(tablet, READER_BASE_COMPACTION, tablet_schema,
+ input_rs_readers, output_rs_writer.get(), 100, &stats);
+ EXPECT_TRUE(s.ok());
+ RowsetSharedPtr out_rowset = output_rs_writer->build();
+
+ // create output rowset reader
+ RowsetReaderContext reader_context;
+ reader_context.tablet_schema = tablet_schema;
+ reader_context.need_ordered_result = false;
+ std::vector<uint32_t> return_columns = {0, 1};
+ reader_context.return_columns = &return_columns;
+ reader_context.is_vec = true;
+ RowsetReaderSharedPtr output_rs_reader;
+ LOG(INFO) << "create rowset reader in test";
+ create_and_init_rowset_reader(out_rowset.get(), reader_context, &output_rs_reader);
+
+ // read output rowset data
+ vectorized::Block output_block;
+ std::vector<std::tuple<int64_t, int64_t>> output_data;
+ do {
+ block_create(tablet_schema, &output_block);
+ s = output_rs_reader->next_block(&output_block);
+ auto columns = output_block.get_columns_with_type_and_name();
+ EXPECT_EQ(columns.size(), 2);
+ for (auto i = 0; i < output_block.rows(); i++) {
+ output_data.emplace_back(columns[0].column->get_int(i), columns[1].column->get_int(i));
+ }
+ } while (s == Status::OK());
+ EXPECT_EQ(Status::OLAPInternalError(OLAP_ERR_DATA_EOF), s);
+ EXPECT_EQ(out_rowset->rowset_meta()->num_rows(), output_data.size());
+ EXPECT_EQ(output_data.size(), num_segments * rows_per_segment);
+ std::vector<uint32_t> segment_num_rows;
+ EXPECT_TRUE(output_rs_reader->get_segment_num_rows(&segment_num_rows).ok());
+ // check vertical compaction result
+ for (auto id = 0; id < output_data.size(); id++) {
+ LOG(INFO) << "output data: " << std::get<0>(output_data[id]) << " "
+ << std::get<1>(output_data[id]);
+ }
+ int dst_id = 0;
+ for (auto s_id = 0; s_id < input_data[0].size(); s_id++) {
+ for (auto row_id = 0; row_id < input_data[0][s_id].size(); row_id++) {
+ EXPECT_EQ(std::get<0>(input_data[0][s_id][row_id]), std::get<0>(output_data[dst_id]));
+ EXPECT_EQ(std::get<1>(input_data[0][s_id][row_id]), std::get<1>(output_data[dst_id]));
+ dst_id++;
+ }
+ }
+}
+
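+// DUP_KEYS merge with a delete predicate (first key column < 100)
+// registered at the output version: the first 100 keys of every input
+// rowset must be filtered out of the result.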
+TEST_F(VerticalCompactionTest, TestDupKeyVerticalMergeWithDelete) {
+ auto num_input_rowset = 2;
+ auto num_segments = 2;
+ auto rows_per_segment = 100;
+ SegmentsOverlapPB overlap = NONOVERLAPPING;
+ std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
+ generate_input_data(num_input_rowset, num_segments, rows_per_segment, overlap, input_data);
+ for (auto rs_id = 0; rs_id < input_data.size(); rs_id++) {
+ for (auto s_id = 0; s_id < input_data[rs_id].size(); s_id++) {
+ for (auto row_id = 0; row_id < input_data[rs_id][s_id].size(); row_id++) {
+ LOG(INFO) << "input data: " << std::get<0>(input_data[rs_id][s_id][row_id]) << " "
+ << std::get<1>(input_data[rs_id][s_id][row_id]);
+ }
+ }
+ }
+
+ TabletSchemaSPtr tablet_schema = create_schema(DUP_KEYS);
+ // create input rowset
+ vector<RowsetSharedPtr> input_rowsets;
+ SegmentsOverlapPB new_overlap = overlap;
+ for (auto i = 0; i < num_input_rowset; i++) {
+ if (overlap == OVERLAP_UNKNOWN) {
+ if (i == 0) {
+ new_overlap = NONOVERLAPPING;
+ } else {
+ new_overlap = OVERLAPPING;
+ }
+ }
+ RowsetSharedPtr rowset = create_rowset(tablet_schema, new_overlap, input_data[i]);
+ input_rowsets.push_back(rowset);
+ }
+ // create input rowset reader
+ vector<RowsetReaderSharedPtr> input_rs_readers;
+ for (auto& rowset : input_rowsets) {
+ RowsetReaderSharedPtr rs_reader;
+ EXPECT_TRUE(rowset->create_reader(&rs_reader).ok());
+ input_rs_readers.push_back(std::move(rs_reader));
+ }
+
+ // create output rowset writer
+ RowsetWriterContext writer_context;
+ create_rowset_writer_context(tablet_schema, NONOVERLAPPING, 3456, &writer_context);
+ std::unique_ptr<RowsetWriter> output_rs_writer;
+ Status s = RowsetFactory::create_rowset_writer(writer_context, true, &output_rs_writer);
+ EXPECT_TRUE(s.ok());
+
+ // merge input rowset
+ bool has_delete_handler = true;
+ TabletSharedPtr tablet = create_tablet(*tablet_schema, false, output_rs_writer->version().first,
+ has_delete_handler);
+ Merger::Statistics stats;
+ RowIdConversion rowid_conversion;
+ stats.rowid_conversion = &rowid_conversion;
+ s = Merger::vertical_merge_rowsets(tablet, READER_BASE_COMPACTION, tablet_schema,
+ input_rs_readers, output_rs_writer.get(), 100, &stats);
+ EXPECT_TRUE(s.ok());
+ RowsetSharedPtr out_rowset = output_rs_writer->build();
+
+ // create output rowset reader
+ RowsetReaderContext reader_context;
+ reader_context.tablet_schema = tablet_schema;
+ reader_context.need_ordered_result = false;
+ std::vector<uint32_t> return_columns = {0, 1};
+ reader_context.return_columns = &return_columns;
+ reader_context.is_vec = true;
+ RowsetReaderSharedPtr output_rs_reader;
+ LOG(INFO) << "create rowset reader in test";
+ create_and_init_rowset_reader(out_rowset.get(), reader_context, &output_rs_reader);
+
+ // read output rowset data
+ vectorized::Block output_block;
+ std::vector<std::tuple<int64_t, int64_t>> output_data;
+ do {
+ block_create(tablet_schema, &output_block);
+ s = output_rs_reader->next_block(&output_block);
+ auto columns = output_block.get_columns_with_type_and_name();
+ EXPECT_EQ(columns.size(), 2);
+ for (auto i = 0; i < output_block.rows(); i++) {
+ output_data.emplace_back(columns[0].column->get_int(i), columns[1].column->get_int(i));
+ }
+ } while (s == Status::OK());
+ EXPECT_EQ(Status::OLAPInternalError(OLAP_ERR_DATA_EOF), s);
+ EXPECT_EQ(out_rowset->rowset_meta()->num_rows(), output_data.size());
+ EXPECT_EQ(output_data.size(),
+ num_input_rowset * num_segments * rows_per_segment - num_input_rowset * 100);
+ std::vector<uint32_t> segment_num_rows;
+ EXPECT_TRUE(output_rs_reader->get_segment_num_rows(&segment_num_rows).ok());
+ if (has_delete_handler) {
+ // all keys less than 100 were deleted by the delete handler
+ for (auto& item : output_data) {
+ EXPECT_GE(std::get<0>(item), 100);
+ }
+ }
+}
+
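+// AGG_KEYS merge of two identical rowsets: rows with equal keys are
+// aggregated, so each output c2 must be twice the corresponding input c2.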
+TEST_F(VerticalCompactionTest, TestAggKeyVerticalMerge) {
+ auto num_input_rowset = 2;
+ auto num_segments = 2;
+ auto rows_per_segment = 100;
+ SegmentsOverlapPB overlap = NONOVERLAPPING;
+ std::vector<std::vector<std::vector<std::tuple<int64_t, int64_t>>>> input_data;
+ generate_input_data(num_input_rowset, num_segments, rows_per_segment, overlap, input_data);
+ for (auto rs_id = 0; rs_id < input_data.size(); rs_id++) {
+ for (auto s_id = 0; s_id < input_data[rs_id].size(); s_id++) {
+ for (auto row_id = 0; row_id < input_data[rs_id][s_id].size(); row_id++) {
+ LOG(INFO) << "input data: " << std::get<0>(input_data[rs_id][s_id][row_id]) << " "
+ << std::get<1>(input_data[rs_id][s_id][row_id]);
+ }
+ }
+ }
+
+ TabletSchemaSPtr tablet_schema = create_agg_schema();
+ // create input rowset
+ vector<RowsetSharedPtr> input_rowsets;
+ SegmentsOverlapPB new_overlap = overlap;
+ for (auto i = 0; i < num_input_rowset; i++) {
+ if (overlap == OVERLAP_UNKNOWN) {
+ if (i == 0) {
+ new_overlap = NONOVERLAPPING;
+ } else {
+ new_overlap = OVERLAPPING;
+ }
+ }
+ RowsetSharedPtr rowset = create_rowset(tablet_schema, new_overlap, input_data[i]);
+ input_rowsets.push_back(rowset);
+ }
+ // create input rowset reader
+ vector<RowsetReaderSharedPtr> input_rs_readers;
+ for (auto& rowset : input_rowsets) {
+ RowsetReaderSharedPtr rs_reader;
+ EXPECT_TRUE(rowset->create_reader(&rs_reader).ok());
+ input_rs_readers.push_back(std::move(rs_reader));
+ }
+
+ // create output rowset writer
+ RowsetWriterContext writer_context;
+ create_rowset_writer_context(tablet_schema, NONOVERLAPPING, 3456, &writer_context);
+ std::unique_ptr<RowsetWriter> output_rs_writer;
+ Status s = RowsetFactory::create_rowset_writer(writer_context, true, &output_rs_writer);
+ EXPECT_TRUE(s.ok());
+
+ // merge input rowset
+ bool has_delete_handler = false;
+ TabletSharedPtr tablet = create_tablet(
+ *tablet_schema, false, output_rs_writer->version().first - 1, has_delete_handler);
+ Merger::Statistics stats;
+ RowIdConversion rowid_conversion;
+ stats.rowid_conversion = &rowid_conversion;
+ s = Merger::vertical_merge_rowsets(tablet, READER_BASE_COMPACTION, tablet_schema,
+ input_rs_readers, output_rs_writer.get(), 100, &stats);
+ EXPECT_TRUE(s.ok());
+ RowsetSharedPtr out_rowset = output_rs_writer->build();
+
+ // create output rowset reader
+ RowsetReaderContext reader_context;
+ reader_context.tablet_schema = tablet_schema;
+ reader_context.need_ordered_result = false;
+ std::vector<uint32_t> return_columns = {0, 1};
+ reader_context.return_columns = &return_columns;
+ reader_context.is_vec = true;
+ RowsetReaderSharedPtr output_rs_reader;
+ LOG(INFO) << "create rowset reader in test";
+ create_and_init_rowset_reader(out_rowset.get(), reader_context, &output_rs_reader);
+
+ // read output rowset data
+ vectorized::Block output_block;
+ std::vector<std::tuple<int64_t, int64_t>> output_data;
+ do {
+ block_create(tablet_schema, &output_block);
+ s = output_rs_reader->next_block(&output_block);
+ auto columns = output_block.get_columns_with_type_and_name();
+ EXPECT_EQ(columns.size(), 2);
+ for (auto i = 0; i < output_block.rows(); i++) {
+ output_data.emplace_back(columns[0].column->get_int(i), columns[1].column->get_int(i));
+ }
+ } while (s == Status::OK());
+ EXPECT_EQ(Status::OLAPInternalError(OLAP_ERR_DATA_EOF), s);
+ EXPECT_EQ(out_rowset->rowset_meta()->num_rows(), output_data.size());
+ EXPECT_EQ(output_data.size(), num_segments * rows_per_segment);
+ std::vector<uint32_t> segment_num_rows;
+ EXPECT_TRUE(output_rs_reader->get_segment_num_rows(&segment_num_rows).ok());
+ // check vertical compaction result
+ for (auto id = 0; id < output_data.size(); id++) {
+ LOG(INFO) << "output data: " << std::get<0>(output_data[id]) << " "
+ << std::get<1>(output_data[id]);
+ }
+ int dst_id = 0;
+ for (auto s_id = 0; s_id < input_data[0].size(); s_id++) {
+ for (auto row_id = 0; row_id < input_data[0][s_id].size(); row_id++) {
+ LOG(INFO) << "input data: " << std::get<0>(input_data[0][s_id][row_id]) << " "
+ << std::get<1>(input_data[0][s_id][row_id]);
+ EXPECT_EQ(std::get<0>(input_data[0][s_id][row_id]), std::get<0>(output_data[dst_id]));
+ EXPECT_EQ(std::get<1>(input_data[0][s_id][row_id]) * 2,
+ std::get<1>(output_data[dst_id]));
+ dst_id++;
+ }
+ }
+}
+
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/regression-test/data/compaction/test_compaction_agg_keys_with_delete.out b/regression-test/data/compaction/test_compaction_agg_keys_with_delete.out
new file mode 100644
index 0000000000..2fb615c3d5
--- /dev/null
+++ b/regression-test/data/compaction/test_compaction_agg_keys_with_delete.out
@@ -0,0 +1,7 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20 \N \N
+
+-- !select_default2 --
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20 \N \N
+
diff --git a/regression-test/data/compaction/test_compaction_dup_keys_with_delete.out b/regression-test/data/compaction/test_compaction_dup_keys_with_delete.out
new file mode 100644
index 0000000000..b2c2b42f47
--- /dev/null
+++ b/regression-test/data/compaction/test_compaction_dup_keys_with_delete.out
@@ -0,0 +1,7 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+
+-- !select_default2 --
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+
diff --git a/regression-test/data/compaction/test_compaction_uniq_keys_with_delete.out b/regression-test/data/compaction/test_compaction_uniq_keys_with_delete.out
new file mode 100644
index 0000000000..ea06a5aa3c
--- /dev/null
+++ b/regression-test/data/compaction/test_compaction_uniq_keys_with_delete.out
@@ -0,0 +1,15 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 1 32 20
+
+-- !select_default1 --
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+
+-- !select_default2 --
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+
+-- !select_default3 --
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+
diff --git a/regression-test/data/compaction/test_vertical_compaction_agg_keys.out b/regression-test/data/compaction/test_vertical_compaction_agg_keys.out
new file mode 100644
index 0000000000..b4db7b90f7
--- /dev/null
+++ b/regression-test/data/compaction/test_vertical_compaction_agg_keys.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 2020-01-02T00:00 2 31 19 \N \N
+
+-- !select_default1 --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 2 32 20 \N \N
+
+-- !select_default2 --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 2 32 20 \N \N
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N 2020-01-04T00:00 \N 2017-10-01T11:11:11.150111 2020-01-05T00:00 3 34 20 \N \N
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20 \N \N
+
+-- !select_default3 --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 2 32 20 \N \N
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N 2020-01-04T00:00 \N 2017-10-01T11:11:11.150111 2020-01-05T00:00 3 34 20 \N \N
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20 \N \N
diff --git a/regression-test/data/compaction/test_vertical_compaction_dup_keys.out b/regression-test/data/compaction/test_vertical_compaction_dup_keys.out
new file mode 100644
index 0000000000..05159c0146
--- /dev/null
+++ b/regression-test/data/compaction/test_vertical_compaction_dup_keys.out
@@ -0,0 +1,25 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-01T00:00 2020-01-01T00:00 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 2020-01-01T00:00 1 30 20
+1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 2020-01-02T00:00 1 31 19
+
+-- !select_default1 --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.150 2017-10-01T11:11:11.130111 2020-01-02T00:00 1 31 21
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 1 32 20
+
+-- !select_default2 --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.150 2017-10-01T11:11:11.130111 2020-01-02T00:00 1 31 21
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 1 32 20
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.100 2017-10-01T11:11:11.140111 2020-01-03T00:00 1 32 22
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-04T00:00 2020-01-04T00:00 2017-10-01T11:11:11.110 2017-10-01T11:11:11.150111 2020-01-04T00:00 1 33 21
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+
+-- !select_default3 --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.150 2017-10-01T11:11:11.130111 2020-01-02T00:00 1 31 21
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 1 32 20
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.100 2017-10-01T11:11:11.140111 2020-01-03T00:00 1 32 22
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-04T00:00 2020-01-04T00:00 2017-10-01T11:11:11.110 2017-10-01T11:11:11.150111 2020-01-04T00:00 1 33 21
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+
diff --git a/regression-test/data/compaction/test_vertical_compaction_uniq_keys.out b/regression-test/data/compaction/test_vertical_compaction_uniq_keys.out
new file mode 100644
index 0000000000..863a064c64
--- /dev/null
+++ b/regression-test/data/compaction/test_vertical_compaction_uniq_keys.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 2020-01-02T00:00 1 31 19
+
+-- !select_default1 --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 1 32 20
+
+-- !select_default2 --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 1 32 20
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+
+-- !select_default3 --
+2 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-03T00:00 2020-01-03T00:00 2017-10-01T11:11:11.140 2017-10-01T11:11:11.120111 2020-01-03T00:00 1 32 20
+3 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
+4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20
diff --git a/regression-test/suites/compaction/test_compaction_agg_keys_with_delete.groovy b/regression-test/suites/compaction/test_compaction_agg_keys_with_delete.groovy
new file mode 100644
index 0000000000..c641fa4cbe
--- /dev/null
+++ b/regression-test/suites/compaction/test_compaction_agg_keys_with_delete.groovy
@@ -0,0 +1,219 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_compaction_agg_keys_with_delete") {
+ def tableName = "test_compaction_agg_keys_with_delete_regression_test"
+
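+ // Flow: discover BE host/port, read disable_auto_compaction from the
+ // BE config, load rows interleaved with DELETEs, trigger cumulative
+ // compaction per tablet via the BE HTTP API, wait for completion, then
+ // check the remaining rowset row count and the query results.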
+ try {
+ //BackendId,Cluster,IP,HeartbeatPort,BePort,HttpPort,BrpcPort,LastStartTime,LastHeartbeat,Alive,SystemDecommissioned,ClusterDecommissioned,TabletNum,DataUsedCapacity,AvailCapacity,TotalCapacity,UsedPct,MaxDiskUsedPct,Tag,ErrMsg,Version,Status
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ String backend_id;
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ for (String[] backend in backends) {
+ backendId_to_backendIP.put(backend[0], backend[2])
+ backendId_to_backendHttpPort.put(backend[0], backend[5])
+ }
+
+ backend_id = backendId_to_backendIP.keySet()[0]
+ StringBuilder showConfigCommand = new StringBuilder();
+ showConfigCommand.append("curl -X GET http://")
+ showConfigCommand.append(backendId_to_backendIP.get(backend_id))
+ showConfigCommand.append(":")
+ showConfigCommand.append(backendId_to_backendHttpPort.get(backend_id))
+ showConfigCommand.append("/api/show_config")
+ logger.info(showConfigCommand.toString())
+ def process = showConfigCommand.toString().execute()
+ int code = process.waitFor()
+ String err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ String out = process.getText()
+ logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def configList = parseJson(out.trim())
+ assert configList instanceof List
+
+ boolean disableAutoCompaction = true
+ for (Object ele in (List) configList) {
+ assert ele instanceof List<String>
+ if (((List<String>) ele)[0] == "disable_auto_compaction") {
+ disableAutoCompaction = Boolean.parseBoolean(((List<String>) ele)[2])
+ }
+ }
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ `user_id` LARGEINT NOT NULL COMMENT "user id",
+ `date` DATE NOT NULL COMMENT "data load time",
+ `datev2` DATEV2 NOT NULL COMMENT "data load time",
+ `datetimev2_1` DATETIMEV2(3) NOT NULL COMMENT "data load time",
+ `datetimev2_2` DATETIMEV2(6) NOT NULL COMMENT "data load time",
+ `city` VARCHAR(20) COMMENT "user city",
+ `age` SMALLINT COMMENT "user age",
+ `sex` TINYINT COMMENT "user gender",
+ `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "user last visit time",
+ `last_update_date` DATETIME REPLACE_IF_NOT_NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user last update time",
+ `datetime_val1` DATETIMEV2(3) REPLACE DEFAULT "1970-01-01 00:00:00.111" COMMENT "user last visit time",
+ `datetime_val2` DATETIME(6) REPLACE_IF_NOT_NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user last update time",
+ `last_visit_date_not_null` DATETIME REPLACE NOT NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user last visit time",
+ `cost` BIGINT SUM DEFAULT "0" COMMENT "user total cost",
+ `max_dwell_time` INT MAX DEFAULT "0" COMMENT "user max dwell time",
+ `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "user min dwell time",
+ `hll_col` HLL HLL_UNION NOT NULL COMMENT "HLL column",
+ `bitmap_col` Bitmap BITMAP_UNION NOT NULL COMMENT "bitmap column" )
+ AGGREGATE KEY(`user_id`, `date`, `datev2`, `datetimev2_1`, `datetimev2_2`, `city`, `age`, `sex`) DISTRIBUTED BY HASH(`user_id`)
+ PROPERTIES ( "replication_num" = "1" );
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-01', '2020-01-01', '2017-10-01 11:11:11.170000', '2017-10-01 11:11:11.110111', '2020-01-01', 1, 30, 20, hll_hash(1), to_bitmap(1))
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.160000', '2017-10-01 11:11:11.100111', '2020-01-02', 1, 31, 19, hll_hash(2), to_bitmap(2))
+ """
+
+ sql """
+ DELETE FROM ${tableName} where user_id <= 5
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.150000', '2017-10-01 11:11:11.130111', '2020-01-02', 1, 31, 21, hll_hash(2), to_bitmap(2))
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.140000', '2017-10-01 11:11:11.120111', '2020-01-03', 1, 32, 20, hll_hash(3), to_bitmap(3))
+ """
+
+ sql """
+ DELETE FROM ${tableName} where user_id <= 5
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.100000', '2017-10-01 11:11:11.140111', '2020-01-03', 1, 32, 22, hll_hash(3), to_bitmap(3))
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-04', '2020-01-04', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.150111', '2020-01-04', 1, 33, 21, hll_hash(4), to_bitmap(4))
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20, hll_hash(5), to_bitmap(5))
+ """
+
+ sql """
+ DELETE FROM ${tableName} where user_id <= 5
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20, hll_hash(5), to_bitmap(5))
+ """
+
+ qt_select_default """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+ //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus
+ String[][] tablets = sql """ show tablets from ${tableName}; """
+
+ // trigger compactions for all tablets in ${tableName}
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X POST http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run?tablet_id=")
+ sb.append(tablet_id)
+ sb.append("&compact_type=cumulative")
+
+ String command = sb.toString()
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactJson = parseJson(out.trim())
+ if (compactJson.status.toLowerCase() == "fail") {
+ assertEquals(disableAutoCompaction, false)
+ logger.info("Compaction was done automatically!")
+ }
+ if (disableAutoCompaction) {
+ assertEquals("success", compactJson.status.toLowerCase())
+ }
+ }
+
+ // wait for all compactions done
+ for (String[] tablet in tablets) {
+ boolean running = true
+ do {
+ Thread.sleep(1000)
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X GET http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run_status?tablet_id=")
+ sb.append(tablet_id)
+
+ String command = sb.toString()
+ logger.info(command)
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactionStatus = parseJson(out.trim())
+ assertEquals("success", compactionStatus.status.toLowerCase())
+ running = compactionStatus.run_status
+ } while (running)
+ }
+
+ int rowCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ StringBuilder sb = new StringBuilder();
+ def compactionStatusUrlIndex = 17
+ sb.append("curl -X GET ")
+ sb.append(tablet[compactionStatusUrlIndex])
+ String command = sb.toString()
+ // wait for cleaning stale_rowsets
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ for (String rowset in (List<String>) tabletJson.rowsets) {
+ rowCount += Integer.parseInt(rowset.split(" ")[1])
+ }
+ }
+ assert (rowCount < 8)
+ qt_select_default2 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${tableName}")
+ }
+}
diff --git a/regression-test/suites/compaction/test_compaction_dup_keys_with_delete.groovy b/regression-test/suites/compaction/test_compaction_dup_keys_with_delete.groovy
new file mode 100644
index 0000000000..f57229072d
--- /dev/null
+++ b/regression-test/suites/compaction/test_compaction_dup_keys_with_delete.groovy
@@ -0,0 +1,217 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_compaction_dup_keys_with_delete") {
+ def tableName = "test_compaction_dup_keys_with_delete_regression_test"
+
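+ // Same flow as test_compaction_agg_keys_with_delete, but on a
+ // DUPLICATE KEY table.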
+ try {
+ //BackendId,Cluster,IP,HeartbeatPort,BePort,HttpPort,BrpcPort,LastStartTime,LastHeartbeat,Alive,SystemDecommissioned,ClusterDecommissioned,TabletNum,DataUsedCapacity,AvailCapacity,TotalCapacity,UsedPct,MaxDiskUsedPct,Tag,ErrMsg,Version,Status
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ String backend_id;
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ for (String[] backend in backends) {
+ backendId_to_backendIP.put(backend[0], backend[2])
+ backendId_to_backendHttpPort.put(backend[0], backend[5])
+ }
+
+ backend_id = backendId_to_backendIP.keySet()[0]
+ StringBuilder showConfigCommand = new StringBuilder();
+ showConfigCommand.append("curl -X GET http://")
+ showConfigCommand.append(backendId_to_backendIP.get(backend_id))
+ showConfigCommand.append(":")
+ showConfigCommand.append(backendId_to_backendHttpPort.get(backend_id))
+ showConfigCommand.append("/api/show_config")
+ logger.info(showConfigCommand.toString())
+ def process = showConfigCommand.toString().execute()
+ int code = process.waitFor()
+ String err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ String out = process.getText()
+ logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def configList = parseJson(out.trim())
+ assert configList instanceof List
+
+ boolean disableAutoCompaction = true
+ for (Object ele in (List) configList) {
+ assert ele instanceof List<String>
+ if (((List<String>) ele)[0] == "disable_auto_compaction") {
+ disableAutoCompaction = Boolean.parseBoolean(((List<String>) ele)[2])
+ }
+ }
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ `user_id` LARGEINT NOT NULL COMMENT "user id",
+ `date` DATE NOT NULL COMMENT "data load time",
+ `datev2` DATEV2 NOT NULL COMMENT "data load time",
+ `datetimev2_1` DATETIMEV2(3) NOT NULL COMMENT "data load time",
+ `datetimev2_2` DATETIMEV2(6) NOT NULL COMMENT "data load time",
+ `city` VARCHAR(20) COMMENT "user city",
+ `age` SMALLINT COMMENT "user age",
+ `sex` TINYINT COMMENT "user gender",
+ `last_visit_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user last visit time",
+ `last_update_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user last update time",
+ `datetime_val1` DATETIMEV2(3) DEFAULT "1970-01-01 00:00:00.111" COMMENT "user last visit time",
+ `datetime_val2` DATETIME(6) DEFAULT "1970-01-01 00:00:00" COMMENT "user last update time",
+ `last_visit_date_not_null` DATETIME NOT NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user last visit time",
+ `cost` BIGINT DEFAULT "0" COMMENT "user total cost",
+ `max_dwell_time` INT DEFAULT "0" COMMENT "user max dwell time",
+ `min_dwell_time` INT DEFAULT "99999" COMMENT "user min dwell time")
+ DUPLICATE KEY(`user_id`, `date`, `datev2`, `datetimev2_1`, `datetimev2_2`, `city`, `age`, `sex`) DISTRIBUTED BY HASH(`user_id`)
+ PROPERTIES ( "replication_num" = "1" );
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-01', '2020-01-01', '2017-10-01 11:11:11.170000', '2017-10-01 11:11:11.110111', '2020-01-01', 1, 30, 20)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.160000', '2017-10-01 11:11:11.100111', '2020-01-02', 1, 31, 19)
+ """
+
+ sql """
+ DELETE FROM ${tableName} where user_id <= 5
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.150000', '2017-10-01 11:11:11.130111', '2020-01-02', 1, 31, 21)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.140000', '2017-10-01 11:11:11.120111', '2020-01-03', 1, 32, 20)
+ """
+
+ sql """
+ DELETE FROM ${tableName} where user_id <= 5
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.100000', '2017-10-01 11:11:11.140111', '2020-01-03', 1, 32, 22)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-04', '2020-01-04', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.150111', '2020-01-04', 1, 33, 21)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20)
+ """
+
+ sql """
+ DELETE FROM ${tableName} where user_id <= 5
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20)
+ """
+
+ qt_select_default """ SELECT * FROM ${tableName} t ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """
+
+ //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus
+ String[][] tablets = sql """ show tablets from ${tableName}; """
+
+ // trigger compactions for all tablets in ${tableName}
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X POST http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run?tablet_id=")
+ sb.append(tablet_id)
+ sb.append("&compact_type=cumulative")
+
+ String command = sb.toString()
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactJson = parseJson(out.trim())
+ if (compactJson.status.toLowerCase() == "fail") {
+ assertEquals(disableAutoCompaction, false)
+ logger.info("Compaction was done automatically!")
+ }
+ if (disableAutoCompaction) {
+ assertEquals("success", compactJson.status.toLowerCase())
+ }
+ }
+
+ // wait for all compactions done
+ for (String[] tablet in tablets) {
+ boolean running = true
+ do {
+ Thread.sleep(1000)
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X GET http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run_status?tablet_id=")
+ sb.append(tablet_id)
+
+ String command = sb.toString()
+ logger.info(command)
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactionStatus = parseJson(out.trim())
+ assertEquals("success", compactionStatus.status.toLowerCase())
+ running = compactionStatus.run_status
+ } while (running)
+ }
+
+ int rowCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ StringBuilder sb = new StringBuilder();
+ def compactionStatusUrlIndex = 17
+ sb.append("curl -X GET ")
+ sb.append(tablet[compactionStatusUrlIndex])
+ String command = sb.toString()
+ // wait for cleaning stale_rowsets
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ for (String rowset in (List<String>) tabletJson.rowsets) {
+ rowCount += Integer.parseInt(rowset.split(" ")[1])
+ }
+ }
+ assert (rowCount <= 8)
+ qt_select_default2 """ SELECT * FROM ${tableName} t ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${tableName}")
+ }
+}
diff --git a/regression-test/suites/compaction/test_compaction_uniq_keys_with_delete.groovy b/regression-test/suites/compaction/test_compaction_uniq_keys_with_delete.groovy
new file mode 100644
index 0000000000..caffb945d5
--- /dev/null
+++ b/regression-test/suites/compaction/test_compaction_uniq_keys_with_delete.groovy
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_compaction_uniq_keys_with_delete") {
+ def tableName = "test_compaction_uniq_keys_with_delete_regression_test"
+
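+ // Same flow as test_compaction_agg_keys_with_delete, but on a
+ // UNIQUE KEY table, with an extra DELETE and a mid-run snapshot query.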
+ try {
+ //BackendId,Cluster,IP,HeartbeatPort,BePort,HttpPort,BrpcPort,LastStartTime,LastHeartbeat,Alive,SystemDecommissioned,ClusterDecommissioned,TabletNum,DataUsedCapacity,AvailCapacity,TotalCapacity,UsedPct,MaxDiskUsedPct,Tag,ErrMsg,Version,Status
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ String backend_id;
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ for (String[] backend in backends) {
+ backendId_to_backendIP.put(backend[0], backend[2])
+ backendId_to_backendHttpPort.put(backend[0], backend[5])
+ }
+
+ backend_id = backendId_to_backendIP.keySet()[0]
+ StringBuilder showConfigCommand = new StringBuilder();
+ showConfigCommand.append("curl -X GET http://")
+ showConfigCommand.append(backendId_to_backendIP.get(backend_id))
+ showConfigCommand.append(":")
+ showConfigCommand.append(backendId_to_backendHttpPort.get(backend_id))
+ showConfigCommand.append("/api/show_config")
+ logger.info(showConfigCommand.toString())
+ def process = showConfigCommand.toString().execute()
+ int code = process.waitFor()
+ String err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ String out = process.getText()
+ logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def configList = parseJson(out.trim())
+ assert configList instanceof List
+
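+        // Determine whether this BE disables auto compaction: each entry in the
+        // config dump is a list with the config name at index 0 and its value at
+        // index 2. If auto compaction is enabled, the manual trigger below may
+        // legitimately report "fail" because a compaction already ran.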
+ boolean disableAutoCompaction = true
+ for (Object ele in (List) configList) {
+ assert ele instanceof List<String>
+ if (((List<String>) ele)[0] == "disable_auto_compaction") {
+ disableAutoCompaction = Boolean.parseBoolean(((List<String>) ele)[2])
+ }
+ }
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+            `user_id` LARGEINT NOT NULL COMMENT "user id",
+            `date` DATE NOT NULL COMMENT "data load date and time",
+            `datev2` DATEV2 NOT NULL COMMENT "data load date and time",
+            `datetimev2_1` DATETIMEV2(3) NOT NULL COMMENT "data load date and time",
+            `datetimev2_2` DATETIMEV2(6) NOT NULL COMMENT "data load date and time",
+            `city` VARCHAR(20) COMMENT "user's city",
+            `age` SMALLINT COMMENT "user age",
+            `sex` TINYINT COMMENT "user gender",
+            `last_visit_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user's last visit time",
+            `last_update_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user's last update time",
+            `datetime_val1` DATETIMEV2(3) DEFAULT "1970-01-01 00:00:00.111" COMMENT "user's last visit time",
+            `datetime_val2` DATETIME(6) DEFAULT "1970-01-01 00:00:00" COMMENT "user's last update time",
+            `last_visit_date_not_null` DATETIME NOT NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user's last visit time",
+            `cost` BIGINT DEFAULT "0" COMMENT "user's total spend",
+            `max_dwell_time` INT DEFAULT "0" COMMENT "user's maximum dwell time",
+            `min_dwell_time` INT DEFAULT "99999" COMMENT "user's minimum dwell time")
+ UNIQUE KEY(`user_id`, `date`, `datev2`, `datetimev2_1`, `datetimev2_2`, `city`, `age`, `sex`) DISTRIBUTED BY HASH(`user_id`)
+ PROPERTIES ( "replication_num" = "1" );
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-01', '2020-01-01', '2017-10-01 11:11:11.170000', '2017-10-01 11:11:11.110111', '2020-01-01', 1, 30, 20)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.160000', '2017-10-01 11:11:11.100111', '2020-01-02', 1, 31, 19)
+ """
+
+ sql """
+ DELETE FROM ${tableName} where user_id <= 5
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.150000', '2017-10-01 11:11:11.130111', '2020-01-02', 1, 31, 21)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.140000', '2017-10-01 11:11:11.120111', '2020-01-03', 1, 32, 20)
+ """
+
+ sql """
+ DELETE FROM ${tableName} where user_id <= 1
+ """
+
+ qt_select_default """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.100000', '2017-10-01 11:11:11.140111', '2020-01-03', 1, 32, 22)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-04', '2020-01-04', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.150111', '2020-01-04', 1, 33, 21)
+ """
+
+ sql """
+ DELETE FROM ${tableName} where user_id <= 2
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20)
+ """
+
+ qt_select_default1 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20)
+ """
+
+ qt_select_default2 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+ //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus
+ String[][] tablets = sql """ show tablets from ${tableName}; """
+
+ // trigger compactions for all tablets in ${tableName}
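+        // (POST /api/compaction/run on each tablet's BE; compact_type=cumulative
+        // asks for a cumulative rather than a base compaction)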
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X POST http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run?tablet_id=")
+ sb.append(tablet_id)
+ sb.append("&compact_type=cumulative")
+
+ String command = sb.toString()
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactJson = parseJson(out.trim())
+ if (compactJson.status.toLowerCase() == "fail") {
+ assertEquals(disableAutoCompaction, false)
+ logger.info("Compaction was done automatically!")
+ }
+ if (disableAutoCompaction) {
+ assertEquals("success", compactJson.status.toLowerCase())
+ }
+ }
+
+ // wait for all compactions done
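+        // (poll GET /api/compaction/run_status once per second until run_status
+        // turns false)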
+ for (String[] tablet in tablets) {
+ boolean running = true
+ do {
+ Thread.sleep(1000)
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X GET http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run_status?tablet_id=")
+ sb.append(tablet_id)
+
+ String command = sb.toString()
+ logger.info(command)
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactionStatus = parseJson(out.trim())
+ assertEquals("success", compactionStatus.status.toLowerCase())
+ running = compactionStatus.run_status
+ } while (running)
+ }
+
+ int rowCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ StringBuilder sb = new StringBuilder();
+ def compactionStatusUrlIndex = 17
+ sb.append("curl -X GET ")
+ sb.append(tablet[compactionStatusUrlIndex])
+ String command = sb.toString()
+ // wait for cleaning stale_rowsets
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ for (String rowset in (List<String>) tabletJson.rowsets) {
+ rowCount += Integer.parseInt(rowset.split(" ")[1])
+ }
+ }
+ assert (rowCount < 8)
+ qt_select_default3 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${tableName}")
+ }
+}
diff --git a/regression-test/suites/compaction/test_vertical_compaction_agg_keys.groovy b/regression-test/suites/compaction/test_vertical_compaction_agg_keys.groovy
new file mode 100644
index 0000000000..a68b6c3911
--- /dev/null
+++ b/regression-test/suites/compaction/test_vertical_compaction_agg_keys.groovy
@@ -0,0 +1,256 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_vertical_compaction_agg_keys") {
+ def tableName = "vertical_compaction_agg_keys_regression_test"
+
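+    // Toggle enable_vertical_compaction on every BE via its /api/update_config
+    // endpoint so the compactions below take the vertical code path;
+    // reset_be_config in the finally block sets it back to false afterwards.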
+ def set_be_config = { ->
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ for (String[] backend in backends) {
+ StringBuilder setConfigCommand = new StringBuilder();
+ setConfigCommand.append("curl -X POST http://")
+ setConfigCommand.append(backend[2])
+ setConfigCommand.append(":")
+ setConfigCommand.append(backend[5])
+ setConfigCommand.append("/api/update_config?")
+ String command1 = setConfigCommand.toString() + "enable_vertical_compaction=true"
+ logger.info(command1)
+ def process1 = command1.execute()
+ int code = process1.waitFor()
+ assertEquals(code, 0)
+ }
+ }
+ def reset_be_config = { ->
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ for (String[] backend in backends) {
+ StringBuilder setConfigCommand = new StringBuilder();
+ setConfigCommand.append("curl -X POST http://")
+ setConfigCommand.append(backend[2])
+ setConfigCommand.append(":")
+ setConfigCommand.append(backend[5])
+ setConfigCommand.append("/api/update_config?")
+ String command1 = setConfigCommand.toString() + "enable_vertical_compaction=false"
+ logger.info(command1)
+ def process1 = command1.execute()
+ int code = process1.waitFor()
+ assertEquals(code, 0)
+ }
+ }
+
+ try {
+ //BackendId,Cluster,IP,HeartbeatPort,BePort,HttpPort,BrpcPort,LastStartTime,LastHeartbeat,Alive,SystemDecommissioned,ClusterDecommissioned,TabletNum,DataUsedCapacity,AvailCapacity,TotalCapacity,UsedPct,MaxDiskUsedPct,Tag,ErrMsg,Version,Status
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ String backend_id;
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ for (String[] backend in backends) {
+ backendId_to_backendIP.put(backend[0], backend[2])
+ backendId_to_backendHttpPort.put(backend[0], backend[5])
+ }
+
+ backend_id = backendId_to_backendIP.keySet()[0]
+ StringBuilder showConfigCommand = new StringBuilder();
+ showConfigCommand.append("curl -X GET http://")
+ showConfigCommand.append(backendId_to_backendIP.get(backend_id))
+ showConfigCommand.append(":")
+ showConfigCommand.append(backendId_to_backendHttpPort.get(backend_id))
+ showConfigCommand.append("/api/show_config")
+ logger.info(showConfigCommand.toString())
+ def process = showConfigCommand.toString().execute()
+ int code = process.waitFor()
+ String err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ String out = process.getText()
+ logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def configList = parseJson(out.trim())
+ assert configList instanceof List
+
+ boolean disableAutoCompaction = true
+ for (Object ele in (List) configList) {
+ assert ele instanceof List<String>
+ if (((List<String>) ele)[0] == "disable_auto_compaction") {
+ disableAutoCompaction = Boolean.parseBoolean(((List<String>) ele)[2])
+ }
+ }
+ set_be_config.call()
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE ${tableName} (
+            `user_id` LARGEINT NOT NULL COMMENT "user id",
+            `date` DATE NOT NULL COMMENT "data load date and time",
+            `datev2` DATEV2 NOT NULL COMMENT "data load date and time",
+            `datetimev2_1` DATETIMEV2(3) NOT NULL COMMENT "data load date and time",
+            `datetimev2_2` DATETIMEV2(6) NOT NULL COMMENT "data load date and time",
+            `city` VARCHAR(20) COMMENT "user's city",
+            `age` SMALLINT COMMENT "user age",
+            `sex` TINYINT COMMENT "user gender",
+            `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "user's last visit time",
+            `last_update_date` DATETIME REPLACE_IF_NOT_NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user's last update time",
+            `datetime_val1` DATETIMEV2(3) REPLACE DEFAULT "1970-01-01 00:00:00.111" COMMENT "user's last visit time",
+            `datetime_val2` DATETIME(6) REPLACE_IF_NOT_NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user's last update time",
+            `last_visit_date_not_null` DATETIME REPLACE NOT NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user's last visit time",
+            `cost` BIGINT SUM DEFAULT "0" COMMENT "user's total spend",
+            `max_dwell_time` INT MAX DEFAULT "0" COMMENT "user's maximum dwell time",
+            `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "user's minimum dwell time",
+            `hll_col` HLL HLL_UNION NOT NULL COMMENT "HLL column",
+            `bitmap_col` Bitmap BITMAP_UNION NOT NULL COMMENT "bitmap column" )
+ AGGREGATE KEY(`user_id`, `date`, `datev2`, `datetimev2_1`, `datetimev2_2`, `city`, `age`, `sex`) DISTRIBUTED BY HASH(`user_id`) BUCKETS 10
+ PROPERTIES ( "replication_num" = "1" );
+ """
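+        // The aggregate schema mixes scalar aggregates (REPLACE/SUM/MAX/MIN) with
+        // HLL_UNION and BITMAP_UNION columns, so vertical compaction is also
+        // exercised on object-typed aggregate columns.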
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-01', '2020-01-01', '2017-10-01 11:11:11.170000', '2017-10-01 11:11:11.110111', '2020-01-01', 1, 30, 20, hll_hash(1), to_bitmap(1))
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.160000', '2017-10-01 11:11:11.100111', '2020-01-02', 1, 31, 19, hll_hash(2), to_bitmap(2))
+ """
+
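+        // This delete matches no rows (only user_id=1 exists so far), but it still
+        // records a delete predicate that the compaction has to carry through.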
+ sql """
+ DELETE from ${tableName} where user_id < 0
+ """
+
+ qt_select_default """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.150000', '2017-10-01 11:11:11.130111', '2020-01-02', 1, 31, 21, hll_hash(2), to_bitmap(2))
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.140000', '2017-10-01 11:11:11.120111', '2020-01-03', 1, 32, 20, hll_hash(3), to_bitmap(3))
+ """
+
+ sql """
+ DELETE from ${tableName} where user_id <= 1
+ """
+
+ qt_select_default1 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.100000', '2017-10-01 11:11:11.140111', '2020-01-03', 1, 32, 22, hll_hash(3), to_bitmap(3))
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-04', '2020-01-04', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.150111', '2020-01-04', 1, 33, 21, hll_hash(4), to_bitmap(4))
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20, hll_hash(5), to_bitmap(5))
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20, hll_hash(5), to_bitmap(5))
+ """
+
+ qt_select_default2 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+ //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus
+ String[][] tablets = sql """ show tablets from ${tableName}; """
+
+ // trigger compactions for all tablets in ${tableName}
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X POST http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run?tablet_id=")
+ sb.append(tablet_id)
+ sb.append("&compact_type=cumulative")
+
+ String command = sb.toString()
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactJson = parseJson(out.trim())
+ if (compactJson.status.toLowerCase() == "fail") {
+ assertEquals(disableAutoCompaction, false)
+ logger.info("Compaction was done automatically!")
+ }
+ if (disableAutoCompaction) {
+ assertEquals("success", compactJson.status.toLowerCase())
+ }
+ }
+
+ // wait for all compactions done
+ for (String[] tablet in tablets) {
+ boolean running = true
+ do {
+ Thread.sleep(1000)
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X GET http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run_status?tablet_id=")
+ sb.append(tablet_id)
+
+ String command = sb.toString()
+ logger.info(command)
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactionStatus = parseJson(out.trim())
+ assertEquals("success", compactionStatus.status.toLowerCase())
+ running = compactionStatus.run_status
+ } while (running)
+ }
+
+ int rowCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ StringBuilder sb = new StringBuilder();
+ def compactionStatusUrlIndex = 17
+ sb.append("curl -X GET ")
+ sb.append(tablet[compactionStatusUrlIndex])
+ String command = sb.toString()
+ // wait for cleaning stale_rowsets
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ for (String rowset in (List<String>) tabletJson.rowsets) {
+ rowCount += Integer.parseInt(rowset.split(" ")[1])
+ }
+ }
+ assert (rowCount < 8)
+ qt_select_default3 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${tableName}")
+ reset_be_config.call()
+ }
+}
diff --git a/regression-test/suites/compaction/test_vertical_compaction_dup_keys.groovy b/regression-test/suites/compaction/test_vertical_compaction_dup_keys.groovy
new file mode 100644
index 0000000000..ac91094ed9
--- /dev/null
+++ b/regression-test/suites/compaction/test_vertical_compaction_dup_keys.groovy
@@ -0,0 +1,255 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_vertical_compaction_dup_keys") {
+ def tableName = "vertical_compaction_dup_keys_regression_test"
+
+ def set_be_config = { ->
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ for (String[] backend in backends) {
+ StringBuilder setConfigCommand = new StringBuilder();
+ setConfigCommand.append("curl -X POST http://")
+ setConfigCommand.append(backend[2])
+ setConfigCommand.append(":")
+ setConfigCommand.append(backend[5])
+ setConfigCommand.append("/api/update_config?")
+ String command1 = setConfigCommand.toString() + "enable_vertical_compaction=true"
+ logger.info(command1)
+ def process1 = command1.execute()
+ int code = process1.waitFor()
+ assertEquals(code, 0)
+ }
+ }
+ def reset_be_config = { ->
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ for (String[] backend in backends) {
+ StringBuilder setConfigCommand = new StringBuilder();
+ setConfigCommand.append("curl -X POST http://")
+ setConfigCommand.append(backend[2])
+ setConfigCommand.append(":")
+ setConfigCommand.append(backend[5])
+ setConfigCommand.append("/api/update_config?")
+ String command1 = setConfigCommand.toString() + "enable_vertical_compaction=false"
+ logger.info(command1)
+ def process1 = command1.execute()
+ int code = process1.waitFor()
+ assertEquals(code, 0)
+ }
+ }
+
+ try {
+ //BackendId,Cluster,IP,HeartbeatPort,BePort,HttpPort,BrpcPort,LastStartTime,LastHeartbeat,Alive,SystemDecommissioned,ClusterDecommissioned,TabletNum,DataUsedCapacity,AvailCapacity,TotalCapacity,UsedPct,MaxDiskUsedPct,Tag,ErrMsg,Version,Status
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ String backend_id;
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ for (String[] backend in backends) {
+ backendId_to_backendIP.put(backend[0], backend[2])
+ backendId_to_backendHttpPort.put(backend[0], backend[5])
+ }
+
+ backend_id = backendId_to_backendIP.keySet()[0]
+ StringBuilder showConfigCommand = new StringBuilder();
+ showConfigCommand.append("curl -X GET http://")
+ showConfigCommand.append(backendId_to_backendIP.get(backend_id))
+ showConfigCommand.append(":")
+ showConfigCommand.append(backendId_to_backendHttpPort.get(backend_id))
+ showConfigCommand.append("/api/show_config")
+ logger.info(showConfigCommand.toString())
+ def process = showConfigCommand.toString().execute()
+ int code = process.waitFor()
+ String err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ String out = process.getText()
+ logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def configList = parseJson(out.trim())
+ assert configList instanceof List
+
+ boolean disableAutoCompaction = true
+ for (Object ele in (List) configList) {
+ assert ele instanceof List<String>
+ if (((List<String>) ele)[0] == "disable_auto_compaction") {
+ disableAutoCompaction = Boolean.parseBoolean(((List<String>) ele)[2])
+ }
+ }
+ set_be_config.call()
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE ${tableName} (
+            `user_id` LARGEINT NOT NULL COMMENT "user id",
+            `date` DATE NOT NULL COMMENT "data load date and time",
+            `datev2` DATEV2 NOT NULL COMMENT "data load date and time",
+            `datetimev2_1` DATETIMEV2(3) NOT NULL COMMENT "data load date and time",
+            `datetimev2_2` DATETIMEV2(6) NOT NULL COMMENT "data load date and time",
+            `city` VARCHAR(20) COMMENT "user's city",
+            `age` SMALLINT COMMENT "user age",
+            `sex` TINYINT COMMENT "user gender",
+            `last_visit_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user's last visit time",
+            `last_update_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user's last update time",
+            `datetime_val1` DATETIMEV2(3) DEFAULT "1970-01-01 00:00:00.111" COMMENT "user's last visit time",
+            `datetime_val2` DATETIME(6) DEFAULT "1970-01-01 00:00:00" COMMENT "user's last update time",
+            `last_visit_date_not_null` DATETIME NOT NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user's last visit time",
+            `cost` BIGINT DEFAULT "0" COMMENT "user's total spend",
+            `max_dwell_time` INT DEFAULT "0" COMMENT "user's maximum dwell time",
+            `min_dwell_time` INT DEFAULT "99999" COMMENT "user's minimum dwell time")
+ DUPLICATE KEY(`user_id`, `date`, `datev2`, `datetimev2_1`, `datetimev2_2`, `city`, `age`, `sex`) DISTRIBUTED BY HASH(`user_id`)
+ PROPERTIES ( "replication_num" = "1" );
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-01', '2020-01-01', '2017-10-01 11:11:11.170000', '2017-10-01 11:11:11.110111', '2020-01-01', 1, 30, 20)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.160000', '2017-10-01 11:11:11.100111', '2020-01-02', 1, 31, 19)
+ """
+
+ sql """
+ DELETE from ${tableName} where user_id <= 0
+ """
+
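+        // A DUPLICATE KEY table keeps every row with identical keys, so the qt_
+        // checks order by all columns to make the expected output deterministic.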
+ qt_select_default """ SELECT * FROM ${tableName} t ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """
+
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.150000', '2017-10-01 11:11:11.130111', '2020-01-02', 1, 31, 21)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.140000', '2017-10-01 11:11:11.120111', '2020-01-03', 1, 32, 20)
+ """
+
+ sql """
+ DELETE from ${tableName} where user_id <= 1
+ """
+ qt_select_default1 """ SELECT * FROM ${tableName} t ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """
+
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.100000', '2017-10-01 11:11:11.140111', '2020-01-03', 1, 32, 22)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-04', '2020-01-04', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.150111', '2020-01-04', 1, 33, 21)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20)
+ """
+
+ qt_select_default2 """ SELECT * FROM ${tableName} t ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """
+
+ //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus
+ String[][] tablets = sql """ show tablets from ${tableName}; """
+
+ // trigger compactions for all tablets in ${tableName}
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X POST http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run?tablet_id=")
+ sb.append(tablet_id)
+ sb.append("&compact_type=cumulative")
+
+ String command = sb.toString()
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactJson = parseJson(out.trim())
+ if (compactJson.status.toLowerCase() == "fail") {
+ assertEquals(disableAutoCompaction, false)
+ logger.info("Compaction was done automatically!")
+ }
+ if (disableAutoCompaction) {
+ assertEquals("success", compactJson.status.toLowerCase())
+ }
+ }
+
+ // wait for all compactions done
+ for (String[] tablet in tablets) {
+ boolean running = true
+ do {
+ Thread.sleep(1000)
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X GET http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run_status?tablet_id=")
+ sb.append(tablet_id)
+
+ String command = sb.toString()
+ logger.info(command)
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactionStatus = parseJson(out.trim())
+ assertEquals("success", compactionStatus.status.toLowerCase())
+ running = compactionStatus.run_status
+ } while (running)
+ }
+
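+        // Per the column list above, index 17 of SHOW TABLETS output is the
+        // CompactionStatus URL; fetch it to inspect the rowsets left after compaction.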
+ int rowCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ StringBuilder sb = new StringBuilder();
+ def compactionStatusUrlIndex = 17
+ sb.append("curl -X GET ")
+ sb.append(tablet[compactionStatusUrlIndex])
+ String command = sb.toString()
+ // wait for cleaning stale_rowsets
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ for (String rowset in (List<String>) tabletJson.rowsets) {
+ rowCount += Integer.parseInt(rowset.split(" ")[1])
+ }
+ }
+ assert (rowCount <= 8)
+ qt_select_default3 """ SELECT * FROM ${tableName} t ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${tableName}")
+ reset_be_config.call()
+ }
+}
diff --git a/regression-test/suites/compaction/test_vertical_compaction_uniq_keys.groovy b/regression-test/suites/compaction/test_vertical_compaction_uniq_keys.groovy
new file mode 100644
index 0000000000..849742846c
--- /dev/null
+++ b/regression-test/suites/compaction/test_vertical_compaction_uniq_keys.groovy
@@ -0,0 +1,253 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_vertical_compaction_uniq_keys") {
+ def tableName = "vertical_compaction_uniq_keys_regression_test"
+
+ def set_be_config = { ->
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ for (String[] backend in backends) {
+ StringBuilder setConfigCommand = new StringBuilder();
+ setConfigCommand.append("curl -X POST http://")
+ setConfigCommand.append(backend[2])
+ setConfigCommand.append(":")
+ setConfigCommand.append(backend[5])
+ setConfigCommand.append("/api/update_config?")
+ String command1 = setConfigCommand.toString() + "enable_vertical_compaction=true"
+ logger.info(command1)
+ def process1 = command1.execute()
+ int code = process1.waitFor()
+ assertEquals(code, 0)
+ }
+ }
+ def reset_be_config = { ->
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ for (String[] backend in backends) {
+ StringBuilder setConfigCommand = new StringBuilder();
+ setConfigCommand.append("curl -X POST http://")
+ setConfigCommand.append(backend[2])
+ setConfigCommand.append(":")
+ setConfigCommand.append(backend[5])
+ setConfigCommand.append("/api/update_config?")
+ String command1 = setConfigCommand.toString() + "enable_vertical_compaction=false"
+ logger.info(command1)
+ def process1 = command1.execute()
+ int code = process1.waitFor()
+ assertEquals(code, 0)
+ }
+ }
+
+ try {
+ //BackendId,Cluster,IP,HeartbeatPort,BePort,HttpPort,BrpcPort,LastStartTime,LastHeartbeat,Alive,SystemDecommissioned,ClusterDecommissioned,TabletNum,DataUsedCapacity,AvailCapacity,TotalCapacity,UsedPct,MaxDiskUsedPct,Tag,ErrMsg,Version,Status
+ String[][] backends = sql """ show backends; """
+ assertTrue(backends.size() > 0)
+ String backend_id;
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ for (String[] backend in backends) {
+ backendId_to_backendIP.put(backend[0], backend[2])
+ backendId_to_backendHttpPort.put(backend[0], backend[5])
+ }
+
+ backend_id = backendId_to_backendIP.keySet()[0]
+ StringBuilder showConfigCommand = new StringBuilder();
+ showConfigCommand.append("curl -X GET http://")
+ showConfigCommand.append(backendId_to_backendIP.get(backend_id))
+ showConfigCommand.append(":")
+ showConfigCommand.append(backendId_to_backendHttpPort.get(backend_id))
+ showConfigCommand.append("/api/show_config")
+ logger.info(showConfigCommand.toString())
+ def process = showConfigCommand.toString().execute()
+ int code = process.waitFor()
+ String err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ String out = process.getText()
+ logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def configList = parseJson(out.trim())
+ assert configList instanceof List
+
+ boolean disableAutoCompaction = true
+ for (Object ele in (List) configList) {
+ assert ele instanceof List<String>
+ if (((List<String>) ele)[0] == "disable_auto_compaction") {
+ disableAutoCompaction = Boolean.parseBoolean(((List<String>) ele)[2])
+ }
+ }
+ set_be_config.call()
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE ${tableName} (
+            `user_id` LARGEINT NOT NULL COMMENT "user id",
+            `date` DATE NOT NULL COMMENT "data load date and time",
+            `datev2` DATEV2 NOT NULL COMMENT "data load date and time",
+            `datetimev2_1` DATETIMEV2(3) NOT NULL COMMENT "data load date and time",
+            `datetimev2_2` DATETIMEV2(6) NOT NULL COMMENT "data load date and time",
+            `city` VARCHAR(20) COMMENT "user's city",
+            `age` SMALLINT COMMENT "user age",
+            `sex` TINYINT COMMENT "user gender",
+            `last_visit_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user's last visit time",
+            `last_update_date` DATETIME DEFAULT "1970-01-01 00:00:00" COMMENT "user's last update time",
+            `datetime_val1` DATETIMEV2(3) DEFAULT "1970-01-01 00:00:00.111" COMMENT "user's last visit time",
+            `datetime_val2` DATETIME(6) DEFAULT "1970-01-01 00:00:00" COMMENT "user's last update time",
+            `last_visit_date_not_null` DATETIME NOT NULL DEFAULT "1970-01-01 00:00:00" COMMENT "user's last visit time",
+            `cost` BIGINT DEFAULT "0" COMMENT "user's total spend",
+            `max_dwell_time` INT DEFAULT "0" COMMENT "user's maximum dwell time",
+            `min_dwell_time` INT DEFAULT "99999" COMMENT "user's minimum dwell time")
+ UNIQUE KEY(`user_id`, `date`, `datev2`, `datetimev2_1`, `datetimev2_2`, `city`, `age`, `sex`) DISTRIBUTED BY HASH(`user_id`)
+ PROPERTIES ( "replication_num" = "1" );
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-01', '2020-01-01', '2017-10-01 11:11:11.170000', '2017-10-01 11:11:11.110111', '2020-01-01', 1, 30, 20)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.160000', '2017-10-01 11:11:11.100111', '2020-01-02', 1, 31, 19)
+ """
+
+ sql """
+ DELETE from ${tableName} where user_id <= 0
+ """
+ qt_select_default """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2017-10-01 11:11:11.150000', '2017-10-01 11:11:11.130111', '2020-01-02', 1, 31, 21)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (2, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.140000', '2017-10-01 11:11:11.120111', '2020-01-03', 1, 32, 20)
+ """
+
+ sql """
+ DELETE from ${tableName} where user_id <= 1
+ """
+ qt_select_default1 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-03', '2020-01-03', '2017-10-01 11:11:11.100000', '2017-10-01 11:11:11.140111', '2020-01-03', 1, 32, 22)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, '2020-01-04', '2020-01-04', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.150111', '2020-01-04', 1, 33, 21)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (3, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20)
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES
+ (4, '2017-10-01', '2017-10-01', '2017-10-01 11:11:11.110000', '2017-10-01 11:11:11.110111', 'Beijing', 10, 1, NULL, NULL, NULL, NULL, '2020-01-05', 1, 34, 20)
+ """
+
+ qt_select_default2 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+
+ //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus
+ String[][] tablets = sql """ show tablets from ${tableName}; """
+
+ // trigger compactions for all tablets in ${tableName}
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X POST http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run?tablet_id=")
+ sb.append(tablet_id)
+ sb.append("&compact_type=cumulative")
+
+ String command = sb.toString()
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactJson = parseJson(out.trim())
+ if (compactJson.status.toLowerCase() == "fail") {
+ assertEquals(disableAutoCompaction, false)
+ logger.info("Compaction was done automatically!")
+ }
+ if (disableAutoCompaction) {
+ assertEquals("success", compactJson.status.toLowerCase())
+ }
+ }
+
+ // wait for all compactions done
+ for (String[] tablet in tablets) {
+ boolean running = true
+ do {
+ Thread.sleep(1000)
+ String tablet_id = tablet[0]
+ backend_id = tablet[2]
+ StringBuilder sb = new StringBuilder();
+ sb.append("curl -X GET http://")
+ sb.append(backendId_to_backendIP.get(backend_id))
+ sb.append(":")
+ sb.append(backendId_to_backendHttpPort.get(backend_id))
+ sb.append("/api/compaction/run_status?tablet_id=")
+ sb.append(tablet_id)
+
+ String command = sb.toString()
+ logger.info(command)
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def compactionStatus = parseJson(out.trim())
+ assertEquals("success", compactionStatus.status.toLowerCase())
+ running = compactionStatus.run_status
+ } while (running)
+ }
+
+ int rowCount = 0
+ for (String[] tablet in tablets) {
+ String tablet_id = tablet[0]
+ StringBuilder sb = new StringBuilder();
+ def compactionStatusUrlIndex = 17
+ sb.append("curl -X GET ")
+ sb.append(tablet[compactionStatusUrlIndex])
+ String command = sb.toString()
+ // wait for cleaning stale_rowsets
+ process = command.execute()
+ code = process.waitFor()
+ err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
+ out = process.getText()
+ logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals(code, 0)
+ def tabletJson = parseJson(out.trim())
+ assert tabletJson.rowsets instanceof List
+ for (String rowset in (List<String>) tabletJson.rowsets) {
+ rowCount += Integer.parseInt(rowset.split(" ")[1])
+ }
+ }
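+        // Eight single-row inserts were made in total; after the UNIQUE KEY merge
+        // and the deletes, compaction should leave fewer than 8 rows across rowsets.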
+ assert (rowCount < 8)
+ qt_select_default3 """ SELECT * FROM ${tableName} t ORDER BY user_id; """
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${tableName}")
+ reset_be_config.call()
+ }
+}