You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/12/22 15:28:38 UTC
[doris] branch master updated: [refactor](non-vec) Remove non vec code from be (#15278)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 83a99a0f8b [refactor](non-vec) Remove non vec code from be (#15278)
83a99a0f8b is described below
commit 83a99a0f8b84d63419b7317846232e04667e1f28
Author: yiguolei <67...@qq.com>
AuthorDate: Thu Dec 22 23:28:30 2022 +0800
[refactor](non-vec) Remove non vec code from be (#15278)
* [refactor](removecode) remove some non-vectorization
Co-authored-by: yiguolei <yi...@gmail.com>
---
be/src/exprs/expr.h | 1 -
be/src/exprs/expr_context.h | 1 -
be/src/olap/CMakeLists.txt | 3 -
be/src/olap/collect_iterator.cpp | 412 -------
be/src/olap/collect_iterator.h | 214 ----
be/src/olap/compaction.cpp | 3 +-
be/src/olap/generic_iterators.cpp | 415 -------
be/src/olap/generic_iterators.h | 46 -
be/src/olap/iterators.h | 4 -
be/src/olap/merger.cpp | 74 --
be/src/olap/merger.h | 4 -
be/src/olap/reader.h | 10 -
be/src/olap/rowset/beta_rowset_reader.cpp | 142 +--
be/src/olap/rowset/beta_rowset_reader.h | 4 -
be/src/olap/rowset/rowset_reader.h | 7 -
be/src/olap/rowset/rowset_reader_context.h | 1 -
.../rowset/segment_v2/empty_segment_iterator.cpp | 6 -
.../rowset/segment_v2/empty_segment_iterator.h | 1 -
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 103 --
be/src/olap/rowset/segment_v2/segment_iterator.h | 1 -
be/src/olap/schema_change.cpp | 327 ------
be/src/olap/schema_change.h | 62 +-
be/src/olap/task/engine_checksum_task.cpp | 69 +-
be/src/olap/task/engine_checksum_task.h | 1 -
be/src/olap/tuple_reader.cpp | 220 ----
be/src/olap/tuple_reader.h | 78 --
be/src/vec/olap/block_reader.h | 5 -
be/src/vec/olap/vertical_block_reader.h | 5 -
be/src/vec/olap/vgeneric_iterators.cpp | 2 +-
be/test/CMakeLists.txt | 7 -
be/test/olap/block_column_predicate_test.cpp | 225 ----
.../olap/bloom_filter_column_predicate_test.cpp | 183 ---
be/test/olap/comparison_predicate_test.cpp | 793 -------------
be/test/olap/generic_iterators_test.cpp | 194 ----
be/test/olap/in_list_predicate_test.cpp | 730 ------------
be/test/olap/null_predicate_test.cpp | 614 ----------
be/test/olap/row_block_v2_test.cpp | 166 ---
be/test/olap/rowset/beta_rowset_test.cpp | 219 ----
be/test/olap/rowset/segment_v2/segment_test.cpp | 1176 --------------------
be/test/olap/segcompaction_test.cpp | 109 --
40 files changed, 22 insertions(+), 6615 deletions(-)
diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h
index 448784d0f2..8588f37429 100644
--- a/be/src/exprs/expr.h
+++ b/be/src/exprs/expr.h
@@ -279,7 +279,6 @@ protected:
friend class FunctionCall;
friend class HashJoinNode;
friend class ExecNode;
- friend class OlapScanNode;
friend class SetVar;
friend class NativeUdfExpr;
friend class JsonFunctions;
diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h
index e61d60fd00..a76b342da6 100644
--- a/be/src/exprs/expr_context.h
+++ b/be/src/exprs/expr_context.h
@@ -157,7 +157,6 @@ private:
friend class InPredicate;
friend class RuntimePredicateWrapper;
friend class BloomFilterPredicate;
- friend class OlapScanNode;
friend class EsPredicate;
friend class RowGroupReader;
diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
index 3512d13a7e..d82c9a0975 100644
--- a/be/src/olap/CMakeLists.txt
+++ b/be/src/olap/CMakeLists.txt
@@ -30,7 +30,6 @@ add_library(Olap STATIC
bloom_filter.hpp
block_column_predicate.cpp
byte_buffer.cpp
- collect_iterator.cpp
compaction.cpp
compaction_permit_limiter.cpp
compress.cpp
@@ -39,7 +38,6 @@ add_library(Olap STATIC
delete_handler.cpp
delta_writer.cpp
file_helper.cpp
- generic_iterators.cpp
hll.cpp
inverted_index_parser.cpp
like_column_predicate.cpp
@@ -55,7 +53,6 @@ add_library(Olap STATIC
page_cache.cpp
push_handler.cpp
reader.cpp
- tuple_reader.cpp
row_block.cpp
row_block2.cpp
row_cursor.cpp
diff --git a/be/src/olap/collect_iterator.cpp b/be/src/olap/collect_iterator.cpp
deleted file mode 100644
index 8301fa299b..0000000000
--- a/be/src/olap/collect_iterator.cpp
+++ /dev/null
@@ -1,412 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/collect_iterator.h"
-
-#include <utility>
-
-#include "olap/reader.h"
-#include "olap/row.h"
-#include "olap/row_block.h"
-#include "olap/row_cursor.h"
-#include "olap/rowset/beta_rowset_reader.h"
-
-namespace doris {
-using namespace ErrorCode;
-
-CollectIterator::~CollectIterator() = default;
-
-void CollectIterator::init(TabletReader* reader) {
- _reader = reader;
- // when aggregate is enabled or key_type is DUP_KEYS, we don't merge
- // multiple data to aggregate for better performance
- if (_reader->_reader_type == READER_QUERY &&
- (_reader->_aggregation || _reader->_tablet->keys_type() == KeysType::DUP_KEYS ||
- (_reader->_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
- _reader->_tablet->enable_unique_key_merge_on_write()))) {
- _merge = false;
- }
-}
-
-Status CollectIterator::add_child(RowsetReaderSharedPtr rs_reader) {
- std::unique_ptr<LevelIterator> child(new Level0Iterator(rs_reader, _reader));
- RETURN_NOT_OK(child->init());
- if (child->current_row() == nullptr) {
- return Status::Error<END_OF_FILE>();
- }
-
- _children.push_back(child.release());
- return Status::OK();
-}
-
-// Build a merge heap. If _merge is true, a rowset with the max rownum
-// status will be used as the base rowset, and the other rowsets will be merged first and
-// then merged with the base rowset.
-void CollectIterator::build_heap(const std::vector<RowsetReaderSharedPtr>& rs_readers) {
- DCHECK(rs_readers.size() == _children.size());
- _reverse = _reader->_tablet_schema->keys_type() == KeysType::UNIQUE_KEYS;
- SortType sort_type = _reader->_tablet_schema->sort_type();
- int sort_col_num = _reader->_tablet_schema->sort_col_num();
- if (_children.empty()) {
- _inner_iter.reset(nullptr);
- return;
- } else if (_merge) {
- DCHECK(!rs_readers.empty());
- // build merge heap with two children, a base rowset as level0iterator and
- // other cumulative rowsets as a level1iterator
- if (_children.size() > 1) {
- // find 'base rowset', 'base rowset' is the rowset which contains the max row number
- int64_t max_row_num = 0;
- int base_reader_idx = 0;
- for (size_t i = 0; i < rs_readers.size(); ++i) {
- int64_t cur_row_num = rs_readers[i]->rowset()->rowset_meta()->num_rows();
- if (cur_row_num > max_row_num) {
- max_row_num = cur_row_num;
- base_reader_idx = i;
- }
- }
- auto base_reader_child = _children.begin();
- std::advance(base_reader_child, base_reader_idx);
-
- std::list<LevelIterator*> cumu_children;
- int i = 0;
- for (const auto& child : _children) {
- if (i != base_reader_idx) {
- cumu_children.push_back(child);
- }
- ++i;
- }
- Level1Iterator* cumu_iter = new Level1Iterator(
- cumu_children, cumu_children.size() > 1, _reverse, _reader->_sequence_col_idx,
- &_reader->_merged_rows, sort_type, sort_col_num);
- cumu_iter->init();
- _inner_iter.reset(new Level1Iterator(
- std::list<LevelIterator*> {*base_reader_child, cumu_iter}, _merge, _reverse,
- _reader->_sequence_col_idx, &_reader->_merged_rows, sort_type, sort_col_num));
- } else {
- // _children.size() == 1
- _inner_iter.reset(new Level1Iterator(_children, _merge, _reverse,
- _reader->_sequence_col_idx, &_reader->_merged_rows,
- sort_type, sort_col_num));
- }
- } else {
- _inner_iter.reset(new Level1Iterator(_children, _merge, _reverse,
- _reader->_sequence_col_idx, &_reader->_merged_rows,
- sort_type, sort_col_num));
- }
- _inner_iter->init();
- // Clear _children earlier to release any related references
- _children.clear();
-}
-
-bool CollectIterator::LevelIteratorComparator::operator()(const LevelIterator* a,
- const LevelIterator* b) {
- // First compare row cursor.
- const RowCursor* first = a->current_row();
- const RowCursor* second = b->current_row();
- int cmp_res = compare_row(*first, *second);
- if (cmp_res != 0) {
- return cmp_res > 0;
- }
-
- // Second: If _sequence_id_idx != 0 means we need to compare sequence. sequence only use
- // in unique key. so keep reverse order here
- if (_sequence_id_idx != -1) {
- auto seq_first_cell = first->cell(_sequence_id_idx);
- auto seq_second_cell = second->cell(_sequence_id_idx);
- auto res = first->schema()
- ->column(_sequence_id_idx)
- ->compare_cell(seq_first_cell, seq_second_cell);
- if (res != 0) {
- res < 0 ? a->set_need_skip(true) : b->set_need_skip(true);
- return res < 0;
- }
- }
- // if row cursors equal, compare data version.
- // read data from higher version to lower version.
- // for UNIQUE_KEYS just read the highest version and no need agg_update.
- // for AGG_KEYS if a version is deleted, the lower version no need to agg_update
- if (_reverse) {
- auto lower = a->version() < b->version();
- lower ? a->set_need_skip(true) : b->set_need_skip(true);
- return lower;
- }
- return a->version() > b->version();
-}
-
-CollectIterator::BaseComparator::BaseComparator(std::shared_ptr<LevelIteratorComparator>& cmp) {
- _cmp = cmp;
-}
-
-bool CollectIterator::BaseComparator::operator()(const LevelIterator* a, const LevelIterator* b) {
- return _cmp->operator()(a, b);
-}
-
-bool CollectIterator::LevelZorderIteratorComparator::operator()(const LevelIterator* a,
- const LevelIterator* b) {
- // First compare row cursor.
- const RowCursor* first = a->current_row();
- const RowCursor* second = b->current_row();
- int cmp_res = _comparator.compare_row(*first, *second);
- if (cmp_res != 0) {
- return cmp_res > 0;
- }
- // if row cursors equal, compare data version.
- // read data from higher version to lower version.
- // for UNIQUE_KEYS just read the highest version and no need agg_update.
- // for AGG_KEYS if a version is deleted, the lower version no need to agg_update
- if (_reverse) {
- return a->version() < b->version();
- }
- return a->version() > b->version();
-}
-
-const RowCursor* CollectIterator::current_row(bool* delete_flag) const {
- if (LIKELY(_inner_iter)) {
- return _inner_iter->current_row(delete_flag);
- }
- return nullptr;
-}
-
-Status CollectIterator::next(const RowCursor** row, bool* delete_flag) {
- if (LIKELY(_inner_iter)) {
- return _inner_iter->next(row, delete_flag);
- } else {
- return Status::Error<END_OF_FILE>();
- }
-}
-
-CollectIterator::Level0Iterator::Level0Iterator(RowsetReaderSharedPtr rs_reader,
- TabletReader* reader)
- : _rs_reader(rs_reader), _is_delete(rs_reader->delete_flag()), _reader(reader) {
- if (LIKELY(rs_reader->type() == RowsetTypePB::BETA_ROWSET)) {
- _refresh_current_row = &Level0Iterator::_refresh_current_row_v2;
- } else {
- LOG(FATAL) << "Not supported rowset type";
- }
-}
-
-CollectIterator::Level0Iterator::~Level0Iterator() = default;
-
-Status CollectIterator::Level0Iterator::init() {
- RETURN_NOT_OK_LOG(_row_cursor.init(_reader->_tablet_schema, _reader->_return_columns),
- "failed to init row cursor");
- return (this->*_refresh_current_row)();
-}
-
-const RowCursor* CollectIterator::Level0Iterator::current_row(bool* delete_flag) const {
- *delete_flag = _is_delete || _current_row->is_delete();
- return _current_row;
-}
-
-const RowCursor* CollectIterator::Level0Iterator::current_row() const {
- return _current_row;
-}
-
-int64_t CollectIterator::Level0Iterator::version() const {
- return _rs_reader->version().second;
-}
-
-Status CollectIterator::Level0Iterator::_refresh_current_row_v2() {
- do {
- if (_row_block != nullptr && _row_block->has_remaining()) {
- size_t pos = _row_block->pos();
- _row_block->get_row(pos, &_row_cursor);
- _current_row = &_row_cursor;
- return Status::OK();
- } else {
- auto res = _rs_reader->next_block(&_row_block);
- if (!res.ok()) {
- _current_row = nullptr;
- return res;
- }
- }
- } while (_row_block != nullptr);
- _current_row = nullptr;
- return Status::Error<END_OF_FILE>();
-}
-
-Status CollectIterator::Level0Iterator::next(const RowCursor** row, bool* delete_flag) {
- _row_block->pos_inc();
- auto res = (this->*_refresh_current_row)();
- *row = _current_row;
- *delete_flag = _is_delete;
- if (_current_row != nullptr) {
- *delete_flag = _is_delete || _current_row->is_delete();
- }
- return res;
-}
-
-CollectIterator::Level1Iterator::Level1Iterator(
- const std::list<CollectIterator::LevelIterator*>& children, bool merge, bool reverse,
- int sequence_id_idx, uint64_t* merge_count, SortType sort_type, int sort_col_num)
- : _children(children),
- _merge(merge),
- _reverse(reverse),
- _sequence_id_idx(sequence_id_idx),
- _merged_rows(merge_count),
- _sort_type(sort_type),
- _sort_col_num(sort_col_num) {}
-
-CollectIterator::LevelIterator::~LevelIterator() = default;
-
-CollectIterator::Level1Iterator::~Level1Iterator() {
- for (auto child : _children) {
- if (child != nullptr) {
- delete child;
- child = nullptr;
- }
- }
-
- if (_heap) {
- while (!_heap->empty()) {
- LevelIterator* it = _heap->top();
- if (it != nullptr) {
- delete it;
- it = nullptr;
- }
- _heap->pop();
- }
- }
-}
-
-// Read next row into *row.
-// Returns
-// Status::Error<END_OF_FILE>() and set *row to nullptr when EOF is reached.
-// Others when error happens
-Status CollectIterator::Level1Iterator::next(const RowCursor** row, bool* delete_flag) {
- if (UNLIKELY(_cur_child == nullptr)) {
- return Status::Error<END_OF_FILE>();
- }
- if (_merge) {
- return _merge_next(row, delete_flag);
- } else {
- return _normal_next(row, delete_flag);
- }
-}
-
-// Get top row of the heap, nullptr if reach end.
-const RowCursor* CollectIterator::Level1Iterator::current_row(bool* delete_flag) const {
- if (_cur_child != nullptr) {
- return _cur_child->current_row(delete_flag);
- }
- return nullptr;
-}
-
-// Get top row of the heap, nullptr if reach end.
-const RowCursor* CollectIterator::Level1Iterator::current_row() const {
- if (_cur_child != nullptr) {
- return _cur_child->current_row();
- }
- return nullptr;
-}
-
-int64_t CollectIterator::Level1Iterator::version() const {
- if (_cur_child != nullptr) {
- return _cur_child->version();
- }
- return -1;
-}
-
-Status CollectIterator::Level1Iterator::init() {
- if (_children.empty()) {
- return Status::OK();
- }
-
- // Only when there are multiple children that need to be merged
- if (_merge && _children.size() > 1) {
- std::shared_ptr<LevelIteratorComparator> cmp;
- if (_sort_type == SortType::ZORDER) {
- cmp = std::make_shared<LevelZorderIteratorComparator>(_reverse, _sequence_id_idx,
- _sort_col_num);
- } else {
- cmp = std::make_shared<LevelIteratorComparator>(_reverse, _sequence_id_idx);
- }
- BaseComparator bcmp(cmp);
- _heap.reset(new MergeHeap(bcmp));
- for (auto child : _children) {
- DCHECK(child != nullptr);
- DCHECK(child->current_row() != nullptr);
- _heap->push(child);
- }
- _cur_child = _heap->top();
- // Clear _children earlier to release any related references
- _children.clear();
- } else {
- _merge = false;
- _heap.reset(nullptr);
- _cur_child = *(_children.begin());
- }
- return Status::OK();
-}
-
-inline Status CollectIterator::Level1Iterator::_merge_next(const RowCursor** row,
- bool* delete_flag) {
- _heap->pop();
- auto res = _cur_child->next(row, delete_flag);
- if (LIKELY(res.ok())) {
- _heap->push(_cur_child);
- _cur_child = _heap->top();
- } else if (res.is<END_OF_FILE>()) {
- // current child has been read, to read next
- delete _cur_child;
- if (!_heap->empty()) {
- _cur_child = _heap->top();
- } else {
- _cur_child = nullptr;
- return Status::Error<END_OF_FILE>();
- }
- } else {
- _cur_child = nullptr;
- LOG(WARNING) << "failed to get next from child, res=" << res;
- return res;
- }
-
- if (_cur_child->need_skip()) {
- (*_merged_rows)++;
- _cur_child->set_need_skip(false);
- return _merge_next(row, delete_flag);
- }
- *row = _cur_child->current_row(delete_flag);
- return Status::OK();
-}
-
-inline Status CollectIterator::Level1Iterator::_normal_next(const RowCursor** row,
- bool* delete_flag) {
- auto res = _cur_child->next(row, delete_flag);
- if (LIKELY(res.ok())) {
- return Status::OK();
- } else if (res.is<END_OF_FILE>()) {
- // current child has been read, to read next
- delete _cur_child;
- _children.pop_front();
- if (!_children.empty()) {
- _cur_child = *(_children.begin());
- *row = _cur_child->current_row(delete_flag);
- return Status::OK();
- } else {
- _cur_child = nullptr;
- return Status::Error<END_OF_FILE>();
- }
- } else {
- _cur_child = nullptr;
- LOG(WARNING) << "failed to get next from child, res=" << res;
- return res;
- }
-}
-
-} // namespace doris
diff --git a/be/src/olap/collect_iterator.h b/be/src/olap/collect_iterator.h
deleted file mode 100644
index ac9bed60e2..0000000000
--- a/be/src/olap/collect_iterator.h
+++ /dev/null
@@ -1,214 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <queue>
-
-#include "olap/olap_define.h"
-#include "olap/row_cursor.h"
-#include "olap/rowset/rowset_reader.h"
-#include "util/tuple_row_zorder_compare.h"
-
-namespace doris {
-
-class TabletReader;
-class RowCursor;
-
-class CollectIterator {
-public:
- ~CollectIterator();
-
- // Hold reader point to get reader params
- void init(TabletReader* reader);
-
- Status add_child(RowsetReaderSharedPtr rs_reader);
-
- void build_heap(const std::vector<RowsetReaderSharedPtr>& rs_readers);
-
- // Get top row of the heap, nullptr if reach end.
- const RowCursor* current_row(bool* delete_flag) const;
-
- // Read next row into *row.
- // Returns
- // Status::Error<END_OF_FILE>() and set *row to nullptr when EOF is reached.
- // Others when error happens
- Status next(const RowCursor** row, bool* delete_flag);
-
-private:
- // This interface is the actual implementation of the new version of iterator.
- // It currently contains two implementations, one is Level0Iterator,
- // which only reads data from the rowset reader, and the other is Level1Iterator,
- // which can read merged data from multiple LevelIterators through MergeHeap.
- // By using Level1Iterator, some rowset readers can be merged in advance and
- // then merged with other rowset readers.
- class LevelIterator {
- public:
- virtual Status init() = 0;
-
- virtual const RowCursor* current_row(bool* delete_flag) const = 0;
-
- virtual const RowCursor* current_row() const = 0;
-
- virtual int64_t version() const = 0;
-
- virtual Status next(const RowCursor** row, bool* delete_flag) = 0;
- virtual ~LevelIterator() = 0;
-
- bool need_skip() const { return _skip_row; }
-
- void set_need_skip(bool skip) const { _skip_row = skip; }
-
- // Only use in unique reader. Heap will set _skip_row = true.
- // when build heap find the row in LevelIterator have same key but lower version or sequence
- // the row of LevelIteratro should be skipped to prevent useless compare and function call
- mutable bool _skip_row = false;
- };
-
- // Compare row cursors between multiple merge elements,
- // if row cursors equal, compare data version.
- class LevelIteratorComparator {
- public:
- LevelIteratorComparator(const bool reverse = false, int sequence_id_idx = -1)
- : _reverse(reverse), _sequence_id_idx(sequence_id_idx) {}
- virtual bool operator()(const LevelIterator* a, const LevelIterator* b);
- virtual ~LevelIteratorComparator() {}
-
- private:
- bool _reverse;
- int _sequence_id_idx;
- };
-
- class LevelZorderIteratorComparator : public LevelIteratorComparator {
- public:
- LevelZorderIteratorComparator(const bool reverse = false, int sequence_id_idx = -1,
- const size_t sort_col_num = 0)
- : _reverse(reverse) {
- _comparator = TupleRowZOrderComparator(sort_col_num);
- }
- virtual bool operator()(const LevelIterator* a, const LevelIterator* b);
- virtual ~LevelZorderIteratorComparator() = default;
-
- private:
- bool _reverse = false;
- TupleRowZOrderComparator _comparator;
- };
-
- class BaseComparator {
- public:
- BaseComparator(std::shared_ptr<LevelIteratorComparator>& cmp);
- bool operator()(const LevelIterator* a, const LevelIterator* b);
-
- private:
- std::shared_ptr<LevelIteratorComparator> _cmp;
- };
-
- typedef std::priority_queue<LevelIterator*, std::vector<LevelIterator*>, BaseComparator>
- MergeHeap;
- // Iterate from rowset reader. This Iterator usually like a leaf node
- class Level0Iterator : public LevelIterator {
- public:
- Level0Iterator(RowsetReaderSharedPtr rs_reader, TabletReader* reader);
-
- Status init() override;
-
- const RowCursor* current_row(bool* delete_flag) const override;
-
- const RowCursor* current_row() const override;
-
- int64_t version() const override;
-
- Status next(const RowCursor** row, bool* delete_flag) override;
-
- ~Level0Iterator();
-
- private:
- Status (Level0Iterator::*_refresh_current_row)() = nullptr;
-
- Status _refresh_current_row_v2();
-
- RowsetReaderSharedPtr _rs_reader;
- const RowCursor* _current_row = nullptr; // It points to the returned row
- bool _is_delete = false;
- TabletReader* _reader = nullptr;
- RowCursor _row_cursor; // It points to rows inside `_row_block`, maybe not returned
- RowBlock* _row_block = nullptr;
- };
-
- // Iterate from LevelIterators (maybe Level0Iterators or Level1Iterator or mixed)
- class Level1Iterator : public LevelIterator {
- public:
- Level1Iterator(const std::list<LevelIterator*>& children, bool merge, bool reverse,
- int sequence_id_idx, uint64_t* merge_count, SortType sort_type,
- int sort_col_num);
-
- Status init() override;
-
- const RowCursor* current_row(bool* delete_flag) const override;
-
- const RowCursor* current_row() const override;
-
- int64_t version() const override;
-
- Status next(const RowCursor** row, bool* delete_flag) override;
-
- ~Level1Iterator();
-
- private:
- Status _merge_next(const RowCursor** row, bool* delete_flag);
- Status _normal_next(const RowCursor** row, bool* delete_flag);
-
- // Each LevelIterator corresponds to a rowset reader,
- // it will be cleared after '_heap' has been initialized when '_merge == true'.
- std::list<LevelIterator*> _children;
- // point to the Level0Iterator containing the next output row.
- // null when CollectIterator hasn't been initialized or reaches EOF.
- LevelIterator* _cur_child = nullptr;
-
- // when `_merge == true`, rowset reader returns ordered rows and CollectIterator uses a priority queue to merge
- // sort them. The output of CollectIterator is also ordered.
- // When `_merge == false`, rowset reader returns *partial* ordered rows. CollectIterator simply returns all rows
- // from the first rowset, the second rowset, .., the last rowset. The output of CollectorIterator is also
- // *partially* ordered.
- bool _merge = true;
- bool _reverse = false;
- // used when `_merge == true`
- // need to be cleared when deconstructing this Level1Iterator
- // The child LevelIterator should be either in _heap or in _children
- std::unique_ptr<MergeHeap> _heap;
- // used when `_merge == false`
- int _sequence_id_idx = -1;
-
- uint64_t* _merged_rows = nullptr;
- SortType _sort_type;
- int _sort_col_num;
- };
-
- std::unique_ptr<LevelIterator> _inner_iter;
-
- // Each LevelIterator corresponds to a rowset reader,
- // it will be cleared after '_inner_iter' has been initialized.
- std::list<LevelIterator*> _children;
-
- bool _merge = true;
- bool _reverse = false;
-
- // Hold reader point to access read params, such as fetch conditions.
- TabletReader* _reader = nullptr;
-};
-
-} // namespace doris
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index fa47754cea..50632ac71b 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -288,8 +288,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
_input_rs_readers, _output_rs_writer.get(), &stats);
}
} else {
- res = Merger::merge_rowsets(_tablet, compaction_type(), _cur_tablet_schema,
- _input_rs_readers, _output_rs_writer.get(), &stats);
+ LOG(FATAL) << "Only support vectorized compaction";
}
if (!res.ok()) {
diff --git a/be/src/olap/generic_iterators.cpp b/be/src/olap/generic_iterators.cpp
deleted file mode 100644
index eec7e30477..0000000000
--- a/be/src/olap/generic_iterators.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <queue>
-#include <utility>
-
-#include "olap/iterators.h"
-#include "olap/row.h"
-#include "olap/row_block2.h"
-#include "olap/row_cursor_cell.h"
-
-namespace doris {
-using namespace ErrorCode;
-
-// This iterator will generate ordered data. For example for schema
-// (int, int) this iterator will generator data like
-// (0, 1), (1, 2), (2, 3), (3, 4)...
-//
-// Usage:
-// Schema schema;
-// AutoIncrementIterator iter(schema, 1000);
-// StorageReadOptions opts;
-// RETURN_IF_ERROR(iter.init(opts));
-// RowBlockV2 block;
-// do {
-// st = iter.next_batch(&block);
-// } while (st.ok());
-class AutoIncrementIterator : public RowwiseIterator {
-public:
- // Will generate num_rows rows in total
- AutoIncrementIterator(const Schema& schema, size_t num_rows)
- : _schema(schema), _num_rows(num_rows), _rows_returned(0) {}
- ~AutoIncrementIterator() override {}
-
- // NOTE: Currently, this function will ignore StorageReadOptions
- Status init(const StorageReadOptions& opts) override;
- Status next_batch(RowBlockV2* block) override;
-
- const Schema& schema() const override { return _schema; }
-
-private:
- Schema _schema;
- size_t _num_rows;
- size_t _rows_returned;
-};
-
-Status AutoIncrementIterator::init(const StorageReadOptions& opts) {
- return Status::OK();
-}
-
-Status AutoIncrementIterator::next_batch(RowBlockV2* block) {
- int row_idx = 0;
- while (row_idx < block->capacity() && _rows_returned < _num_rows) {
- RowBlockRow row = block->row(row_idx);
-
- for (int i = 0; i < _schema.num_columns(); ++i) {
- row.set_is_null(i, false);
- const auto* col_schema = _schema.column(i);
- switch (col_schema->type()) {
- case OLAP_FIELD_TYPE_SMALLINT:
- *(int16_t*)row.cell_ptr(i) = _rows_returned + i;
- break;
- case OLAP_FIELD_TYPE_INT:
- *(int32_t*)row.cell_ptr(i) = _rows_returned + i;
- break;
- case OLAP_FIELD_TYPE_BIGINT:
- *(int64_t*)row.cell_ptr(i) = _rows_returned + i;
- break;
- case OLAP_FIELD_TYPE_FLOAT:
- *(float*)row.cell_ptr(i) = _rows_returned + i;
- break;
- case OLAP_FIELD_TYPE_DOUBLE:
- *(double*)row.cell_ptr(i) = _rows_returned + i;
- break;
- default:
- break;
- }
- }
- row_idx++;
- _rows_returned++;
- }
- block->set_num_rows(row_idx);
- block->set_selected_size(row_idx);
- block->set_delete_state(DEL_PARTIAL_SATISFIED);
- if (row_idx > 0) {
- return Status::OK();
- }
- return Status::EndOfFile("End of AutoIncrementIterator");
-}
-
-// Used to store merge state for a MergeIterator input.
-// This class will iterate all data from internal iterator
-// through client call advance().
-// Usage:
-// MergeIteratorContext ctx(iter);
-// RETURN_IF_ERROR(ctx.init());
-// while (ctx.valid()) {
-// visit(ctx.current_row());
-// RETURN_IF_ERROR(ctx.advance());
-// }
-class MergeIteratorContext {
-public:
- MergeIteratorContext(RowwiseIterator* iter) : _iter(iter), _block(iter->schema(), 1024) {}
-
- MergeIteratorContext(const MergeIteratorContext&) = delete;
- MergeIteratorContext(MergeIteratorContext&&) = delete;
- MergeIteratorContext& operator=(const MergeIteratorContext&) = delete;
- MergeIteratorContext& operator=(MergeIteratorContext&&) = delete;
-
- ~MergeIteratorContext() {
- delete _iter;
- _iter = nullptr;
- }
-
- // Initialize this context and will prepare data for current_row()
- Status init(const StorageReadOptions& opts);
-
- // Return current row which internal row index points to
- // And this function won't make internal index advance.
- // Before call this function, Client must assure that
- // valid() return true
- RowBlockRow current_row() const {
- uint16_t* selection_vector = _block.selection_vector();
- return RowBlockRow(&_block, selection_vector[_index_in_block]);
- }
-
- // Advance internal row index to next valid row
- // Return error if error happens
- // Don't call this when valid() is false, action is undefined
- Status advance();
-
- // Return if has remaining data in this context.
- // Only when this function return true, current_row()
- // will return a valid row
- bool valid() const { return _valid; }
-
- uint64_t data_id() const { return _iter->data_id(); }
-
- bool need_skip() const { return _skip; }
-
- void set_skip(bool skip) const { _skip = skip; }
-
-private:
- // Load next block into _block
- Status _load_next_block();
-
-private:
- RowwiseIterator* _iter;
-
- // used to store data load from iterator
- RowBlockV2 _block;
-
- bool _valid = false;
- mutable bool _skip = false;
- size_t _index_in_block = -1;
-};
-
-Status MergeIteratorContext::init(const StorageReadOptions& opts) {
- RETURN_IF_ERROR(_iter->init(opts));
- RETURN_IF_ERROR(_load_next_block());
- if (valid()) {
- RETURN_IF_ERROR(advance());
- }
- return Status::OK();
-}
-
-Status MergeIteratorContext::advance() {
- _skip = false;
- // NOTE: we increase _index_in_block directly to valid one check
- do {
- _index_in_block++;
- if (_index_in_block < _block.selected_size()) {
- return Status::OK();
- }
- // current batch has no data, load next batch
- RETURN_IF_ERROR(_load_next_block());
- } while (_valid);
- return Status::OK();
-}
-
-Status MergeIteratorContext::_load_next_block() {
- do {
- _block.clear();
- Status st = _iter->next_batch(&_block);
- if (!st.ok()) {
- _valid = false;
- if (st.is<END_OF_FILE>()) {
- return Status::OK();
- } else {
- return st;
- }
- }
- } while (_block.num_rows() == 0);
- _index_in_block = -1;
- _valid = true;
- return Status::OK();
-}
-
-class MergeIterator : public RowwiseIterator {
-public:
- // MergeIterator takes the ownership of input iterators
- MergeIterator(std::vector<RowwiseIterator*> iters, int sequence_id_idx, bool is_unique,
- uint64_t* merged_rows)
- : _origin_iters(std::move(iters)),
- _sequence_id_idx(sequence_id_idx),
- _merged_rows(merged_rows),
- _merge_heap(MergeContextComparator(_sequence_id_idx, is_unique)) {}
-
- ~MergeIterator() override {
- while (!_merge_heap.empty()) {
- auto ctx = _merge_heap.top();
- _merge_heap.pop();
- delete ctx;
- }
- }
-
- Status init(const StorageReadOptions& opts) override;
-
- Status next_batch(RowBlockV2* block) override;
-
- const Schema& schema() const override { return *_schema; }
-
-private:
- // It will be released after '_merge_heap' has been built.
- std::vector<RowwiseIterator*> _origin_iters;
-
- int _sequence_id_idx;
- uint64_t* _merged_rows;
- std::unique_ptr<Schema> _schema;
-
- struct MergeContextComparator {
- MergeContextComparator(int idx, bool is_unique)
- : _sequence_id_idx(idx), _is_unique(is_unique) {};
-
- bool operator()(const MergeIteratorContext* lhs, const MergeIteratorContext* rhs) const {
- auto lhs_row = lhs->current_row();
- auto rhs_row = rhs->current_row();
- int cmp_res = compare_row(lhs_row, rhs_row);
- if (cmp_res != 0) {
- return cmp_res > 0;
- }
-
- auto res = 0;
- // Second: If _sequence_id_idx != 0 means we need to compare sequence. sequence only use
- // in unique key. so keep reverse order of sequence id here
- if (_sequence_id_idx != -1) {
- auto l_cell = lhs_row.cell(_sequence_id_idx);
- auto r_cell = rhs_row.cell(_sequence_id_idx);
- res = lhs_row.schema()->column(_sequence_id_idx)->compare_cell(l_cell, r_cell);
- }
-
- // if row cursors equal, compare segment id.
- // when in UNIQUE_KEYS table, we need only read the latest one, so we
- // return the row in reverse order of segment id.
- // when in AGG_KEYS table, we return the row in order of segment id, because
- // we need replace the value with lower segment id by the one with higher segment id when
- // non-vectorized.
- if (_is_unique) {
- bool result = res == 0 ? lhs->data_id() < rhs->data_id() : res < 0;
- result ? lhs->set_skip(true) : rhs->set_skip(true);
- return result;
- }
-
- return lhs->data_id() > rhs->data_id();
- }
-
- int _sequence_id_idx;
- bool _is_unique;
- };
-
- using MergeHeap = std::priority_queue<MergeIteratorContext*, std::vector<MergeIteratorContext*>,
- MergeContextComparator>;
-
- MergeHeap _merge_heap;
-};
-
-Status MergeIterator::init(const StorageReadOptions& opts) {
- if (_origin_iters.empty()) {
- return Status::OK();
- }
- _schema.reset(new Schema((*(_origin_iters.begin()))->schema()));
-
- for (auto iter : _origin_iters) {
- std::unique_ptr<MergeIteratorContext> ctx(new MergeIteratorContext(iter));
- RETURN_IF_ERROR(ctx->init(opts));
- if (!ctx->valid()) {
- continue;
- }
- _merge_heap.push(ctx.release());
- }
-
- _origin_iters.clear();
- return Status::OK();
-}
-
-Status MergeIterator::next_batch(RowBlockV2* block) {
- size_t row_idx = 0;
- for (; row_idx < block->capacity() && !_merge_heap.empty();) {
- auto ctx = _merge_heap.top();
- _merge_heap.pop();
-
- if (!ctx->need_skip()) {
- RowBlockRow dst_row = block->row(row_idx++);
- // copy current row to block
- copy_row(&dst_row, ctx->current_row(), block->pool());
- } else if (_merged_rows != nullptr) {
- (*_merged_rows)++;
- }
-
- RETURN_IF_ERROR(ctx->advance());
- if (ctx->valid()) {
- _merge_heap.push(ctx);
- } else {
- // Release ctx earlier to reduce resource consumed
- delete ctx;
- }
- }
- block->set_num_rows(row_idx);
- block->set_selected_size(row_idx);
- if (row_idx > 0) {
- return Status::OK();
- } else {
- return Status::EndOfFile("End of MergeIterator");
- }
-}
-
-// UnionIterator will read data from input iterator one by one.
-class UnionIterator : public RowwiseIterator {
-public:
- // Iterators' ownership it transferred to this class.
- // This class will delete all iterators when destructs
- // Client should not use iterators any more.
- UnionIterator(std::vector<RowwiseIterator*>& v) : _origin_iters(v.begin(), v.end()) {}
-
- ~UnionIterator() override {
- std::for_each(_origin_iters.begin(), _origin_iters.end(),
- std::default_delete<RowwiseIterator>());
- }
-
- Status init(const StorageReadOptions& opts) override;
-
- Status next_batch(RowBlockV2* block) override;
-
- const Schema& schema() const override { return *_schema; }
-
-private:
- std::unique_ptr<Schema> _schema;
- RowwiseIterator* _cur_iter = nullptr;
- std::deque<RowwiseIterator*> _origin_iters;
-};
-
-Status UnionIterator::init(const StorageReadOptions& opts) {
- if (_origin_iters.empty()) {
- return Status::OK();
- }
-
- for (auto iter : _origin_iters) {
- RETURN_IF_ERROR(iter->init(opts));
- }
- _schema.reset(new Schema((*(_origin_iters.begin()))->schema()));
- _cur_iter = *(_origin_iters.begin());
- return Status::OK();
-}
-
-Status UnionIterator::next_batch(RowBlockV2* block) {
- while (_cur_iter != nullptr) {
- auto st = _cur_iter->next_batch(block);
- if (st.is<END_OF_FILE>()) {
- delete _cur_iter;
- _cur_iter = nullptr;
- _origin_iters.pop_front();
- if (!_origin_iters.empty()) {
- _cur_iter = *(_origin_iters.begin());
- }
- } else {
- return st;
- }
- }
- return Status::EndOfFile("End of UnionIterator");
-}
-
-RowwiseIterator* new_merge_iterator(std::vector<RowwiseIterator*> inputs, int sequence_id_idx,
- bool is_unique, uint64_t* merged_rows) {
- if (inputs.size() == 1) {
- return *(inputs.begin());
- }
- return new MergeIterator(std::move(inputs), sequence_id_idx, is_unique, merged_rows);
-}
-
-RowwiseIterator* new_union_iterator(std::vector<RowwiseIterator*>& inputs) {
- if (inputs.size() == 1) {
- return *(inputs.begin());
- }
- return new UnionIterator(inputs);
-}
-
-RowwiseIterator* new_auto_increment_iterator(const Schema& schema, size_t num_rows) {
- return new AutoIncrementIterator(schema, num_rows);
-}
-
-} // namespace doris
diff --git a/be/src/olap/generic_iterators.h b/be/src/olap/generic_iterators.h
deleted file mode 100644
index c0d75279d8..0000000000
--- a/be/src/olap/generic_iterators.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "olap/iterators.h"
-
-namespace doris {
-
-// Create a merge iterator for input iterators. Merge iterator will merge
-// ordered input iterator to one ordered iterator. So client should ensure
-// that every input iterator is ordered, otherwise result is undefined.
-//
-// Inputs iterators' ownership is taken by created merge iterator. And client
-// should delete returned iterator after usage.
-RowwiseIterator* new_merge_iterator(std::vector<RowwiseIterator*> inputs, int sequence_id_idx,
- bool is_unique, uint64_t* merged_rows);
-
-// Create a union iterator for input iterators. Union iterator will read
-// input iterators one by one.
-//
-// Inputs iterators' ownership is taken by created union iterator. And client
-// should delete returned iterator after usage.
-RowwiseIterator* new_union_iterator(std::vector<RowwiseIterator*>& inputs);
-
-// Create an auto increment iterator which returns num_rows data in format of schema.
-// This class aims to be used in unit test.
-//
-// Client should delete returned iterator.
-RowwiseIterator* new_auto_increment_iterator(const Schema& schema, size_t num_rows);
-
-} // namespace doris
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index 0cabfb606b..62f3650543 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -118,10 +118,6 @@ public:
// into input batch with Status::OK() returned
// If there is no data to read, will return Status::EndOfFile.
// If other error happens, other error code will be returned.
- virtual Status next_batch(RowBlockV2* block) {
- return Status::NotSupported("to be implemented");
- }
-
virtual Status next_batch(vectorized::Block* block) {
return Status::NotSupported("to be implemented");
}
diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp
index 1a548f1bd8..9cb79fce6f 100644
--- a/be/src/olap/merger.cpp
+++ b/be/src/olap/merger.cpp
@@ -23,7 +23,6 @@
#include "olap/olap_define.h"
#include "olap/row_cursor.h"
#include "olap/tablet.h"
-#include "olap/tuple_reader.h"
#include "util/trace.h"
#include "vec/olap/block_reader.h"
#include "vec/olap/vertical_block_reader.h"
@@ -31,79 +30,6 @@
namespace doris {
-Status Merger::merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
- TabletSchemaSPtr cur_tablet_schema,
- const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
- RowsetWriter* dst_rowset_writer, Merger::Statistics* stats_output) {
- TRACE_COUNTER_SCOPE_LATENCY_US("merge_rowsets_latency_us");
-
- TupleReader reader;
- TabletReader::ReaderParams reader_params;
- reader_params.tablet = tablet;
- reader_params.reader_type = reader_type;
- reader_params.rs_readers = src_rowset_readers;
- reader_params.version = dst_rowset_writer->version();
- {
- std::shared_lock rdlock(tablet->get_header_lock());
- auto delete_preds = tablet->delete_predicates();
- std::copy(delete_preds.cbegin(), delete_preds.cend(),
- std::inserter(reader_params.delete_predicates,
- reader_params.delete_predicates.begin()));
- }
- TabletSchemaSPtr merge_tablet_schema = std::make_shared<TabletSchema>();
- merge_tablet_schema->copy_from(*cur_tablet_schema);
- // Merge the columns in delete predicate that not in latest schema in to current tablet schema
- for (auto& del_pred_rs : reader_params.delete_predicates) {
- merge_tablet_schema->merge_dropped_columns(tablet->tablet_schema(del_pred_rs->version()));
- }
- reader_params.tablet_schema = merge_tablet_schema;
- RETURN_NOT_OK(reader.init(reader_params));
-
- RowCursor row_cursor;
- RETURN_NOT_OK_LOG(
- row_cursor.init(cur_tablet_schema),
- "failed to init row cursor when merging rowsets of tablet " + tablet->full_name());
- row_cursor.allocate_memory_for_string_type(cur_tablet_schema);
-
- std::unique_ptr<MemPool> mem_pool(new MemPool());
-
- // The following procedure would last for long time, half of one day, etc.
- int64_t output_rows = 0;
- while (!StorageEngine::instance()->stopped()) {
- ObjectPool objectPool;
- bool eof = false;
- // Read one row into row_cursor
- RETURN_NOT_OK_LOG(
- reader.next_row_with_aggregation(&row_cursor, mem_pool.get(), &objectPool, &eof),
- "failed to read next row when merging rowsets of tablet " + tablet->full_name());
- if (eof) {
- break;
- }
- RETURN_NOT_OK_LOG(
- dst_rowset_writer->add_row(row_cursor),
- "failed to write row when merging rowsets of tablet " + tablet->full_name());
- output_rows++;
- // the memory allocate by mem pool has been copied,
- // so we should release memory immediately
- mem_pool->clear();
- }
- if (StorageEngine::instance()->stopped()) {
- LOG(INFO) << "tablet " << tablet->full_name() << "failed to do compaction, engine stopped";
- return Status::Error<INTERNAL_ERROR>();
- }
-
- if (stats_output != nullptr) {
- stats_output->output_rows = output_rows;
- stats_output->merged_rows = reader.merged_rows();
- stats_output->filtered_rows = reader.filtered_rows();
- }
-
- RETURN_NOT_OK_LOG(
- dst_rowset_writer->flush(),
- "failed to flush rowset when merging rowsets of tablet " + tablet->full_name());
- return Status::OK();
-}
-
Status Merger::vmerge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
TabletSchemaSPtr cur_tablet_schema,
const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
diff --git a/be/src/olap/merger.h b/be/src/olap/merger.h
index 734c6a866c..5b16e2662b 100644
--- a/be/src/olap/merger.h
+++ b/be/src/olap/merger.h
@@ -41,10 +41,6 @@ public:
// merge rows from `src_rowset_readers` and write into `dst_rowset_writer`.
// return OK and set statistics into `*stats_output`.
// return others on error
- static Status merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
- TabletSchemaSPtr cur_tablet_schema,
- const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
- RowsetWriter* dst_rowset_writer, Statistics* stats_output);
static Status vmerge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
TabletSchemaSPtr cur_tablet_schema,
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h
index 0ea2e7c757..473e7697b6 100644
--- a/be/src/olap/reader.h
+++ b/be/src/olap/reader.h
@@ -34,8 +34,6 @@ namespace doris {
class Tablet;
class RowCursor;
-class RowBlock;
-class CollectIterator;
class RuntimeState;
namespace vectorized {
@@ -120,13 +118,6 @@ public:
// Initialize TabletReader with tablet, data version and fetch range.
virtual Status init(const ReaderParams& read_params);
- // Read next row with aggregation.
- // Return OK and set `*eof` to false when next row is read into `row_cursor`.
- // Return OK and set `*eof` to true when no more rows can be read.
- // Return others when unexpected error happens.
- virtual Status next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool,
- ObjectPool* agg_pool, bool* eof) = 0;
-
// Read next block with aggregation.
// Return OK and set `*eof` to false when next block is read
// Return OK and set `*eof` to true when no more rows can be read.
@@ -153,7 +144,6 @@ public:
virtual bool update_profile(RuntimeProfile* profile) { return false; }
protected:
- friend class CollectIterator;
friend class vectorized::VCollectIterator;
friend class DeleteHandler;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index f70d84ae6f..e5d461210e 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -20,7 +20,6 @@
#include <utility>
#include "olap/delete_handler.h"
-#include "olap/generic_iterators.h"
#include "olap/row_block.h"
#include "olap/row_block2.h"
#include "olap/row_cursor.h"
@@ -198,27 +197,16 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
// merge or union segment iterator
RowwiseIterator* final_iterator;
- if (config::enable_storage_vectorization && read_context->is_vec) {
- if (read_context->need_ordered_result &&
- _rowset->rowset_meta()->is_segments_overlapping()) {
- final_iterator = vectorized::new_merge_iterator(
- iterators, read_context->sequence_id_idx, read_context->is_unique,
- read_context->read_orderby_key_reverse, read_context->merged_rows);
- } else {
- if (read_context->read_orderby_key_reverse) {
- // reverse iterators to read backward for ORDER BY key DESC
- std::reverse(iterators.begin(), iterators.end());
- }
- final_iterator = vectorized::new_union_iterator(iterators);
- }
+ if (read_context->need_ordered_result && _rowset->rowset_meta()->is_segments_overlapping()) {
+ final_iterator = vectorized::new_merge_iterator(
+ iterators, read_context->sequence_id_idx, read_context->is_unique,
+ read_context->read_orderby_key_reverse, read_context->merged_rows);
} else {
- if (read_context->need_ordered_result &&
- _rowset->rowset_meta()->is_segments_overlapping()) {
- final_iterator = new_merge_iterator(iterators, read_context->sequence_id_idx,
- read_context->is_unique, read_context->merged_rows);
- } else {
- final_iterator = new_union_iterator(iterators);
+ if (read_context->read_orderby_key_reverse) {
+ // reverse iterators to read backward for ORDER BY key DESC
+ std::reverse(iterators.begin(), iterators.end());
}
+ final_iterator = vectorized::new_union_iterator(iterators);
}
auto s = final_iterator->init(_read_options);
@@ -228,122 +216,22 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) {
}
_iterator.reset(final_iterator);
- // The data in _input_block will be copied shallowly to _output_block.
- // Therefore, for nestable fields, the _input_block can't be shared.
- bool has_nestable_fields = false;
- for (const auto* field : _input_schema->columns()) {
- if (field != nullptr && field->get_sub_field_count() > 0) {
- has_nestable_fields = true;
- break;
- }
- }
-
- // init input block
- if (_can_reuse_schema && !has_nestable_fields) {
- if (read_context->reuse_block == nullptr) {
- read_context->reuse_block.reset(
- new RowBlockV2(*_input_schema, std::min(1024, read_context->batch_size)));
- }
- _input_block = read_context->reuse_block;
- } else {
- _input_block.reset(
- new RowBlockV2(*_input_schema, std::min(1024, read_context->batch_size)));
- }
-
- if (!read_context->is_vec) {
- // init input/output block and row
- _output_block.reset(new RowBlock(read_context->tablet_schema));
-
- RowBlockInfo output_block_info;
- output_block_info.row_num = std::min(1024, read_context->batch_size);
- output_block_info.null_supported = true;
- output_block_info.column_ids = *(_context->return_columns);
- _output_block->init(output_block_info);
- _row.reset(new RowCursor());
- RETURN_NOT_OK(_row->init(read_context->tablet_schema, *(_context->return_columns)));
- }
-
return Status::OK();
}
-Status BetaRowsetReader::next_block(RowBlock** block) {
+Status BetaRowsetReader::next_block(vectorized::Block* block) {
SCOPED_RAW_TIMER(&_stats->block_fetch_ns);
- // read next input block
- _input_block->clear();
- {
- auto s = _iterator->next_batch(_input_block.get());
+ do {
+ auto s = _iterator->next_batch(block);
if (!s.ok()) {
if (s.is<END_OF_FILE>()) {
- *block = nullptr;
return Status::Error<END_OF_FILE>();
+ } else {
+ LOG(WARNING) << "failed to read next block: " << s.to_string();
+ return Status::Error<ROWSET_READ_FAILED>();
}
- LOG(WARNING) << "failed to read next block: " << s.to_string();
- return Status::Error<ROWSET_READ_FAILED>();
}
- }
-
- // convert to output block
- _output_block->clear();
- {
- SCOPED_RAW_TIMER(&_stats->block_convert_ns);
- _input_block->convert_to_row_block(_row.get(), _output_block.get());
- }
- *block = _output_block.get();
- return Status::OK();
-}
-
-Status BetaRowsetReader::next_block(vectorized::Block* block) {
- SCOPED_RAW_TIMER(&_stats->block_fetch_ns);
- if (config::enable_storage_vectorization && _context->is_vec) {
- do {
- auto s = _iterator->next_batch(block);
- if (!s.ok()) {
- if (s.is<END_OF_FILE>()) {
- return Status::Error<END_OF_FILE>();
- } else {
- LOG(WARNING) << "failed to read next block: " << s.to_string();
- return Status::Error<ROWSET_READ_FAILED>();
- }
- }
- } while (block->rows() == 0);
- } else {
- bool is_first = true;
-
- do {
- // read next input block
- {
- _input_block->clear();
- {
- auto s = _iterator->next_batch(_input_block.get());
- if (!s.ok()) {
- if (s.is<END_OF_FILE>()) {
- if (is_first) {
- return Status::Error<END_OF_FILE>();
- } else {
- break;
- }
- } else {
- LOG(WARNING) << "failed to read next block: " << s.to_string();
- return Status::Error<ROWSET_READ_FAILED>();
- }
- } else if (_input_block->selected_size() == 0) {
- continue;
- }
- }
- }
-
- {
- SCOPED_RAW_TIMER(&_stats->block_convert_ns);
- auto s = _input_block->convert_to_vec_block(block);
- if (UNLIKELY(!s.ok())) {
- LOG(WARNING) << "failed to read next block: " << s.to_string();
- return Status::Error<STRING_OVERFLOW_IN_VEC_ENGINE>();
- }
- }
- is_first = false;
- } while (block->rows() <
- _context->batch_size); // here we should keep block.rows() < batch_size
- }
+ } while (block->rows() == 0);
return Status::OK();
}
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index e2f888cb12..c80d258d3b 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -38,9 +38,6 @@ public:
Status get_segment_iterators(RowsetReaderContext* read_context,
std::vector<RowwiseIterator*>* out_iters) override;
void reset_read_options() override;
-
- // It's ok, because we only get ref here, the block's owner is this reader.
- Status next_block(RowBlock** block) override;
Status next_block(vectorized::Block* block) override;
Status next_block_view(vectorized::BlockView* block_view) override;
bool support_return_data_by_ref() override { return _iterator->support_return_data_by_ref(); }
@@ -89,7 +86,6 @@ private:
std::unique_ptr<RowwiseIterator> _iterator;
std::shared_ptr<RowBlockV2> _input_block;
- std::unique_ptr<RowBlock> _output_block;
std::unique_ptr<RowCursor> _row;
// make sure this handle is initialized and valid before
diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h
index 99fcf0da58..289f782707 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -48,13 +48,6 @@ public:
std::vector<RowwiseIterator*>* out_iters) = 0;
virtual void reset_read_options() = 0;
- // read next block data into *block.
- // Returns
- // OK when read successfully.
- // Status::Error<END_OF_FILE>() and set *block to null when there is no more block.
- // Others when error happens.
- virtual Status next_block(RowBlock** block) = 0;
-
virtual Status next_block(vectorized::Block* block) = 0;
virtual Status next_block_view(vectorized::BlockView* block_view) = 0;
diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h
index 31b115ae33..29972c6c75 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -65,7 +65,6 @@ struct RowsetReaderContext {
bool enable_unique_key_merge_on_write = false;
const DeleteBitmap* delete_bitmap = nullptr;
bool record_rowids = false;
- std::shared_ptr<RowBlockV2> reuse_block;
bool is_vertical_compaction = false;
bool is_key_column_group = false;
std::shared_ptr<Schema> reuse_input_schema;
diff --git a/be/src/olap/rowset/segment_v2/empty_segment_iterator.cpp b/be/src/olap/rowset/segment_v2/empty_segment_iterator.cpp
index f367c2d052..0c9985e884 100644
--- a/be/src/olap/rowset/segment_v2/empty_segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/empty_segment_iterator.cpp
@@ -24,12 +24,6 @@ namespace segment_v2 {
EmptySegmentIterator::EmptySegmentIterator(const doris::Schema& schema) : _schema(schema) {}
-Status EmptySegmentIterator::next_batch(RowBlockV2* block) {
- block->set_num_rows(0);
- block->set_selected_size(0);
- return Status::EndOfFile("no more data in segment");
-}
-
Status EmptySegmentIterator::next_batch(vectorized::Block* block) {
return Status::EndOfFile("no more data in segment");
}
diff --git a/be/src/olap/rowset/segment_v2/empty_segment_iterator.h b/be/src/olap/rowset/segment_v2/empty_segment_iterator.h
index 0c186ed861..5571ee6756 100644
--- a/be/src/olap/rowset/segment_v2/empty_segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/empty_segment_iterator.h
@@ -31,7 +31,6 @@ public:
~EmptySegmentIterator() override {}
Status init(const StorageReadOptions& opts) override { return Status::OK(); }
const Schema& schema() const override { return _schema; }
- Status next_batch(RowBlockV2* row_block) override;
Status next_batch(vectorized::Block* block) override;
private:
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index fe03a21f2a..22e8edecdb 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -626,109 +626,6 @@ Status SegmentIterator::_read_columns(const std::vector<ColumnId>& column_ids, R
return Status::OK();
}
-Status SegmentIterator::next_batch(RowBlockV2* block) {
- SCOPED_RAW_TIMER(&_opts.stats->block_load_ns);
- if (UNLIKELY(!_inited)) {
- RETURN_IF_ERROR(_init());
- _inited = true;
- }
-
- uint32_t nrows_read = 0;
- uint32_t nrows_read_limit = block->capacity();
- const auto& read_columns =
- _lazy_materialization_read ? _predicate_columns : block->schema()->column_ids();
-
- // phase 1: read rows selected by various index (indicated by _row_bitmap) into block
- // when using lazy-materialization-read, only columns with predicates are read
- {
- SCOPED_RAW_TIMER(&_opts.stats->first_read_ns);
- do {
- uint32_t range_from;
- uint32_t range_to;
- bool has_next_range =
- _range_iter->next_range(nrows_read_limit - nrows_read, &range_from, &range_to);
- if (!has_next_range) {
- break;
- }
- if (_cur_rowid == 0 || _cur_rowid != range_from) {
- _cur_rowid = range_from;
- _opts.stats->block_first_read_seek_num += 1;
- SCOPED_RAW_TIMER(&_opts.stats->block_first_read_seek_ns);
- RETURN_IF_ERROR(_seek_columns(read_columns, _cur_rowid));
- }
- size_t rows_to_read = range_to - range_from;
- RETURN_IF_ERROR(_read_columns(read_columns, block, nrows_read, rows_to_read));
- _cur_rowid += rows_to_read;
- if (_lazy_materialization_read) {
- for (uint32_t rid = range_from; rid < range_to; rid++) {
- _block_rowids[nrows_read++] = rid;
- }
- } else {
- nrows_read += rows_to_read;
- }
- } while (nrows_read < nrows_read_limit);
- }
-
- block->set_num_rows(nrows_read);
- block->set_selected_size(nrows_read);
- if (nrows_read == 0) {
- return Status::EndOfFile("no more data in segment");
- }
- _opts.stats->raw_rows_read += nrows_read;
- _opts.stats->blocks_load += 1;
-
- // phase 2: run vectorized evaluation on remaining predicates to prune rows.
- // block's selection vector will be set to indicate which rows have passed predicates.
- // TODO(hkp): optimize column predicate to check column block once for one column
- if (!_col_predicates.empty() || _opts.delete_condition_predicates != nullptr) {
- // init selection position index
- uint16_t selected_size = block->selected_size();
- uint16_t original_size = selected_size;
-
- SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns);
- for (auto column_predicate : _col_predicates) {
- auto column_id = column_predicate->column_id();
- auto column_block = block->column_block(column_id);
- column_predicate->evaluate(&column_block, block->selection_vector(), &selected_size);
- }
- _opts.stats->rows_vec_cond_filtered += original_size - selected_size;
-
- // set original_size again to check delete condition predicates
- // filter how many data
- original_size = selected_size;
- _opts.delete_condition_predicates->evaluate(block, &selected_size);
- _opts.stats->rows_vec_del_cond_filtered += original_size - selected_size;
-
- block->set_selected_size(selected_size);
- block->set_num_rows(selected_size);
- }
-
- // phase 3: read non-predicate columns of rows that have passed predicates
- if (_lazy_materialization_read) {
- SCOPED_RAW_TIMER(&_opts.stats->lazy_read_ns);
- uint16_t i = 0;
- const uint16_t* sv = block->selection_vector();
- const uint16_t sv_size = block->selected_size();
- while (i < sv_size) {
- // i: start offset the current range
- // j: past the last offset of the current range
- uint16_t j = i + 1;
- while (j < sv_size && _block_rowids[sv[j]] == _block_rowids[sv[j - 1]] + 1) {
- ++j;
- }
- uint16_t range_size = j - i;
- {
- _opts.stats->block_lazy_read_seek_num += 1;
- SCOPED_RAW_TIMER(&_opts.stats->block_lazy_read_seek_ns);
- RETURN_IF_ERROR(_seek_columns(_non_predicate_columns, _block_rowids[sv[i]]));
- }
- RETURN_IF_ERROR(_read_columns(_non_predicate_columns, block, sv[i], range_size));
- i += range_size;
- }
- }
- return Status::OK();
-}
-
/* ---------------------- for vectorization implementation ---------------------- */
/**
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 09dbc0db9c..9bd5a8c315 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -53,7 +53,6 @@ public:
~SegmentIterator() override;
Status init(const StorageReadOptions& opts) override;
- Status next_batch(RowBlockV2* row_block) override;
Status next_batch(vectorized::Block* block) override;
// Get current block row locations. This function should be called
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 4c9b65f704..f4015fdcb3 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -1218,27 +1218,6 @@ Status LinkedSchemaChange::process(RowsetReaderSharedPtr rowset_reader, RowsetWr
}
}
-SchemaChangeDirectly::SchemaChangeDirectly(const RowBlockChanger& row_block_changer)
- : _row_block_changer(row_block_changer), _row_block_allocator(nullptr), _cursor(nullptr) {}
-
-SchemaChangeDirectly::~SchemaChangeDirectly() {
- VLOG_NOTICE << "~SchemaChangeDirectly()";
- SAFE_DELETE(_row_block_allocator);
- SAFE_DELETE(_cursor);
-}
-
-bool SchemaChangeDirectly::_write_row_block(RowsetWriter* rowset_writer, RowBlock* row_block) {
- for (uint32_t i = 0; i < row_block->row_block_info().row_num; i++) {
- row_block->get_row(i, _cursor);
- if (!rowset_writer->add_row(*_cursor)) {
- LOG(WARNING) << "fail to write to new rowset for direct schema change";
- return false;
- }
- }
-
- return true;
-}
-
Status reserve_block(std::unique_ptr<RowBlock, RowBlockDeleter>* block_handle_ptr, int row_num,
RowBlockAllocator* allocator) {
auto& block_handle = *block_handle_ptr;
@@ -1257,75 +1236,6 @@ Status reserve_block(std::unique_ptr<RowBlock, RowBlockDeleter>* block_handle_pt
return Status::OK();
}
-Status SchemaChangeDirectly::_inner_process(RowsetReaderSharedPtr rowset_reader,
- RowsetWriter* rowset_writer, TabletSharedPtr new_tablet,
- TabletSchemaSPtr base_tablet_schema) {
- if (_row_block_allocator == nullptr) {
- _row_block_allocator = new RowBlockAllocator(new_tablet->tablet_schema(), 0);
- if (_row_block_allocator == nullptr) {
- LOG(FATAL) << "failed to malloc RowBlockAllocator. size=" << sizeof(RowBlockAllocator);
- return Status::Error<INVALID_ARGUMENT>();
- }
- }
-
- if (nullptr == _cursor) {
- _cursor = new (nothrow) RowCursor();
- if (nullptr == _cursor) {
- LOG(WARNING) << "fail to allocate row cursor.";
- return Status::Error<INVALID_ARGUMENT>();
- }
-
- if (!_cursor->init(new_tablet->tablet_schema())) {
- LOG(WARNING) << "fail to init row cursor.";
- return Status::Error<INVALID_ARGUMENT>();
- }
- }
-
- Status res = Status::OK();
-
- VLOG_NOTICE << "init writer. new_tablet=" << new_tablet->full_name()
- << ", block_row_number=" << new_tablet->num_rows_per_row_block();
-
- std::unique_ptr<RowBlock, RowBlockDeleter> new_row_block(nullptr, [&](RowBlock* block) {
- if (block != nullptr) {
- _row_block_allocator->release(block);
- }
- });
-
- RowBlock* ref_row_block = nullptr;
- rowset_reader->next_block(&ref_row_block);
- while (ref_row_block != nullptr && ref_row_block->has_remaining()) {
- // We will allocate blocks of the same size as before
- // to ensure that the data can be stored
- RETURN_NOT_OK(reserve_block(&new_row_block, ref_row_block->row_block_info().row_num,
- _row_block_allocator));
-
- // Change ref to new. This step is reasonable to say that it does need to wait for a large block, but theoretically it has nothing to do with the writer.
- uint64_t filtered_rows = 0;
- res = _row_block_changer.change_row_block(ref_row_block, rowset_reader->version().second,
- new_row_block.get(), &filtered_rows);
- RETURN_NOT_OK_LOG(res, "failed to change data in row block.");
-
- // rows filtered by delete handler one by one
- _add_filtered_rows(filtered_rows);
-
- if (!_write_row_block(rowset_writer, new_row_block.get())) {
- res = Status::Error<SCHEMA_CHANGE_INFO_INVALID>();
- LOG(WARNING) << "failed to write row block.";
- return res;
- }
-
- ref_row_block->clear();
- rowset_reader->next_block(&ref_row_block);
- }
-
- if (!rowset_writer->flush()) {
- return Status::Error<ALTER_STATUS_ERR>();
- }
-
- return res;
-}
-
Status VSchemaChangeDirectly::_inner_process(RowsetReaderSharedPtr rowset_reader,
RowsetWriter* rowset_writer,
TabletSharedPtr new_tablet,
@@ -1351,18 +1261,6 @@ Status VSchemaChangeDirectly::_inner_process(RowsetReaderSharedPtr rowset_reader
return Status::OK();
}
-SchemaChangeWithSorting::SchemaChangeWithSorting(const RowBlockChanger& row_block_changer,
- size_t memory_limitation)
- : _row_block_changer(row_block_changer),
- _memory_limitation(memory_limitation),
- _temp_delta_versions(Version::mock()),
- _row_block_allocator(nullptr) {}
-
-SchemaChangeWithSorting::~SchemaChangeWithSorting() {
- VLOG_NOTICE << "~SchemaChangeWithSorting()";
- SAFE_DELETE(_row_block_allocator);
-}
-
VSchemaChangeWithSorting::VSchemaChangeWithSorting(const RowBlockChanger& row_block_changer,
size_t memory_limitation)
: _changer(row_block_changer),
@@ -1372,173 +1270,6 @@ VSchemaChangeWithSorting::VSchemaChangeWithSorting(const RowBlockChanger& row_bl
"VSchemaChangeWithSorting:changer={}", std::to_string(int64(&row_block_changer))));
}
-Status SchemaChangeWithSorting::_inner_process(RowsetReaderSharedPtr rowset_reader,
- RowsetWriter* rowset_writer,
- TabletSharedPtr new_tablet,
- TabletSchemaSPtr base_tablet_schema) {
- if (_row_block_allocator == nullptr) {
- _row_block_allocator =
- new (nothrow) RowBlockAllocator(new_tablet->tablet_schema(), _memory_limitation);
- if (_row_block_allocator == nullptr) {
- LOG(FATAL) << "failed to malloc RowBlockAllocator. size=" << sizeof(RowBlockAllocator);
- return Status::Error<INVALID_ARGUMENT>();
- }
- }
-
- Status res = Status::OK();
- RowsetSharedPtr rowset = rowset_reader->rowset();
-
- RowBlockSorter row_block_sorter(_row_block_allocator);
-
- // for internal sorting
- RowBlock* new_row_block = nullptr;
- std::vector<RowBlock*> row_block_arr;
-
- // for external sorting
- // src_rowsets to store the rowset generated by internal sorting
- std::vector<RowsetSharedPtr> src_rowsets;
-
- Defer defer {[&]() {
- // remove the intermediate rowsets generated by internal sorting
- for (auto& row_set : src_rowsets) {
- StorageEngine::instance()->add_unused_rowset(row_set);
- }
-
- for (auto block : row_block_arr) {
- _row_block_allocator->release(block);
- }
-
- row_block_arr.clear();
- }};
-
- _temp_delta_versions.first = _temp_delta_versions.second;
-
- SegmentsOverlapPB segments_overlap = rowset->rowset_meta()->segments_overlap();
- int64_t oldest_write_timestamp = rowset->oldest_write_timestamp();
- int64_t newest_write_timestamp = rowset->newest_write_timestamp();
- RowBlock* ref_row_block = nullptr;
- rowset_reader->next_block(&ref_row_block);
- while (ref_row_block != nullptr && ref_row_block->has_remaining()) {
- auto st = _row_block_allocator->allocate(&new_row_block,
- ref_row_block->row_block_info().row_num, true);
- // if OLAP_ERR_FETCH_MEMORY_EXCEEDED == st.precise_code()
- // that mean RowBlockAllocator::alocate() memory exceeded.
- // But we can flush row_block_arr if row_block_arr is not empty.
- // Don't return directly.
- if (st.is<MEM_ALLOC_FAILED>()) {
- return st;
- } else if (st) {
- // do memory check for sorting, in case schema change task fail at row block sorting because of
- // not doing internal sorting first
- if (!_row_block_allocator->is_memory_enough_for_sorting(
- ref_row_block->row_block_info().row_num, row_block_sorter.num_rows())) {
- if (new_row_block != nullptr) {
- _row_block_allocator->release(new_row_block);
- new_row_block = nullptr;
- }
- }
- }
-
- if (new_row_block == nullptr) {
- if (row_block_arr.empty()) {
- LOG(WARNING) << "Memory limitation is too small for Schema Change: "
- << "memory_limitation=" << _memory_limitation
- << ". You can increase the memory by changing the config: "
- << "memory_limitation_per_thread_for_schema_change_bytes";
- return Status::Error<FETCH_MEMORY_EXCEEDED>();
- }
-
- // enter here while memory limitation is reached.
- RowsetSharedPtr rowset;
- if (!_internal_sorting(
- row_block_arr,
- Version(_temp_delta_versions.second, _temp_delta_versions.second),
- oldest_write_timestamp, newest_write_timestamp, new_tablet,
- segments_overlap, &rowset)) {
- LOG(WARNING) << "failed to sorting internally.";
- return Status::Error<ALTER_STATUS_ERR>();
- }
-
- src_rowsets.push_back(rowset);
-
- for (auto block : row_block_arr) {
- _row_block_allocator->release(block);
- }
-
- row_block_arr.clear();
-
- // increase temp version
- ++_temp_delta_versions.second;
- continue;
- }
-
- uint64_t filtered_rows = 0;
- res = _row_block_changer.change_row_block(ref_row_block, rowset_reader->version().second,
- new_row_block, &filtered_rows);
- if (!res) {
- row_block_arr.push_back(new_row_block);
- LOG(WARNING) << "failed to change data in row block.";
- return res;
- }
- _add_filtered_rows(filtered_rows);
-
- if (new_row_block->row_block_info().row_num > 0) {
- if (!row_block_sorter.sort(&new_row_block)) {
- row_block_arr.push_back(new_row_block);
- LOG(WARNING) << "failed to sort row block.";
- return Status::Error<ALTER_STATUS_ERR>();
- }
- row_block_arr.push_back(new_row_block);
- } else {
- LOG(INFO) << "new block num rows is: " << new_row_block->row_block_info().row_num;
- _row_block_allocator->release(new_row_block);
- new_row_block = nullptr;
- }
-
- ref_row_block->clear();
- rowset_reader->next_block(&ref_row_block);
- }
-
- if (!row_block_arr.empty()) {
- // enter here while memory limitation is reached.
- RowsetSharedPtr rowset = nullptr;
-
- if (!_internal_sorting(row_block_arr,
- Version(_temp_delta_versions.second, _temp_delta_versions.second),
- oldest_write_timestamp, newest_write_timestamp, new_tablet,
- segments_overlap, &rowset)) {
- LOG(WARNING) << "failed to sorting internally.";
- return Status::Error<ALTER_STATUS_ERR>();
- }
-
- src_rowsets.push_back(rowset);
-
- for (auto block : row_block_arr) {
- _row_block_allocator->release(block);
- }
-
- row_block_arr.clear();
-
- // increase temp version
- ++_temp_delta_versions.second;
- }
-
- if (src_rowsets.empty()) {
- res = rowset_writer->flush();
- if (!res) {
- LOG(WARNING) << "create empty version for schema change failed."
- << " version=" << rowset_writer->version().first << "-"
- << rowset_writer->version().second;
- return Status::Error<ALTER_STATUS_ERR>();
- }
- } else if (!_external_sorting(src_rowsets, rowset_writer, new_tablet)) {
- LOG(WARNING) << "failed to sorting externally.";
- return Status::Error<ALTER_STATUS_ERR>();
- }
-
- return res;
-}
-
Status VSchemaChangeWithSorting::_inner_process(RowsetReaderSharedPtr rowset_reader,
RowsetWriter* rowset_writer,
TabletSharedPtr new_tablet,
@@ -1626,36 +1357,6 @@ Status VSchemaChangeWithSorting::_inner_process(RowsetReaderSharedPtr rowset_rea
return Status::OK();
}
-bool SchemaChangeWithSorting::_internal_sorting(
- const std::vector<RowBlock*>& row_block_arr, const Version& version,
- int64_t oldest_write_timestamp, int64_t newest_write_timestamp, TabletSharedPtr new_tablet,
- SegmentsOverlapPB segments_overlap, RowsetSharedPtr* rowset) {
- uint64_t merged_rows = 0;
- RowBlockMerger merger(new_tablet);
-
- VLOG_NOTICE << "init rowset builder. tablet=" << new_tablet->full_name()
- << ", block_row_size=" << new_tablet->num_rows_per_row_block();
-
- std::unique_ptr<RowsetWriter> rowset_writer;
- if (!new_tablet->create_rowset_writer(version, VISIBLE, segments_overlap,
- new_tablet->tablet_schema(), oldest_write_timestamp,
- newest_write_timestamp, &rowset_writer)) {
- return false;
- }
-
- if (!merger.merge(row_block_arr, rowset_writer.get(), &merged_rows)) {
- LOG(WARNING) << "failed to merge row blocks.";
- new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX +
- rowset_writer->rowset_id().to_string());
- return false;
- }
- new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX +
- rowset_writer->rowset_id().to_string());
- _add_merged_rows(merged_rows);
- *rowset = rowset_writer->build();
- return true;
-}
-
Status VSchemaChangeWithSorting::_internal_sorting(
const std::vector<std::unique_ptr<vectorized::Block>>& blocks, const Version& version,
int64_t oldest_write_timestamp, int64_t newest_write_timestamp, TabletSharedPtr new_tablet,
@@ -1680,34 +1381,6 @@ Status VSchemaChangeWithSorting::_internal_sorting(
return Status::OK();
}
-bool SchemaChangeWithSorting::_external_sorting(vector<RowsetSharedPtr>& src_rowsets,
- RowsetWriter* rowset_writer,
- TabletSharedPtr new_tablet) {
- std::vector<RowsetReaderSharedPtr> rs_readers;
- for (auto& rowset : src_rowsets) {
- RowsetReaderSharedPtr rs_reader;
- auto res = rowset->create_reader(&rs_reader);
- if (!res) {
- LOG(WARNING) << "failed to create rowset reader.";
- return false;
- }
- rs_readers.push_back(rs_reader);
- }
-
- Merger::Statistics stats;
- auto res = Merger::merge_rowsets(new_tablet, READER_ALTER_TABLE, new_tablet->tablet_schema(),
- rs_readers, rowset_writer, &stats);
- if (!res) {
- LOG(WARNING) << "failed to merge rowsets. tablet=" << new_tablet->full_name()
- << ", version=" << rowset_writer->version().first << "-"
- << rowset_writer->version().second;
- return false;
- }
- _add_merged_rows(stats.merged_rows);
- _add_filtered_rows(stats.filtered_rows);
- return true;
-}
-
Status VSchemaChangeWithSorting::_external_sorting(vector<RowsetSharedPtr>& src_rowsets,
RowsetWriter* rowset_writer,
TabletSharedPtr new_tablet) {
diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h
index fd9b45cf8e..c9ffd1cdd9 100644
--- a/be/src/olap/schema_change.h
+++ b/be/src/olap/schema_change.h
@@ -163,27 +163,6 @@ private:
DISALLOW_COPY_AND_ASSIGN(LinkedSchemaChange);
};
-// @brief schema change without sorting.
-class SchemaChangeDirectly : public SchemaChange {
-public:
- // @params tablet the instance of tablet which has new schema.
- // @params row_block_changer changer to modify the data of RowBlock
- explicit SchemaChangeDirectly(const RowBlockChanger& row_block_changer);
- ~SchemaChangeDirectly() override;
-
-private:
- Status _inner_process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* rowset_writer,
- TabletSharedPtr new_tablet, TabletSchemaSPtr base_tablet_schema) override;
-
- const RowBlockChanger& _row_block_changer;
- RowBlockAllocator* _row_block_allocator;
- RowCursor* _cursor;
-
- bool _write_row_block(RowsetWriter* rowset_builder, RowBlock* row_block);
-
- DISALLOW_COPY_AND_ASSIGN(SchemaChangeDirectly);
-};
-
class VSchemaChangeDirectly : public SchemaChange {
public:
VSchemaChangeDirectly(const RowBlockChanger& row_block_changer) : _changer(row_block_changer) {}
@@ -196,32 +175,6 @@ private:
};
// @breif schema change with sorting
-class SchemaChangeWithSorting : public SchemaChange {
-public:
- explicit SchemaChangeWithSorting(const RowBlockChanger& row_block_changer,
- size_t memory_limitation);
- ~SchemaChangeWithSorting() override;
-
-private:
- Status _inner_process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* rowset_writer,
- TabletSharedPtr new_tablet, TabletSchemaSPtr base_tablet_schema) override;
-
- bool _internal_sorting(const std::vector<RowBlock*>& row_block_arr,
- const Version& temp_delta_versions, int64_t oldest_write_timestamp,
- int64_t newest_write_timestamp, TabletSharedPtr new_tablet,
- SegmentsOverlapPB segments_overlap, RowsetSharedPtr* rowset);
-
- bool _external_sorting(std::vector<RowsetSharedPtr>& src_rowsets, RowsetWriter* rowset_writer,
- TabletSharedPtr new_tablet);
-
- const RowBlockChanger& _row_block_changer;
- size_t _memory_limitation;
- Version _temp_delta_versions;
- RowBlockAllocator* _row_block_allocator;
-
- DISALLOW_COPY_AND_ASSIGN(SchemaChangeWithSorting);
-};
-
class VSchemaChangeWithSorting : public SchemaChange {
public:
VSchemaChangeWithSorting(const RowBlockChanger& row_block_changer, size_t memory_limitation);
@@ -254,21 +207,12 @@ public:
static std::unique_ptr<SchemaChange> get_sc_procedure(const RowBlockChanger& rb_changer,
bool sc_sorting, bool sc_directly) {
if (sc_sorting) {
- if (config::enable_vectorized_alter_table) {
- return std::make_unique<VSchemaChangeWithSorting>(
- rb_changer, config::memory_limitation_per_thread_for_schema_change_bytes);
- } else {
- return std::make_unique<SchemaChangeWithSorting>(
- rb_changer, config::memory_limitation_per_thread_for_schema_change_bytes);
- }
+ return std::make_unique<VSchemaChangeWithSorting>(
+ rb_changer, config::memory_limitation_per_thread_for_schema_change_bytes);
}
if (sc_directly) {
- if (config::enable_vectorized_alter_table) {
- return std::make_unique<VSchemaChangeDirectly>(rb_changer);
- } else {
- return std::make_unique<SchemaChangeDirectly>(rb_changer);
- }
+ return std::make_unique<VSchemaChangeDirectly>(rb_changer);
}
return std::make_unique<LinkedSchemaChange>(rb_changer);
diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp
index 8bb6eb1f9e..ee1fe11198 100644
--- a/be/src/olap/task/engine_checksum_task.cpp
+++ b/be/src/olap/task/engine_checksum_task.cpp
@@ -17,15 +17,13 @@
#include "olap/task/engine_checksum_task.h"
-#include "olap/row.h"
-#include "olap/tuple_reader.h"
#include "runtime/thread_context.h"
namespace doris {
EngineChecksumTask::EngineChecksumTask(TTabletId tablet_id, TSchemaHash schema_hash,
TVersion version, uint32_t* checksum)
- : _tablet_id(tablet_id), _schema_hash(schema_hash), _version(version), _checksum(checksum) {
+ : _tablet_id(tablet_id), _schema_hash(schema_hash), _version(version) {
_mem_tracker = std::make_shared<MemTrackerLimiter>(
MemTrackerLimiter::Type::CONSISTENCY,
"EngineChecksumTask#tabletId=" + std::to_string(tablet_id));
@@ -40,70 +38,7 @@ Status EngineChecksumTask::_compute_checksum() {
LOG(INFO) << "begin to process compute checksum."
<< "tablet_id=" << _tablet_id << ", schema_hash=" << _schema_hash
<< ", version=" << _version;
-
- if (_checksum == nullptr) {
- return Status::InvalidArgument("invalid checksum which is nullptr");
- }
-
- TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(_tablet_id);
- if (nullptr == tablet) {
- return Status::InternalError("could not find tablet {}", _tablet_id);
- }
-
- TupleReader reader;
- TabletReader::ReaderParams reader_params;
- reader_params.tablet = tablet;
- reader_params.tablet_schema = tablet->tablet_schema();
- reader_params.reader_type = READER_CHECKSUM;
- reader_params.version = Version(0, _version);
- auto& delete_preds = tablet->delete_predicates();
- std::copy(delete_preds.cbegin(), delete_preds.cend(),
- std::inserter(reader_params.delete_predicates,
- reader_params.delete_predicates.begin()));
- {
- std::shared_lock rdlock(tablet->get_header_lock());
- const RowsetSharedPtr message = tablet->rowset_with_max_version();
- if (message == nullptr) {
- LOG(FATAL) << "fail to get latest version. tablet_id=" << _tablet_id;
- }
-
- RETURN_IF_ERROR(
- tablet->capture_rs_readers(reader_params.version, &reader_params.rs_readers));
- }
-
- for (size_t i = 0; i < tablet->tablet_schema()->num_columns(); ++i) {
- reader_params.return_columns.push_back(i);
- }
-
- RETURN_IF_ERROR(reader.init(reader_params));
-
- RowCursor row;
- std::unique_ptr<MemPool> mem_pool(new MemPool());
- std::unique_ptr<ObjectPool> agg_object_pool(new ObjectPool());
- RETURN_IF_ERROR(row.init(tablet->tablet_schema(), reader_params.return_columns));
-
- row.allocate_memory_for_string_type(tablet->tablet_schema());
-
- bool eof = false;
- uint32_t row_checksum = 0;
- while (true) {
- RETURN_IF_ERROR(reader.next_row_with_aggregation(&row, mem_pool.get(),
- agg_object_pool.get(), &eof));
- if (eof) {
- VLOG_NOTICE << "reader reads to the end.";
- break;
- }
- // The value of checksum is independent of the sorting of data rows.
- row_checksum ^= hash_row(row, 0);
- // the memory allocate by mem pool has been copied,
- // so we should release memory immediately
- mem_pool->clear();
- agg_object_pool.reset(new ObjectPool());
- }
-
- LOG(INFO) << "success to finish compute checksum. checksum=" << row_checksum;
- *_checksum = row_checksum;
- return Status::OK();
+ return Status::InternalError("Not implemented yet");
}
} // namespace doris
diff --git a/be/src/olap/task/engine_checksum_task.h b/be/src/olap/task/engine_checksum_task.h
index 04afa1a5cd..2979e9b000 100644
--- a/be/src/olap/task/engine_checksum_task.h
+++ b/be/src/olap/task/engine_checksum_task.h
@@ -43,7 +43,6 @@ private:
TTabletId _tablet_id;
TSchemaHash _schema_hash;
TVersion _version;
- uint32_t* _checksum;
std::shared_ptr<MemTrackerLimiter> _mem_tracker;
}; // EngineTask
diff --git a/be/src/olap/tuple_reader.cpp b/be/src/olap/tuple_reader.cpp
deleted file mode 100644
index cdf25b2eb1..0000000000
--- a/be/src/olap/tuple_reader.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/tuple_reader.h"
-
-#include <parallel_hashmap/phmap.h>
-
-#include <boost/algorithm/string/case_conv.hpp>
-#include <unordered_set>
-
-#include "olap/collect_iterator.h"
-#include "olap/olap_common.h"
-#include "olap/row.h"
-#include "olap/row_cursor.h"
-#include "runtime/mem_pool.h"
-
-using std::nothrow;
-using std::set;
-using std::vector;
-
-namespace doris {
-using namespace ErrorCode;
-
-Status TupleReader::_init_collect_iter(const ReaderParams& read_params,
- std::vector<RowsetReaderSharedPtr>* valid_rs_readers) {
- _collect_iter.init(this);
- std::vector<RowsetReaderSharedPtr> rs_readers;
- auto res = _capture_rs_readers(read_params, &rs_readers);
- if (!res.ok()) {
- LOG(WARNING) << "fail to init reader when _capture_rs_readers. res:" << res
- << ", tablet_id:" << read_params.tablet->tablet_id()
- << ", schema_hash:" << read_params.tablet->schema_hash()
- << ", reader_type:" << read_params.reader_type
- << ", version:" << read_params.version;
- return res;
- }
-
- for (auto& rs_reader : rs_readers) {
- RETURN_NOT_OK(rs_reader->init(&_reader_context));
- Status res = _collect_iter.add_child(rs_reader);
- if (!res.ok() && !res.is<END_OF_FILE>()) {
- LOG(WARNING) << "failed to add child to iterator, err=" << res;
- return res;
- }
- if (res.ok()) {
- valid_rs_readers->push_back(rs_reader);
- }
- }
- _collect_iter.build_heap(*valid_rs_readers);
- _next_key = _collect_iter.current_row(&_next_delete_flag);
-
- return Status::OK();
-}
-
-Status TupleReader::init(const ReaderParams& read_params) {
- RETURN_NOT_OK(TabletReader::init(read_params));
-
- std::vector<RowsetReaderSharedPtr> rs_readers;
- auto status = _init_collect_iter(read_params, &rs_readers);
- if (!status.ok()) {
- return status;
- }
-
- if (_optimize_for_single_rowset(rs_readers)) {
- _next_row_func = _tablet->keys_type() == AGG_KEYS ? &TupleReader::_direct_agg_key_next_row
- : &TupleReader::_direct_next_row;
- return Status::OK();
- }
-
- switch (_tablet->keys_type()) {
- case KeysType::DUP_KEYS:
- _next_row_func = &TupleReader::_direct_next_row;
- break;
- case KeysType::UNIQUE_KEYS:
- if (_tablet->enable_unique_key_merge_on_write()) {
- _next_row_func = &TupleReader::_direct_next_row;
- } else {
- _next_row_func = &TupleReader::_unique_key_next_row;
- }
- break;
- case KeysType::AGG_KEYS:
- _next_row_func = &TupleReader::_agg_key_next_row;
- break;
- default:
- DCHECK(false) << "No next row function for type:" << _tablet->keys_type();
- break;
- }
-
- return Status::OK();
-}
-
-Status TupleReader::_direct_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool,
- bool* eof) {
- if (UNLIKELY(_next_key == nullptr)) {
- *eof = true;
- return Status::OK();
- }
- direct_copy_row(row_cursor, *_next_key);
- auto res = _collect_iter.next(&_next_key, &_next_delete_flag);
- if (UNLIKELY(!res.ok() && !res.is<END_OF_FILE>())) {
- return res;
- }
- return Status::OK();
-}
-
-Status TupleReader::_direct_agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool,
- ObjectPool* agg_pool, bool* eof) {
- if (UNLIKELY(_next_key == nullptr)) {
- *eof = true;
- return Status::OK();
- }
- init_row_with_others(row_cursor, *_next_key, mem_pool, agg_pool);
- auto res = _collect_iter.next(&_next_key, &_next_delete_flag);
- if (UNLIKELY(!res.ok() && !res.is<END_OF_FILE>())) {
- return res;
- }
- if (_need_agg_finalize) {
- agg_finalize_row(_value_cids, row_cursor, mem_pool);
- }
- return Status::OK();
-}
-
-Status TupleReader::_agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool,
- ObjectPool* agg_pool, bool* eof) {
- if (UNLIKELY(_next_key == nullptr)) {
- *eof = true;
- return Status::OK();
- }
- init_row_with_others(row_cursor, *_next_key, mem_pool, agg_pool);
- int64_t merged_count = 0;
- do {
- auto res = _collect_iter.next(&_next_key, &_next_delete_flag);
- if (UNLIKELY(res.is<END_OF_FILE>())) {
- break;
- }
-
- if (UNLIKELY(!res.ok())) {
- LOG(WARNING) << "next failed: " << res;
- return res;
- }
-
- if (UNLIKELY(_aggregation && merged_count > config::doris_scanner_row_num)) {
- break;
- }
-
- // break while can NOT doing aggregation
- if (!equal_row(_key_cids, *row_cursor, *_next_key)) {
- break;
- }
- agg_update_row(_value_cids, row_cursor, *_next_key);
- ++merged_count;
- } while (true);
- _merged_rows += merged_count;
- // For agg query, we don't need finalize agg object and directly pass agg object to agg node
- if (_need_agg_finalize) {
- agg_finalize_row(_value_cids, row_cursor, mem_pool);
- }
-
- return Status::OK();
-}
-
-Status TupleReader::_unique_key_next_row(RowCursor* row_cursor, MemPool* mem_pool,
- ObjectPool* agg_pool, bool* eof) {
- *eof = false;
- bool cur_delete_flag = false;
- do {
- if (UNLIKELY(_next_key == nullptr)) {
- *eof = true;
- return Status::OK();
- }
- cur_delete_flag = _next_delete_flag;
- // the version is in reverse order, the first row is the highest version,
- // in UNIQUE_KEY highest version is the final result, there is no need to
- // merge the lower versions
- direct_copy_row(row_cursor, *_next_key);
- while (_next_key) {
- // skip the lower version rows;
- auto res = _collect_iter.next(&_next_key, &_next_delete_flag);
- if (LIKELY(!res.is<END_OF_FILE>())) {
- if (UNLIKELY(!res.ok())) {
- LOG(WARNING) << "next failed: " << res;
- return res;
- }
-
- if (!equal_row(_key_cids, *row_cursor, *_next_key)) {
- agg_finalize_row(_value_cids, row_cursor, mem_pool);
- break;
- }
- _merged_rows++;
- cur_delete_flag = _next_delete_flag;
- } else {
- break;
- }
- }
-
- // if reader needs to filter delete row and current delete_flag is true,
- // then continue
- if (!(cur_delete_flag && _filter_delete)) {
- break;
- }
- _stats.rows_del_filtered++;
- } while (cur_delete_flag);
- return Status::OK();
-}
-
-} // namespace doris
diff --git a/be/src/olap/tuple_reader.h b/be/src/olap/tuple_reader.h
deleted file mode 100644
index 191594c6d7..0000000000
--- a/be/src/olap/tuple_reader.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <gen_cpp/PaloInternalService_types.h>
-#include <thrift/protocol/TDebugProtocol.h>
-
-#include "olap/collect_iterator.h"
-#include "olap/delete_handler.h"
-#include "olap/reader.h"
-#include "olap/row_cursor.h"
-#include "olap/rowset/rowset_reader.h"
-
-namespace doris {
-
-class Tablet;
-class RowCursor;
-
-class TupleReader final : public TabletReader {
-public:
- // Initialize TupleReader with tablet, data version and fetch range.
- Status init(const ReaderParams& read_params) override;
-
- Status next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool,
- bool* eof) override {
- return (this->*_next_row_func)(row_cursor, mem_pool, agg_pool, eof);
- }
-
-private:
- friend class CollectIterator;
- friend class DeleteHandler;
-
- // Directly read row from rowset and pass to upper caller. No need to do aggregation.
- // This is usually used for DUPLICATE KEY tables
- Status _direct_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool,
- bool* eof);
- // Just same as _direct_next_row, but this is only for AGGREGATE KEY tables.
- // And this is an optimization for AGGR tables.
- // When there is only one rowset and is not overlapping, we can read it directly without aggregation.
- Status _direct_agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool,
- bool* eof);
- // For normal AGGREGATE KEY tables, read data by a merge heap.
- Status _agg_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool,
- bool* eof);
- // For UNIQUE KEY tables, read data by a merge heap.
- // The difference from _agg_key_next_row is that it will read the data from high version to low version,
- // to minimize the comparison time in merge heap.
- Status _unique_key_next_row(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool,
- bool* eof);
-
- Status _init_collect_iter(const ReaderParams& read_params,
- std::vector<RowsetReaderSharedPtr>* valid_rs_readers);
-
-private:
- const RowCursor* _next_key = nullptr;
-
- Status (TupleReader::*_next_row_func)(RowCursor* row_cursor, MemPool* mem_pool,
- ObjectPool* agg_pool, bool* eof) = nullptr;
-
- CollectIterator _collect_iter;
-};
-
-} // namespace doris
diff --git a/be/src/vec/olap/block_reader.h b/be/src/vec/olap/block_reader.h
index b5a17c80e5..18e3e0e232 100644
--- a/be/src/vec/olap/block_reader.h
+++ b/be/src/vec/olap/block_reader.h
@@ -35,11 +35,6 @@ public:
// Initialize BlockReader with tablet, data version and fetch range.
Status init(const ReaderParams& read_params) override;
- Status next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool,
- bool* eof) override {
- return Status::Error<ErrorCode::READER_INITIALIZE_ERROR>();
- }
-
Status next_block_with_aggregation(Block* block, MemPool* mem_pool, ObjectPool* agg_pool,
bool* eof) override {
return (this->*_next_block_func)(block, mem_pool, agg_pool, eof);
diff --git a/be/src/vec/olap/vertical_block_reader.h b/be/src/vec/olap/vertical_block_reader.h
index b701a84b55..184795c329 100644
--- a/be/src/vec/olap/vertical_block_reader.h
+++ b/be/src/vec/olap/vertical_block_reader.h
@@ -46,11 +46,6 @@ public:
return (this->*_next_block_func)(block, mem_pool, agg_pool, eof);
}
- Status next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool, ObjectPool* agg_pool,
- bool* eof) override {
- return Status::OK();
- }
-
uint64_t merged_rows() const override {
DCHECK(_vcollect_iter);
return _vcollect_iter->merged_rows();
diff --git a/be/src/vec/olap/vgeneric_iterators.cpp b/be/src/vec/olap/vgeneric_iterators.cpp
index c559fa47d2..7997a3d43d 100644
--- a/be/src/vec/olap/vgeneric_iterators.cpp
+++ b/be/src/vec/olap/vgeneric_iterators.cpp
@@ -174,7 +174,7 @@ void VMergeIteratorContext::copy_rows(BlockView* view, bool advanced) {
// VAutoIncrementIterator iter(schema, 1000);
// StorageReadOptions opts;
// RETURN_IF_ERROR(iter.init(opts));
-// RowBlockV2 block;
+// Block block;
// do {
// st = iter.next_batch(&block);
// } while (st.ok());
diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt
index cb3bceb95d..eb710a5eb5 100644
--- a/be/test/CMakeLists.txt
+++ b/be/test/CMakeLists.txt
@@ -94,14 +94,9 @@ set(OLAP_TEST_FILES
olap/delta_writer_test.cpp
olap/delete_handler_test.cpp
olap/row_block_test.cpp
- olap/row_block_v2_test.cpp
olap/byte_buffer_test.cpp
olap/lru_cache_test.cpp
olap/bloom_filter_test.cpp
- olap/bloom_filter_column_predicate_test.cpp
- olap/comparison_predicate_test.cpp
- olap/in_list_predicate_test.cpp
- olap/null_predicate_test.cpp
olap/file_helper_test.cpp
olap/file_utils_test.cpp
olap/cumulative_compaction_policy_test.cpp
@@ -122,7 +117,6 @@ set(OLAP_TEST_FILES
olap/rowset/segment_v2/ordinal_page_index_test.cpp
olap/rowset/segment_v2/rle_page_test.cpp
olap/rowset/segment_v2/binary_dict_page_test.cpp
- olap/rowset/segment_v2/segment_test.cpp
olap/rowset/segment_v2/row_ranges_test.cpp
olap/rowset/segment_v2/frame_of_reference_page_test.cpp
olap/rowset/segment_v2/block_bloom_filter_test.cpp
@@ -138,7 +132,6 @@ set(OLAP_TEST_FILES
olap/rowset/unique_rowset_id_generator_test.cpp
olap/rowset/rowset_tree_test.cpp
olap/txn_manager_test.cpp
- olap/generic_iterators_test.cpp
olap/key_coder_test.cpp
olap/short_key_index_test.cpp
olap/primary_key_index_test.cpp
diff --git a/be/test/olap/block_column_predicate_test.cpp b/be/test/olap/block_column_predicate_test.cpp
index 56a47da0e0..9ae68fa242 100644
--- a/be/test/olap/block_column_predicate_test.cpp
+++ b/be/test/olap/block_column_predicate_test.cpp
@@ -56,42 +56,9 @@ public:
tablet_schema->init_from_pb(tablet_schema_pb);
}
- void init_row_block(TabletSchemaSPtr tablet_schema, int size) {
- Schema schema(tablet_schema);
- _row_block.reset(new RowBlockV2(schema, size));
- }
-
std::unique_ptr<MemPool> _mem_pool;
- std::unique_ptr<RowBlockV2> _row_block;
};
-TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", "REPLACE", 1, true, true, tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- float value = 5.0;
-
- std::unique_ptr<ColumnPredicate> pred(
- new ComparisonPredicateBase<TYPE_FLOAT, PredicateType::EQ>(0, value));
- SingleColumnBlockPredicate single_column_block_pred(pred.get());
-
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i;
- }
- single_column_block_pred.evaluate(_row_block.get(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.0);
-}
-
TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create());
@@ -119,41 +86,6 @@ TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN_VEC) {
EXPECT_EQ(pred_col->get_data()[sel_idx[0]], value);
}
-TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- double less_value = 5.0;
- double great_value = 3.0;
- std::unique_ptr<ColumnPredicate> less_pred(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::LT>(0, less_value));
- std::unique_ptr<ColumnPredicate> great_pred(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::GT>(0, great_value));
- auto single_less_pred = new SingleColumnBlockPredicate(less_pred.get());
- auto single_great_pred = new SingleColumnBlockPredicate(great_pred.get());
-
- AndBlockColumnPredicate and_block_column_pred;
- and_block_column_pred.add_column_predicate(single_less_pred);
- and_block_column_pred.add_column_predicate(single_great_pred);
-
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i;
- }
- and_block_column_pred.evaluate(_row_block.get(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.0);
-}
-
TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create());
@@ -189,41 +121,6 @@ TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN_VEC) {
EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 4);
}
-TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- double less_value = 5.0;
- double great_value = 3.0;
- std::unique_ptr<ColumnPredicate> less_pred(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::LT>(0, less_value));
- std::unique_ptr<ColumnPredicate> great_pred(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::GT>(0, great_value));
- auto single_less_pred = new SingleColumnBlockPredicate(less_pred.get());
- auto single_great_pred = new SingleColumnBlockPredicate(great_pred.get());
-
- OrBlockColumnPredicate or_block_column_pred;
- or_block_column_pred.add_column_predicate(single_less_pred);
- or_block_column_pred.add_column_predicate(single_great_pred);
-
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i;
- }
- or_block_column_pred.evaluate(_row_block.get(), &select_size);
- EXPECT_EQ(select_size, 10);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 0.0);
-}
-
TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create());
@@ -259,70 +156,6 @@ TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN_VEC) {
EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 0);
}
-TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- double less_value = 5.0;
- double great_value = 3.0;
- std::unique_ptr<ColumnPredicate> less_pred(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::LT>(0, less_value));
- std::unique_ptr<ColumnPredicate> great_pred(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::GT>(0, great_value));
- std::unique_ptr<ColumnPredicate> less_pred1(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::LT>(0, great_value));
-
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i;
- }
-
- // Test for and or single
- // (column < 5 and column > 3) or column < 3
- auto and_block_column_pred = new AndBlockColumnPredicate();
- and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
- and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
-
- OrBlockColumnPredicate or_block_column_pred;
- or_block_column_pred.add_column_predicate(and_block_column_pred);
- or_block_column_pred.add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
-
- or_block_column_pred.evaluate(_row_block.get(), &select_size);
- EXPECT_EQ(select_size, 4);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 0.0);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr(), 1.0);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr(), 2.0);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[3]).cell_ptr(), 4.0);
-
- _row_block->clear();
- select_size = _row_block->selected_size();
- // Test for single or and
- // column < 3 or (column < 5 and column > 3)
- auto and_block_column_pred1 = new AndBlockColumnPredicate();
- and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
- and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
-
- OrBlockColumnPredicate or_block_column_pred1;
- or_block_column_pred1.add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
- or_block_column_pred1.add_column_predicate(and_block_column_pred1);
-
- or_block_column_pred1.evaluate(_row_block.get(), &select_size);
- EXPECT_EQ(select_size, 4);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 0.0);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr(), 1.0);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr(), 2.0);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[3]).cell_ptr(), 4.0);
-}
-
TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create());
@@ -384,64 +217,6 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) {
EXPECT_EQ(pred_col->get_data()[sel_idx[3]], 4);
}
-TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- double less_value = 5.0;
- double great_value = 3.0;
- std::unique_ptr<ColumnPredicate> less_pred(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::LT>(0, less_value));
- std::unique_ptr<ColumnPredicate> great_pred(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::GT>(0, great_value));
- std::unique_ptr<ColumnPredicate> less_pred1(
- new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::LT>(0, great_value));
-
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i;
- }
-
- // Test for and or single
- // (column < 5 or column < 3) and column > 3
- auto or_block_column_pred = new OrBlockColumnPredicate();
- or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
- or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
-
- AndBlockColumnPredicate and_block_column_pred;
- and_block_column_pred.add_column_predicate(or_block_column_pred);
- and_block_column_pred.add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
-
- and_block_column_pred.evaluate(_row_block.get(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.0);
-
- _row_block->clear();
- select_size = _row_block->selected_size();
- // Test for single or and
- // column > 3 and (column < 5 or column < 3)
- auto or_block_column_pred1 = new OrBlockColumnPredicate();
- or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
- or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
-
- AndBlockColumnPredicate and_block_column_pred1;
- and_block_column_pred1.add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
- and_block_column_pred1.add_column_predicate(or_block_column_pred1);
-
- and_block_column_pred1.evaluate(_row_block.get(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.0);
-}
-
TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create());
diff --git a/be/test/olap/bloom_filter_column_predicate_test.cpp b/be/test/olap/bloom_filter_column_predicate_test.cpp
deleted file mode 100644
index ae27d609ba..0000000000
--- a/be/test/olap/bloom_filter_column_predicate_test.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <google/protobuf/stubs/common.h>
-#include <gtest/gtest.h>
-#include <time.h>
-
-#include "agent/be_exec_version_manager.h"
-#include "exprs/create_predicate_function.h"
-#include "olap/column_predicate.h"
-#include "olap/predicate_creator.h"
-#include "olap/row_block2.h"
-#include "runtime/mem_pool.h"
-#include "vec/columns/column_nullable.h"
-#include "vec/columns/columns_number.h"
-#include "vec/columns/predicate_column.h"
-
-using namespace doris::vectorized;
-
-namespace doris {
-
-class TestBloomFilterColumnPredicate : public testing::Test {
-public:
- TestBloomFilterColumnPredicate() : _row_block(nullptr) { _mem_pool.reset(new MemPool()); }
-
- ~TestBloomFilterColumnPredicate() {}
-
- void SetTabletSchema(std::string name, const std::string& type, const std::string& aggregation,
- uint32_t length, bool is_allow_null, bool is_key,
- TabletSchemaSPtr tablet_schema) {
- TabletSchemaPB tablet_schema_pb;
- static int id = 0;
- ColumnPB* column = tablet_schema_pb.add_column();
- column->set_unique_id(++id);
- column->set_name(name);
- column->set_type(type);
- column->set_is_key(is_key);
- column->set_is_nullable(is_allow_null);
- column->set_length(length);
- column->set_aggregation(aggregation);
- column->set_precision(1000);
- column->set_frac(1000);
- column->set_is_bf_column(false);
-
- tablet_schema->init_from_pb(tablet_schema_pb);
- }
-
- void init_row_block(TabletSchemaSPtr tablet_schema, int size) {
- Schema schema(tablet_schema);
- _row_block.reset(new RowBlockV2(schema, size));
- }
-
- std::unique_ptr<MemPool> _mem_pool;
- std::unique_ptr<RowBlockV2> _row_block;
-};
-
-TEST_F(TestBloomFilterColumnPredicate, FLOAT_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", "REPLACE", 1, true, true, tablet_schema);
- const int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
-
- std::shared_ptr<BloomFilterFuncBase> bloom_filter(
- create_bloom_filter(PrimitiveType::TYPE_FLOAT));
-
- bloom_filter->init(4096, 0.05);
- float value = 4.1;
- bloom_filter->insert(reinterpret_cast<void*>(&value));
- value = 5.1;
- bloom_filter->insert(reinterpret_cast<void*>(&value));
- value = 6.1;
- bloom_filter->insert(reinterpret_cast<void*>(&value));
- ColumnPredicate* pred = create_column_predicate(0, bloom_filter, OLAP_FIELD_TYPE_FLOAT,
- BeExecVersionManager::get_newest_version());
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i + 0.1f;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.1);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr(), 5.1);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr(), 6.1);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i + 0.1;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.1);
-
- delete pred;
-}
-
-TEST_F(TestBloomFilterColumnPredicate, FLOAT_COLUMN_VEC) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", "REPLACE", 1, true, true, tablet_schema);
- const int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
-
- std::shared_ptr<BloomFilterFuncBase> bloom_filter(
- create_bloom_filter(PrimitiveType::TYPE_FLOAT));
-
- bloom_filter->init(4096, 0.05);
- auto column_data = ColumnFloat32::create();
- float values[3] = {4.1, 5.1, 6.1};
- int offsets[3] = {0, 1, 2};
-
- bloom_filter->insert_fixed_len((char*)values, offsets, 3);
- ColumnPredicate* pred = create_column_predicate(0, bloom_filter, OLAP_FIELD_TYPE_FLOAT,
- BeExecVersionManager::get_newest_version());
- auto* col_data = reinterpret_cast<float*>(_mem_pool->allocate(size * sizeof(float)));
-
- // for vectorized::Block no null
- auto pred_col = PredicateColumnType<TYPE_FLOAT>::create();
- pred_col->reserve(size);
- for (int i = 0; i < size; ++i) {
- *(col_data + i) = i + 0.1f;
- pred_col->insert_data(reinterpret_cast<const char*>(col_data + i), 0);
- }
- init_row_block(tablet_schema, size);
- _row_block->clear();
- auto select_size = _row_block->selected_size();
- select_size = pred->evaluate(*pred_col, _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[0]], 4.1);
- EXPECT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[1]], 5.1);
- EXPECT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[2]], 6.1);
-
- // for vectorized::Block has nulls
- auto null_map = ColumnUInt8::create(size, 0);
- auto& null_map_data = null_map->get_data();
- for (int i = 0; i < size; ++i) {
- null_map_data[i] = (i % 2 == 0);
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- auto nullable_col =
- vectorized::ColumnNullable::create(std::move(pred_col), std::move(null_map));
- select_size = pred->evaluate(*nullable_col, _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 1);
- auto nested_col = check_and_get_column<PredicateColumnType<TYPE_FLOAT>>(
- nullable_col->get_nested_column());
- EXPECT_FLOAT_EQ((float)nested_col->get_data()[_row_block->selection_vector()[0]], 5.1);
-
- delete pred;
-}
-} // namespace doris
diff --git a/be/test/olap/comparison_predicate_test.cpp b/be/test/olap/comparison_predicate_test.cpp
deleted file mode 100644
index 9f6ef32b55..0000000000
--- a/be/test/olap/comparison_predicate_test.cpp
+++ /dev/null
@@ -1,793 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/comparison_predicate.h"
-
-#include <google/protobuf/stubs/common.h>
-#include <gtest/gtest.h>
-#include <time.h>
-
-#include "olap/column_predicate.h"
-#include "olap/field.h"
-#include "olap/row_block2.h"
-#include "olap/wrapper_field.h"
-#include "runtime/mem_pool.h"
-#include "runtime/primitive_type.h"
-#include "runtime/string_value.hpp"
-
-namespace doris {
-
-namespace datetime {
-
-static uint32_t to_date_timestamp(const char* date_string) {
- tm time_tm;
- strptime(date_string, "%Y-%m-%d", &time_tm);
-
- int value = (time_tm.tm_year + 1900) * 16 * 32 + (time_tm.tm_mon + 1) * 32 + time_tm.tm_mday;
- return uint32_t(value);
-}
-
-static uint64_t to_datetime_timestamp(const std::string& value_string) {
- tm time_tm;
- strptime(value_string.c_str(), "%Y-%m-%d %H:%M:%S", &time_tm);
-
- uint64_t value =
- ((time_tm.tm_year + 1900) * 10000L + (time_tm.tm_mon + 1) * 100L + time_tm.tm_mday) *
- 1000000L +
- time_tm.tm_hour * 10000L + time_tm.tm_min * 100L + time_tm.tm_sec;
-
- return value;
-}
-
-static std::string to_date_string(uint24_t& date_value) {
- tm time_tm;
- int value = date_value;
- memset(&time_tm, 0, sizeof(time_tm));
- time_tm.tm_mday = static_cast<int>(value & 31);
- time_tm.tm_mon = static_cast<int>(value >> 5 & 15) - 1;
- time_tm.tm_year = static_cast<int>(value >> 9) - 1900;
- char buf[20] = {'\0'};
- strftime(buf, sizeof(buf), "%Y-%m-%d", &time_tm);
- return std::string(buf);
-}
-
-static std::string to_datetime_string(uint64_t& datetime_value) {
- tm time_tm;
- int64_t part1 = (datetime_value / 1000000L);
- int64_t part2 = (datetime_value - part1 * 1000000L);
-
- time_tm.tm_year = static_cast<int>((part1 / 10000L) % 10000) - 1900;
- time_tm.tm_mon = static_cast<int>((part1 / 100) % 100) - 1;
- time_tm.tm_mday = static_cast<int>(part1 % 100);
-
- time_tm.tm_hour = static_cast<int>((part2 / 10000L) % 10000);
- time_tm.tm_min = static_cast<int>((part2 / 100) % 100);
- time_tm.tm_sec = static_cast<int>(part2 % 100);
-
- char buf[20] = {'\0'};
- strftime(buf, 20, "%Y-%m-%d %H:%M:%S", &time_tm);
- return std::string(buf);
-}
-
-}; // namespace datetime
-
-#define TEST_PREDICATE_DEFINITION(CLASS_NAME) \
- class CLASS_NAME : public testing::Test { \
- public: \
- CLASS_NAME() { _mem_pool.reset(new MemPool()); } \
- ~CLASS_NAME() {} \
- void SetTabletSchema(std::string name, const std::string& type, \
- const std::string& aggregation, uint32_t length, bool is_allow_null, \
- bool is_key, TabletSchemaSPtr tablet_schema) { \
- TabletSchemaPB tablet_schema_pb; \
- static int id = 0; \
- ColumnPB* column = tablet_schema_pb.add_column(); \
- column->set_unique_id(++id); \
- column->set_name(name); \
- column->set_type(type); \
- column->set_is_key(is_key); \
- column->set_is_nullable(is_allow_null); \
- column->set_length(length); \
- column->set_aggregation(aggregation); \
- column->set_precision(1000); \
- column->set_frac(1000); \
- column->set_is_bf_column(false); \
- tablet_schema->init_from_pb(tablet_schema_pb); \
- } \
- \
- void init_row_block(TabletSchemaSPtr tablet_schema, int size) { \
- Schema schema(tablet_schema); \
- _row_block.reset(new RowBlockV2(schema, size)); \
- } \
- std::unique_ptr<MemPool> _mem_pool; \
- std::unique_ptr<RowBlockV2> _row_block; \
- };
-
-TEST_PREDICATE_DEFINITION(TestEqualPredicate)
-TEST_PREDICATE_DEFINITION(TestLessPredicate)
-
-TEST_F(TestEqualPredicate, FLOAT_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", "REPLACE", 1, true, true, tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- float value = 5.0;
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_FLOAT, PredicateType::EQ>(0, value);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.0);
-
- delete pred;
-}
-
-TEST_F(TestEqualPredicate, DOUBLE_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- double value = 5.0;
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::EQ>(0, value);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.0);
-
- delete pred;
-}
-
-TEST_F(TestEqualPredicate, DECIMAL_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DECIMAL_COLUMN"), "DECIMAL", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- decimal12_t value = {5, 5};
- ColumnPredicate* pred =
- new ComparisonPredicateBase<TYPE_DECIMALV2, PredicateType::EQ>(0, value);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- reinterpret_cast<decimal12_t*>(col_block_view.data())->integer = i;
- reinterpret_cast<decimal12_t*>(col_block_view.data())->fraction = i;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(*(decimal12_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- reinterpret_cast<decimal12_t*>(col_block_view.data())->integer = i;
- reinterpret_cast<decimal12_t*>(col_block_view.data())->fraction = i;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(*(decimal12_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value);
-
- delete pred;
-}
-
-TEST_F(TestEqualPredicate, STRING_COLUMN) {
- TabletSchemaSPtr char_tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("STRING_COLUMN"), "CHAR", "REPLACE", 5, true, true,
- char_tablet_schema);
- // test WrapperField.from_string() for char type
- WrapperField* field = WrapperField::create(char_tablet_schema->column(0));
- EXPECT_EQ(Status::OK(), field->from_string("true"));
- const std::string tmp = field->to_string();
- EXPECT_EQ(5, tmp.size());
- EXPECT_EQ('t', tmp[0]);
- EXPECT_EQ('r', tmp[1]);
- EXPECT_EQ('u', tmp[2]);
- EXPECT_EQ('e', tmp[3]);
- EXPECT_EQ(0, tmp[4]);
-
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("STRING_COLUMN"), "VARCHAR", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
-
- StringValue value;
- const char* value_buffer = "dddd";
- value.len = 4;
- value.ptr = const_cast<char*>(value_buffer);
-
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_STRING, PredicateType::EQ>(0, value);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- char* string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(60));
- memset(string_buffer, 0, 60);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= i; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = i + 1;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += i + 1;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(55));
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= i; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = i + 1;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += i + 1;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value);
-
- delete field;
- delete pred;
-}
-
-TEST_F(TestEqualPredicate, DATE_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DATE_COLUMN"), "DATE", "REPLACE", 1, true, true, tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- uint32_t value = datetime::to_date_timestamp("2017-09-10");
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_DATE, PredicateType::EQ>(0, value);
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07");
- date_array.push_back("2017-09-08");
- date_array.push_back("2017-09-09");
- date_array.push_back("2017-09-10");
- date_array.push_back("2017-09-11");
- date_array.push_back("2017-09-12");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- const uint32_t tmp = datetime::to_date_timestamp(date_array[i].c_str());
- uint24_t timestamp = 0;
- memcpy(reinterpret_cast<void*>(&timestamp), reinterpret_cast<const void*>(&tmp),
- sizeof(uint24_t));
- *reinterpret_cast<uint24_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(datetime::to_date_string(
- *(uint24_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-10");
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- const uint32_t tmp = datetime::to_date_timestamp(date_array[i].c_str());
- uint24_t timestamp = 0;
- memcpy(reinterpret_cast<void*>(&timestamp), reinterpret_cast<const void*>(&tmp),
- sizeof(uint24_t));
- *reinterpret_cast<uint24_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(datetime::to_date_string(
- *(uint24_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-10");
-
- delete pred;
-}
-
-TEST_F(TestEqualPredicate, DATETIME_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DATETIME_COLUMN"), "DATETIME", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- uint64_t value = datetime::to_datetime_timestamp("2017-09-10 01:00:00");
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_DATETIME, PredicateType::EQ>(0, value);
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07 00:00:00");
- date_array.push_back("2017-09-08 00:01:00");
- date_array.push_back("2017-09-09 00:00:01");
- date_array.push_back("2017-09-10 01:00:00");
- date_array.push_back("2017-09-11 01:01:00");
- date_array.push_back("2017-09-12 01:01:01");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- uint64_t timestamp = datetime::to_datetime_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint64_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(datetime::to_datetime_string(
- *(uint64_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-10 01:00:00");
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- uint64_t timestamp = datetime::to_datetime_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint64_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(datetime::to_datetime_string(
- *(uint64_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-10 01:00:00");
-
- delete pred;
-}
-
-TEST_F(TestLessPredicate, FLOAT_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", "REPLACE", 1, true, true, tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- float value = 5.0;
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_FLOAT, PredicateType::LT>(0, value);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 5);
- float sum = 0;
- for (int i = 0; i < 5; ++i) {
- sum += *(float*)col_block.cell(_row_block->selection_vector()[i]).cell_ptr();
- }
- EXPECT_FLOAT_EQ(sum, 10.0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 2);
- sum = 0;
- for (int i = 0; i < 2; ++i) {
- sum += *(float*)col_block.cell(_row_block->selection_vector()[i]).cell_ptr();
- }
- EXPECT_FLOAT_EQ(sum, 4.0);
-
- delete pred;
-}
-
-TEST_F(TestLessPredicate, DOUBLE_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- double value = 5.0;
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_DOUBLE, PredicateType::LT>(0, value);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 5);
- double sum = 0;
- for (int i = 0; i < 5; ++i) {
- sum += *(double*)col_block.cell(_row_block->selection_vector()[i]).cell_ptr();
- }
- EXPECT_DOUBLE_EQ(sum, 10.0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 2);
- sum = 0;
- for (int i = 0; i < 2; ++i) {
- sum += *(double*)col_block.cell(_row_block->selection_vector()[i]).cell_ptr();
- }
- EXPECT_DOUBLE_EQ(sum, 4.0);
-
- delete pred;
-}
-
-TEST_F(TestLessPredicate, DECIMAL_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DECIMAL_COLUMN"), "DECIMAL", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- decimal12_t value = {5, 5};
- ColumnPredicate* pred =
- new ComparisonPredicateBase<TYPE_DECIMALV2, PredicateType::LT>(0, value);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- reinterpret_cast<decimal12_t*>(col_block_view.data())->integer = i;
- reinterpret_cast<decimal12_t*>(col_block_view.data())->fraction = i;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 5);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- reinterpret_cast<decimal12_t*>(col_block_view.data())->integer = i;
- reinterpret_cast<decimal12_t*>(col_block_view.data())->fraction = i;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 2);
-
- delete pred;
-}
-
-TEST_F(TestLessPredicate, STRING_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("STRING_COLUMN"), "VARCHAR", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
-
- StringValue value;
- const char* value_buffer = "dddd";
- value.len = 4;
- value.ptr = const_cast<char*>(value_buffer);
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_STRING, PredicateType::LT>(0, value);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- char* string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(60));
- memset(string_buffer, 0, 60);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= i; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = i + 1;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += i + 1;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_TRUE(
- strncmp((*(StringValue*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr())
- .ptr,
- "a", 1) == 0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(55));
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= i; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = i + 1;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += i + 1;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_TRUE(
- strncmp((*(StringValue*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr())
- .ptr,
- "bb", 2) == 0);
-
- delete pred;
-}
-
-TEST_F(TestLessPredicate, DATE_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DATE_COLUMN"), "DATE", "REPLACE", 1, true, true, tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- uint32_t value = datetime::to_date_timestamp("2017-09-10");
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_DATE, PredicateType::LT>(0, value);
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07");
- date_array.push_back("2017-09-08");
- date_array.push_back("2017-09-09");
- date_array.push_back("2017-09-10");
- date_array.push_back("2017-09-11");
- date_array.push_back("2017-09-12");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- const uint32_t tmp = datetime::to_date_timestamp(date_array[i].c_str());
- uint24_t timestamp = 0;
- memcpy(reinterpret_cast<void*>(&timestamp), reinterpret_cast<const void*>(&tmp),
- sizeof(uint24_t));
- *reinterpret_cast<uint24_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_EQ(datetime::to_date_string(
- *(uint24_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-07");
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- const uint32_t tmp = datetime::to_date_timestamp(date_array[i].c_str());
- uint24_t timestamp = 0;
- memcpy(reinterpret_cast<void*>(&timestamp), reinterpret_cast<const void*>(&tmp),
- sizeof(uint24_t));
- *reinterpret_cast<uint24_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(datetime::to_date_string(
- *(uint24_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-08");
-
- delete pred;
-}
-
-TEST_F(TestLessPredicate, DATETIME_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- TabletColumn tablet_column;
- SetTabletSchema(std::string("DATETIME_COLUMN"), "DATETIME", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
-
- uint64_t value = datetime::to_datetime_timestamp("2017-09-10 01:00:00");
- ColumnPredicate* pred = new ComparisonPredicateBase<TYPE_DATETIME, PredicateType::LT>(0, value);
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07 00:00:00");
- date_array.push_back("2017-09-08 00:01:00");
- date_array.push_back("2017-09-09 00:00:01");
- date_array.push_back("2017-09-10 01:00:00");
- date_array.push_back("2017-09-11 01:01:00");
- date_array.push_back("2017-09-12 01:01:01");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- uint64_t timestamp = datetime::to_datetime_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint64_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_EQ(datetime::to_datetime_string(
- *(uint64_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-07 00:00:00");
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- uint64_t timestamp = datetime::to_datetime_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint64_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(datetime::to_datetime_string(
- *(uint64_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-08 00:01:00");
-
- delete pred;
-}
-
-} // namespace doris
diff --git a/be/test/olap/generic_iterators_test.cpp b/be/test/olap/generic_iterators_test.cpp
deleted file mode 100644
index 9e982f4eef..0000000000
--- a/be/test/olap/generic_iterators_test.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/generic_iterators.h"
-
-#include <gtest/gtest.h>
-
-#include <vector>
-
-#include "olap/olap_common.h"
-#include "olap/row_block2.h"
-#include "olap/schema.h"
-#include "util/slice.h"
-
-namespace doris {
-using namespace ErrorCode;
-
-class GenericIteratorsTest : public testing::Test {
-public:
- GenericIteratorsTest() {}
- virtual ~GenericIteratorsTest() {}
-};
-
-Schema create_schema() {
- std::vector<TabletColumn> col_schemas;
- col_schemas.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_SMALLINT, true);
- // c2: int
- col_schemas.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_INT, true);
- // c3: big int
- col_schemas.emplace_back(OLAP_FIELD_AGGREGATION_SUM, OLAP_FIELD_TYPE_BIGINT, true);
-
- Schema schema(col_schemas, 2);
- return schema;
-}
-
-TEST(GenericIteratorsTest, AutoIncrement) {
- auto schema = create_schema();
- auto iter = new_auto_increment_iterator(schema, 500);
-
- StorageReadOptions opts;
- auto st = iter->init(opts);
- EXPECT_TRUE(st.ok());
-
- RowBlockV2 block(schema, 128);
-
- size_t row_count = 0;
- do {
- block.clear();
- st = iter->next_batch(&block);
- for (int i = 0; i < block.num_rows(); ++i) {
- auto row = block.row(i);
- EXPECT_EQ(row_count, *(int16_t*)row.cell_ptr(0));
- EXPECT_EQ(row_count + 1, *(int32_t*)row.cell_ptr(1));
- EXPECT_EQ(row_count + 2, *(int64_t*)row.cell_ptr(2));
- row_count++;
- }
- } while (st.ok());
- EXPECT_TRUE(st.is<END_OF_FILE>());
- EXPECT_EQ(500, row_count);
-
- delete iter;
-}
-
-TEST(GenericIteratorsTest, Union) {
- auto schema = create_schema();
- std::vector<RowwiseIterator*> inputs;
-
- inputs.push_back(new_auto_increment_iterator(schema, 100));
- inputs.push_back(new_auto_increment_iterator(schema, 200));
- inputs.push_back(new_auto_increment_iterator(schema, 300));
-
- auto iter = new_union_iterator(inputs);
- StorageReadOptions opts;
- auto st = iter->init(opts);
- EXPECT_TRUE(st.ok());
-
- RowBlockV2 block(schema, 128);
-
- size_t row_count = 0;
- do {
- block.clear();
- st = iter->next_batch(&block);
- for (int i = 0; i < block.num_rows(); ++i) {
- size_t base_value = row_count;
- if (row_count >= 300) {
- base_value -= 300;
- } else if (row_count >= 100) {
- base_value -= 100;
- }
- auto row = block.row(i);
- EXPECT_EQ(base_value, *(int16_t*)row.cell_ptr(0));
- EXPECT_EQ(base_value + 1, *(int32_t*)row.cell_ptr(1));
- EXPECT_EQ(base_value + 2, *(int64_t*)row.cell_ptr(2));
- row_count++;
- }
- } while (st.ok());
- EXPECT_TRUE(st.is<END_OF_FILE>());
- EXPECT_EQ(600, row_count);
-
- delete iter;
-}
-
-TEST(GenericIteratorsTest, MergeAgg) {
- auto schema = create_schema();
- std::vector<RowwiseIterator*> inputs;
-
- inputs.push_back(new_auto_increment_iterator(schema, 100));
- inputs.push_back(new_auto_increment_iterator(schema, 200));
- inputs.push_back(new_auto_increment_iterator(schema, 300));
-
- auto iter = new_merge_iterator(std::move(inputs), -1, false, nullptr);
- StorageReadOptions opts;
- auto st = iter->init(opts);
- EXPECT_TRUE(st.ok());
-
- RowBlockV2 block(schema, 128);
-
- size_t row_count = 0;
- do {
- block.clear();
- st = iter->next_batch(&block);
- for (int i = 0; i < block.num_rows(); ++i) {
- size_t base_value = 0;
- // 100 * 3, 200 * 2, 300
- if (row_count < 300) {
- base_value = row_count / 3;
- } else if (row_count < 500) {
- base_value = (row_count - 300) / 2 + 100;
- } else {
- base_value = row_count - 300;
- }
- auto row = block.row(i);
- EXPECT_EQ(base_value, *(int16_t*)row.cell_ptr(0));
- EXPECT_EQ(base_value + 1, *(int32_t*)row.cell_ptr(1));
- EXPECT_EQ(base_value + 2, *(int64_t*)row.cell_ptr(2));
- row_count++;
- }
- } while (st.ok());
- EXPECT_TRUE(st.is<END_OF_FILE>());
- EXPECT_EQ(600, row_count);
-
- delete iter;
-}
-
-TEST(GenericIteratorsTest, MergeUnique) {
- auto schema = create_schema();
- std::vector<RowwiseIterator*> inputs;
-
- inputs.push_back(new_auto_increment_iterator(schema, 100));
- inputs.push_back(new_auto_increment_iterator(schema, 200));
- inputs.push_back(new_auto_increment_iterator(schema, 300));
-
- auto iter = new_merge_iterator(std::move(inputs), -1, true, nullptr);
- StorageReadOptions opts;
- auto st = iter->init(opts);
- EXPECT_TRUE(st.ok());
-
- RowBlockV2 block(schema, 128);
-
- size_t row_count = 0;
- do {
- block.clear();
- st = iter->next_batch(&block);
- for (int i = 0; i < block.num_rows(); ++i) {
- size_t base_value = row_count;
- auto row = block.row(i);
- EXPECT_EQ(base_value, *(int16_t*)row.cell_ptr(0));
- EXPECT_EQ(base_value + 1, *(int32_t*)row.cell_ptr(1));
- EXPECT_EQ(base_value + 2, *(int64_t*)row.cell_ptr(2));
- row_count++;
- }
- } while (st.ok());
- EXPECT_TRUE(st.is<END_OF_FILE>());
- EXPECT_EQ(300, row_count);
-
- delete iter;
-}
-
-} // namespace doris
diff --git a/be/test/olap/in_list_predicate_test.cpp b/be/test/olap/in_list_predicate_test.cpp
deleted file mode 100644
index 73413c0c10..0000000000
--- a/be/test/olap/in_list_predicate_test.cpp
+++ /dev/null
@@ -1,730 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/in_list_predicate.h"
-
-#include <google/protobuf/stubs/common.h>
-#include <gtest/gtest.h>
-#include <time.h>
-
-#include "olap/column_predicate.h"
-#include "olap/row_block2.h"
-#include "runtime/mem_pool.h"
-
-namespace doris {
-
-namespace datetime {
-
-static uint32_t timestamp_from_date(const char* date_string) {
- tm time_tm;
- strptime(date_string, "%Y-%m-%d", &time_tm);
-
- int value = (time_tm.tm_year + 1900) * 16 * 32 + (time_tm.tm_mon + 1) * 32 + time_tm.tm_mday;
- return uint32_t(value);
-}
-
-static uint32_t timestamp_from_date_v2(const char* date_string) {
- tm time_tm;
- strptime(date_string, "%Y-%m-%d", &time_tm);
-
- doris::vectorized::DateV2Value<doris::vectorized::DateV2ValueType> value;
- value.set_time(time_tm.tm_year + 1900, time_tm.tm_mon + 1, time_tm.tm_mday, 0, 0, 0, 0);
- return binary_cast<doris::vectorized::DateV2Value<doris::vectorized::DateV2ValueType>,
- uint32_t>(value);
-}
-
-static uint64_t timestamp_from_datetime(const std::string& value_string) {
- tm time_tm;
- strptime(value_string.c_str(), "%Y-%m-%d %H:%M:%S", &time_tm);
-
- uint64_t value =
- ((time_tm.tm_year + 1900) * 10000L + (time_tm.tm_mon + 1) * 100L + time_tm.tm_mday) *
- 1000000L +
- time_tm.tm_hour * 10000L + time_tm.tm_min * 100L + time_tm.tm_sec;
-
- return value;
-}
-
-static std::string to_date_string(uint24_t& date_value) {
- tm time_tm;
- int value = date_value;
- memset(&time_tm, 0, sizeof(time_tm));
- time_tm.tm_mday = static_cast<int>(value & 31);
- time_tm.tm_mon = static_cast<int>(value >> 5 & 15) - 1;
- time_tm.tm_year = static_cast<int>(value >> 9) - 1900;
- char buf[20] = {'\0'};
- strftime(buf, sizeof(buf), "%Y-%m-%d", &time_tm);
- return std::string(buf);
-}
-
-static std::string to_date_v2_string(uint32_t& date_value) {
- auto val = binary_cast<uint32_t, vectorized::DateV2Value<doris::vectorized::DateV2ValueType>>(
- date_value);
- std::stringstream ss;
- ss << val;
- return ss.str();
-}
-
-static std::string to_datetime_string(uint64_t& datetime_value) {
- tm time_tm;
- int64_t part1 = (datetime_value / 1000000L);
- int64_t part2 = (datetime_value - part1 * 1000000L);
-
- time_tm.tm_year = static_cast<int>((part1 / 10000L) % 10000) - 1900;
- time_tm.tm_mon = static_cast<int>((part1 / 100) % 100) - 1;
- time_tm.tm_mday = static_cast<int>(part1 % 100);
-
- time_tm.tm_hour = static_cast<int>((part2 / 10000L) % 10000);
- time_tm.tm_min = static_cast<int>((part2 / 100) % 100);
- time_tm.tm_sec = static_cast<int>(part2 % 100);
-
- char buf[20] = {'\0'};
- strftime(buf, 20, "%Y-%m-%d %H:%M:%S", &time_tm);
- return std::string(buf);
-}
-
-}; // namespace datetime
-
-class TestInListPredicate : public testing::Test {
-public:
- TestInListPredicate() : _row_block(nullptr) { _mem_pool.reset(new MemPool()); }
-
- ~TestInListPredicate() {}
-
- void SetTabletSchema(std::string name, const std::string& type, const std::string& aggregation,
- uint32_t length, bool is_allow_null, bool is_key,
- TabletSchemaSPtr tablet_schema) {
- TabletSchemaPB tablet_schema_pb;
- static int id = 0;
- ColumnPB* column = tablet_schema_pb.add_column();
- column->set_unique_id(++id);
- column->set_name(name);
- column->set_type(type);
- column->set_is_key(is_key);
- column->set_is_nullable(is_allow_null);
- column->set_length(length);
- column->set_aggregation(aggregation);
- column->set_precision(1000);
- column->set_frac(1000);
- column->set_is_bf_column(false);
-
- tablet_schema->init_from_pb(tablet_schema_pb);
- }
-
- void init_row_block(TabletSchemaSPtr tablet_schema, int size) {
- _schema = std::make_unique<Schema>(tablet_schema);
- _row_block.reset(new RowBlockV2(*_schema, size));
- }
-
- std::unique_ptr<MemPool> _mem_pool;
- std::unique_ptr<RowBlockV2> _row_block;
- std::unique_ptr<Schema> _schema;
-};
-
-#define TEST_IN_LIST_PREDICATE_V2(PRIMITIVE_TYPE, TYPE, TYPE_NAME, FIELD_TYPE) \
- TEST_F(TestInListPredicate, TYPE_NAME##_COLUMN_V2) { \
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); \
- SetTabletSchema(std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, "REPLACE", 1, false, true, \
- tablet_schema); \
- int size = 10; \
- Schema schema(tablet_schema); \
- RowBlockV2 block(schema, size); \
- phmap::flat_hash_set<TYPE> values; \
- values.insert(4); \
- values.insert(5); \
- values.insert(6); \
- ColumnPredicate* pred = \
- new InListPredicateBase<PRIMITIVE_TYPE, PredicateType::IN_LIST>(0, values); \
- uint16_t sel[10]; \
- for (int i = 0; i < 10; ++i) { \
- sel[i] = i; \
- } \
- uint16_t selected_size = 10; \
- ColumnBlock column = block.column_block(0); \
- /* for non nulls */ \
- for (int i = 0; i < size; ++i) { \
- column.set_is_null(i, false); \
- uint8_t* value = column.mutable_cell_ptr(i); \
- *((TYPE*)value) = i; \
- } \
- \
- pred->evaluate(&column, sel, &selected_size); \
- EXPECT_EQ(selected_size, 3); \
- EXPECT_EQ(*((TYPE*)column.cell_ptr(sel[0])), 4); \
- EXPECT_EQ(*((TYPE*)column.cell_ptr(sel[1])), 5); \
- EXPECT_EQ(*((TYPE*)column.cell_ptr(sel[2])), 6); \
- \
- /* for has nulls */ \
- TabletSchemaSPtr tablet_schema2 = std::make_shared<TabletSchema>(); \
- SetTabletSchema(std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, "REPLACE", 1, true, true, \
- tablet_schema2); \
- Schema schema2(tablet_schema2); \
- RowBlockV2 block2(schema2, size); \
- ColumnBlock column2 = block2.column_block(0); \
- for (int i = 0; i < size; ++i) { \
- if (i % 2 == 0) { \
- column2.set_is_null(i, true); \
- } else { \
- column2.set_is_null(i, false); \
- uint8_t* value = column2.mutable_cell_ptr(i); \
- *((TYPE*)value) = i; \
- } \
- } \
- for (int i = 0; i < 10; ++i) { \
- sel[i] = i; \
- } \
- selected_size = 10; \
- \
- pred->evaluate(&column2, sel, &selected_size); \
- EXPECT_EQ(selected_size, 1); \
- EXPECT_EQ(*((TYPE*)column2.cell_ptr(sel[0])), 5); \
- delete pred; \
- }
-
-TEST_IN_LIST_PREDICATE_V2(TYPE_TINYINT, int8_t, TINYINT, "TINYINT")
-TEST_IN_LIST_PREDICATE_V2(TYPE_SMALLINT, int16_t, SMALLINT, "SMALLINT")
-TEST_IN_LIST_PREDICATE_V2(TYPE_INT, int32_t, INT, "INT")
-TEST_IN_LIST_PREDICATE_V2(TYPE_BIGINT, int64_t, BIGINT, "BIGINT")
-TEST_IN_LIST_PREDICATE_V2(TYPE_LARGEINT, int128_t, LARGEINT, "LARGEINT")
-
-TEST_F(TestInListPredicate, FLOAT_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", "REPLACE", 1, true, true, tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- phmap::flat_hash_set<float> values;
- values.insert(4.1);
- values.insert(5.1);
- values.insert(6.1);
- ColumnPredicate* pred = new InListPredicateBase<TYPE_FLOAT, PredicateType::IN_LIST>(0, values);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i + 0.1f;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.1);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr(), 5.1);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr(), 6.1);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i + 0.1;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.1);
-
- delete pred;
-}
-
-TEST_F(TestInListPredicate, DOUBLE_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- phmap::flat_hash_set<double> values;
- values.insert(4.1);
- values.insert(5.1);
- values.insert(6.1);
-
- ColumnPredicate* pred = new InListPredicateBase<TYPE_DOUBLE, PredicateType::IN_LIST>(0, values);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i + 0.1;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.1);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr(), 5.1);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr(), 6.1);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i + 0.1;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_DOUBLE_EQ(*(double*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.1);
-
- delete pred;
-}
-
-TEST_F(TestInListPredicate, DECIMAL_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DECIMAL_COLUMN"), "DECIMAL", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- phmap::flat_hash_set<decimal12_t> values;
-
- decimal12_t value1 = {4, 4};
- values.insert(value1);
- decimal12_t value2 = {5, 5};
- values.insert(value2);
- decimal12_t value3 = {6, 6};
- values.insert(value3);
-
- ColumnPredicate* pred =
- new InListPredicateBase<TYPE_DECIMALV2, PredicateType::IN_LIST>(0, values);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- reinterpret_cast<decimal12_t*>(col_block_view.data())->integer = i;
- reinterpret_cast<decimal12_t*>(col_block_view.data())->fraction = i;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_EQ(*(decimal12_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value1);
- EXPECT_EQ(*(decimal12_t*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr(), value2);
- EXPECT_EQ(*(decimal12_t*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr(), value3);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- reinterpret_cast<decimal12_t*>(col_block_view.data())->integer = i;
- reinterpret_cast<decimal12_t*>(col_block_view.data())->fraction = i;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(*(decimal12_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value2);
-
- delete pred;
-}
-
-TEST_F(TestInListPredicate, CHAR_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("STRING_COLUMN"), "CHAR", "REPLACE", 1, true, true, tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- phmap::flat_hash_set<StringValue> values;
- StringValue value1;
- const char* value1_buffer = "aaaaa";
- value1.ptr = const_cast<char*>(value1_buffer);
- value1.len = 5;
- values.insert(value1);
-
- StringValue value2;
- const char* value2_buffer = "bbbbb";
- value2.ptr = const_cast<char*>(value2_buffer);
- value2.len = 5;
- values.insert(value2);
-
- StringValue value3;
- const char* value3_buffer = "ccccc";
- value3.ptr = const_cast<char*>(value3_buffer);
- value3.len = 5;
- values.insert(value3);
-
- ColumnPredicate* pred = new InListPredicateBase<TYPE_CHAR, PredicateType::IN_LIST>(0, values);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- char* string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(60));
- memset(string_buffer, 0, 60);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= 5; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = 5;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += 5;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value1);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr(), value2);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr(), value3);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(55));
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= 5; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = 5;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += 5;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value2);
-
- delete pred;
-}
-
-TEST_F(TestInListPredicate, VARCHAR_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("STRING_COLUMN"), "VARCHAR", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- phmap::flat_hash_set<StringValue> values;
- StringValue value1;
- const char* value1_buffer = "a";
- value1.ptr = const_cast<char*>(value1_buffer);
- value1.len = 1;
- values.insert(value1);
-
- StringValue value2;
- const char* value2_buffer = "bb";
- value2.ptr = const_cast<char*>(value2_buffer);
- value2.len = 2;
- values.insert(value2);
-
- StringValue value3;
- const char* value3_buffer = "ccc";
- value3.ptr = const_cast<char*>(value3_buffer);
- value3.len = 3;
- values.insert(value3);
-
- ColumnPredicate* pred =
- new InListPredicateBase<TYPE_VARCHAR, PredicateType::IN_LIST>(0, values);
-
- // for ColumnBlock no null
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- char* string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(60));
- memset(string_buffer, 0, 60);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= i; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = i + 1;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += i + 1;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value1);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr(), value2);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr(), value3);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(55));
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= i; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = i + 1;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += i + 1;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(*(StringValue*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), value2);
-
- delete pred;
-}
-
-TEST_F(TestInListPredicate, DATE_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DATE_COLUMN"), "DATE", "REPLACE", 1, true, true, tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- phmap::flat_hash_set<uint32_t> values;
- uint24_t value1 = datetime::timestamp_from_date("2017-09-09");
- values.insert(value1);
-
- uint24_t value2 = datetime::timestamp_from_date("2017-09-10");
- values.insert(value2);
-
- uint24_t value3 = datetime::timestamp_from_date("2017-09-11");
- values.insert(value3);
- ColumnPredicate* pred = new InListPredicateBase<TYPE_DATE, PredicateType::IN_LIST>(0, values);
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07");
- date_array.push_back("2017-09-08");
- date_array.push_back("2017-09-09");
- date_array.push_back("2017-09-10");
- date_array.push_back("2017-09-11");
- date_array.push_back("2017-09-12");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- uint24_t timestamp = uint24_t(datetime::timestamp_from_date(date_array[i].c_str()));
- *reinterpret_cast<uint24_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_EQ(datetime::to_date_string(
- *(uint24_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-09");
- EXPECT_EQ(datetime::to_date_string(
- *(uint24_t*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr()),
- "2017-09-10");
- EXPECT_EQ(datetime::to_date_string(
- *(uint24_t*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr()),
- "2017-09-11");
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- uint24_t timestamp = uint24_t(datetime::timestamp_from_date(date_array[i].c_str()));
- *reinterpret_cast<uint24_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(datetime::to_date_string(
- *(uint24_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-10");
-
- delete pred;
-}
-
-TEST_F(TestInListPredicate, DATE_V2_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DATE_V2_COLUMN"), "DATEV2", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- phmap::flat_hash_set<uint32_t> values;
- uint32_t value1 = datetime::timestamp_from_date_v2("2017-09-09");
- values.insert(value1);
-
- uint32_t value2 = datetime::timestamp_from_date_v2("2017-09-10");
- values.insert(value2);
-
- uint32_t value3 = datetime::timestamp_from_date_v2("2017-09-11");
- values.insert(value3);
- ColumnPredicate* pred = new InListPredicateBase<TYPE_DATEV2, PredicateType::IN_LIST>(0, values);
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07");
- date_array.push_back("2017-09-08");
- date_array.push_back("2017-09-09");
- date_array.push_back("2017-09-10");
- date_array.push_back("2017-09-11");
- date_array.push_back("2017-09-12");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- uint32_t timestamp = datetime::timestamp_from_date_v2(date_array[i].c_str());
- *reinterpret_cast<uint32_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_EQ(datetime::to_date_v2_string(
- *(uint32_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-09");
- EXPECT_EQ(datetime::to_date_v2_string(
- *(uint32_t*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr()),
- "2017-09-10");
- EXPECT_EQ(datetime::to_date_v2_string(
- *(uint32_t*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr()),
- "2017-09-11");
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- uint32_t timestamp = datetime::timestamp_from_date_v2(date_array[i].c_str());
- *reinterpret_cast<uint32_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(datetime::to_date_v2_string(
- *(uint32_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-10");
-
- delete pred;
-}
-
-TEST_F(TestInListPredicate, DATETIME_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DATETIME_COLUMN"), "DATETIME", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- phmap::flat_hash_set<uint64_t> values;
- uint64_t value1 = datetime::timestamp_from_datetime("2017-09-09 00:00:01");
- values.insert(value1);
-
- uint64_t value2 = datetime::timestamp_from_datetime("2017-09-10 01:00:00");
- values.insert(value2);
-
- uint64_t value3 = datetime::timestamp_from_datetime("2017-09-11 01:01:00");
- values.insert(value3);
-
- ColumnPredicate* pred =
- new InListPredicateBase<TYPE_DATETIME, PredicateType::IN_LIST>(0, values);
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07 00:00:00");
- date_array.push_back("2017-09-08 00:01:00");
- date_array.push_back("2017-09-09 00:00:01");
- date_array.push_back("2017-09-10 01:00:00");
- date_array.push_back("2017-09-11 01:01:00");
- date_array.push_back("2017-09-12 01:01:01");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- uint64_t timestamp = datetime::timestamp_from_datetime(date_array[i].c_str());
- *reinterpret_cast<uint64_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 3);
- EXPECT_EQ(datetime::to_datetime_string(
- *(uint64_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-09 00:00:01");
- EXPECT_EQ(datetime::to_datetime_string(
- *(uint64_t*)col_block.cell(_row_block->selection_vector()[1]).cell_ptr()),
- "2017-09-10 01:00:00");
- EXPECT_EQ(datetime::to_datetime_string(
- *(uint64_t*)col_block.cell(_row_block->selection_vector()[2]).cell_ptr()),
- "2017-09-11 01:01:00");
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- uint64_t timestamp = datetime::timestamp_from_datetime(date_array[i].c_str());
- *reinterpret_cast<uint64_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 1);
- EXPECT_EQ(datetime::to_datetime_string(
- *(uint64_t*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr()),
- "2017-09-10 01:00:00");
-
- delete pred;
-}
-
-} // namespace doris
diff --git a/be/test/olap/null_predicate_test.cpp b/be/test/olap/null_predicate_test.cpp
deleted file mode 100644
index fc879bb241..0000000000
--- a/be/test/olap/null_predicate_test.cpp
+++ /dev/null
@@ -1,614 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/null_predicate.h"
-
-#include <google/protobuf/stubs/common.h>
-#include <gtest/gtest.h>
-#include <time.h>
-
-#include "olap/column_predicate.h"
-#include "olap/field.h"
-#include "olap/row_block2.h"
-#include "runtime/mem_pool.h"
-#include "runtime/string_value.hpp"
-#include "vec/columns/column_nullable.h"
-#include "vec/core/block.h"
-
-using namespace doris::vectorized;
-
-namespace doris {
-
-namespace datetime {
-
-static uint24_t to_date_timestamp(const char* date_string) {
- tm time_tm;
- strptime(date_string, "%Y-%m-%d", &time_tm);
-
- int value = (time_tm.tm_year + 1900) * 16 * 32 + (time_tm.tm_mon + 1) * 32 + time_tm.tm_mday;
- return uint24_t(value);
-}
-
-static uint32_t to_date_v2_timestamp(const char* date_string) {
- tm time_tm;
- strptime(date_string, "%Y-%m-%d", &time_tm);
-
- return ((time_tm.tm_year + 1900) << 9) | ((time_tm.tm_mon + 1) << 5) | time_tm.tm_mday;
-}
-
-}; // namespace datetime
-
-class TestNullPredicate : public testing::Test {
-public:
- TestNullPredicate() : _row_block(nullptr) { _mem_pool.reset(new MemPool()); }
-
- ~TestNullPredicate() {}
-
- void SetTabletSchema(std::string name, std::string type, std::string aggregation,
- uint32_t length, bool is_allow_null, bool is_key,
- TabletSchemaSPtr tablet_schema) {
- TabletSchemaPB tablet_schema_pb;
- static int id = 0;
- ColumnPB* column = tablet_schema_pb.add_column();
- ;
- column->set_unique_id(++id);
- column->set_name(name);
- column->set_type(type);
- column->set_is_key(is_key);
- column->set_is_nullable(is_allow_null);
- column->set_length(length);
- column->set_aggregation(aggregation);
- column->set_precision(1000);
- column->set_frac(1000);
- column->set_is_bf_column(false);
- tablet_schema->init_from_pb(tablet_schema_pb);
- }
-
- void init_row_block(TabletSchemaSPtr tablet_schema, int size) {
- _schema = std::make_unique<Schema>(tablet_schema);
- _row_block.reset(new RowBlockV2(*_schema, size));
- }
-
- std::unique_ptr<MemPool> _mem_pool;
- std::unique_ptr<RowBlockV2> _row_block;
- std::unique_ptr<Schema> _schema;
-};
-
-#define TEST_IN_LIST_PREDICATE(TYPE, TYPE_NAME, FIELD_TYPE) \
- TEST_F(TestNullPredicate, TYPE_NAME##_COLUMN) { \
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); \
- SetTabletSchema(std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, "REPLACE", 1, true, true, \
- tablet_schema); \
- int size = 10; \
- std::vector<uint32_t> return_columns; \
- for (int i = 0; i < tablet_schema->num_columns(); ++i) { \
- return_columns.push_back(i); \
- } \
- std::unique_ptr<ColumnPredicate> pred(new NullPredicate(0, true)); \
- \
- /* for ColumnBlock nulls */ \
- init_row_block(tablet_schema, size); \
- ColumnBlock col_block = _row_block->column_block(0); \
- auto select_size = _row_block->selected_size(); \
- ColumnBlockView col_block_view(&col_block); \
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) { \
- col_block_view.set_null_bits(1, false); \
- *reinterpret_cast<TYPE*>(col_block_view.data()) = i; \
- } \
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); \
- EXPECT_EQ(select_size, 0); \
- \
- /* for vectorized::Block no null */ \
- _row_block->clear(); \
- select_size = _row_block->selected_size(); \
- vectorized::Block vec_block = tablet_schema->create_block(return_columns); \
- _row_block->convert_to_vec_block(&vec_block); \
- ColumnPtr vec_col = vec_block.get_columns()[0]; \
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), \
- _row_block->selection_vector(), select_size); \
- EXPECT_EQ(select_size, 0); \
- \
- /* for ColumnBlock has nulls */ \
- col_block_view = ColumnBlockView(&col_block); \
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) { \
- if (i % 2 == 0) { \
- col_block_view.set_null_bits(1, true); \
- } else { \
- col_block_view.set_null_bits(1, false); \
- *reinterpret_cast<TYPE*>(col_block_view.data()) = i; \
- } \
- } \
- _row_block->clear(); \
- select_size = _row_block->selected_size(); \
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); \
- EXPECT_EQ(select_size, 5); \
- \
- /* for vectorized::Block has nulls */ \
- _row_block->clear(); \
- select_size = _row_block->selected_size(); \
- vec_block = tablet_schema->create_block(return_columns); \
- _row_block->convert_to_vec_block(&vec_block); \
- vec_col = vec_block.get_columns()[0]; \
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), \
- _row_block->selection_vector(), select_size); \
- EXPECT_EQ(select_size, 5); \
- pred.reset(); \
- }
-
-TEST_IN_LIST_PREDICATE(int8_t, TINYINT, "TINYINT")
-TEST_IN_LIST_PREDICATE(int16_t, SMALLINT, "SMALLINT")
-TEST_IN_LIST_PREDICATE(int32_t, INT, "INT")
-TEST_IN_LIST_PREDICATE(int64_t, BIGINT, "BIGINT")
-TEST_IN_LIST_PREDICATE(int128_t, LARGEINT, "LARGEINT")
-
-TEST_F(TestNullPredicate, FLOAT_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("FLOAT_COLUMN"), "FLOAT", "REPLACE", 1, true, true, tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- std::unique_ptr<ColumnPredicate> pred(new NullPredicate(0, true));
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i + 0.1;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 0);
-
- // for vectorized::Block no null
- _row_block->clear();
- select_size = _row_block->selected_size();
- vectorized::Block vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- ColumnPtr vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<float*>(col_block_view.data()) = i + 0.1;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 5);
-
- // for vectorized::Block has nulls
- _row_block->clear();
- select_size = _row_block->selected_size();
- vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 5);
-}
-
-TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- std::unique_ptr<ColumnPredicate> pred(new NullPredicate(0, true));
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i + 0.1;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 0);
-
- // for vectorized::Block no null
- _row_block->clear();
- select_size = _row_block->selected_size();
- vectorized::Block vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- ColumnPtr vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 2 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- *reinterpret_cast<double*>(col_block_view.data()) = i + 0.1;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 5);
-
- // for vectorized::Block has nulls
- _row_block->clear();
- select_size = _row_block->selected_size();
- vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 5);
-}
-
-TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DECIMAL_COLUMN"), "DECIMAL", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- std::unique_ptr<ColumnPredicate> pred(new NullPredicate(0, true));
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- reinterpret_cast<decimal12_t*>(col_block_view.data())->integer = i;
- reinterpret_cast<decimal12_t*>(col_block_view.data())->fraction = i;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 0);
-
- // for vectorized::Block no null
- _row_block->clear();
- select_size = _row_block->selected_size();
- vectorized::Block vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- ColumnPtr vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 3 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- reinterpret_cast<decimal12_t*>(col_block_view.data())->integer = i;
- reinterpret_cast<decimal12_t*>(col_block_view.data())->fraction = i;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 4);
-
- // for vectorized::Block has nulls
- _row_block->clear();
- select_size = _row_block->selected_size();
- vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 4);
-}
-
-TEST_F(TestNullPredicate, STRING_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("STRING_COLUMN"), "VARCHAR", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 10;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- std::unique_ptr<ColumnPredicate> pred(new NullPredicate(0, true));
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
-
- char* string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(55));
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= i; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = i + 1;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += i + 1;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 0);
-
- // for vectorized::Block no null
- _row_block->clear();
- select_size = _row_block->selected_size();
- vectorized::Block vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- ColumnPtr vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- string_buffer = reinterpret_cast<char*>(_mem_pool->allocate(55));
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 3 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- for (int j = 0; j <= i; ++j) {
- string_buffer[j] = 'a' + i;
- }
- reinterpret_cast<StringValue*>(col_block_view.data())->len = i + 1;
- reinterpret_cast<StringValue*>(col_block_view.data())->ptr = string_buffer;
- string_buffer += i + 1;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 4);
-
- // for vectorized::Block has nulls
- _row_block->clear();
- select_size = _row_block->selected_size();
- vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 4);
-}
-
-TEST_F(TestNullPredicate, DATE_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DATE_COLUMN"), "DATE", "REPLACE", 1, true, true, tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- std::unique_ptr<ColumnPredicate> pred(new NullPredicate(0, true));
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07");
- date_array.push_back("2017-09-08");
- date_array.push_back("2017-09-09");
- date_array.push_back("2017-09-10");
- date_array.push_back("2017-09-11");
- date_array.push_back("2017-09-12");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- uint24_t timestamp = datetime::to_date_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint24_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 0);
-
- // for vectorized::Block no null
- _row_block->clear();
- select_size = _row_block->selected_size();
- vectorized::Block vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- ColumnPtr vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 3 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- uint24_t timestamp = datetime::to_date_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint24_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 2);
-
- // for vectorized::Block has nulls
- _row_block->clear();
- select_size = _row_block->selected_size();
- vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 2);
-}
-
-TEST_F(TestNullPredicate, DATETIME_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DATETIME_COLUMN"), "DATETIME", "REPLACE", 1, true, true,
- tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- std::unique_ptr<ColumnPredicate> pred(new NullPredicate(0, true));
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07");
- date_array.push_back("2017-09-08");
- date_array.push_back("2017-09-09");
- date_array.push_back("2017-09-10");
- date_array.push_back("2017-09-11");
- date_array.push_back("2017-09-12");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- uint64_t timestamp = datetime::to_date_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint64_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 0);
-
- // for vectorized::Block no null
- _row_block->clear();
- select_size = _row_block->selected_size();
- vectorized::Block vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- ColumnPtr vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 3 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- uint64_t timestamp = datetime::to_date_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint64_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 2);
-
- // for vectorized::Block has nulls
- _row_block->clear();
- select_size = _row_block->selected_size();
- vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 2);
-}
-
-TEST_F(TestNullPredicate, DATEV2_COLUMN) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- SetTabletSchema(std::string("DATEV2_COLUMN"), "DATEV2", "REPLACE", 4, true, true,
- tablet_schema);
- int size = 6;
- std::vector<uint32_t> return_columns;
- for (int i = 0; i < tablet_schema->num_columns(); ++i) {
- return_columns.push_back(i);
- }
- std::unique_ptr<ColumnPredicate> pred(new NullPredicate(0, true));
-
- std::vector<std::string> date_array;
- date_array.push_back("2017-09-07");
- date_array.push_back("2017-09-08");
- date_array.push_back("2017-09-09");
- date_array.push_back("2017-09-10");
- date_array.push_back("2017-09-11");
- date_array.push_back("2017-09-12");
-
- // for ColumnBlock no nulls
- init_row_block(tablet_schema, size);
- ColumnBlock col_block = _row_block->column_block(0);
- auto select_size = _row_block->selected_size();
- ColumnBlockView col_block_view(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- col_block_view.set_null_bits(1, false);
- uint32_t timestamp = datetime::to_date_v2_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint32_t*>(col_block_view.data()) = timestamp;
- }
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 0);
-
- // for vectorized::Block no null
- _row_block->clear();
- select_size = _row_block->selected_size();
- vectorized::Block vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- ColumnPtr vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 0);
-
- // for ColumnBlock has nulls
- col_block_view = ColumnBlockView(&col_block);
- for (int i = 0; i < size; ++i, col_block_view.advance(1)) {
- if (i % 3 == 0) {
- col_block_view.set_null_bits(1, true);
- } else {
- col_block_view.set_null_bits(1, false);
- uint32_t timestamp = datetime::to_date_v2_timestamp(date_array[i].c_str());
- *reinterpret_cast<uint32_t*>(col_block_view.data()) = timestamp;
- }
- }
- _row_block->clear();
- select_size = _row_block->selected_size();
- pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
- EXPECT_EQ(select_size, 2);
-
- // for vectorized::Block has nulls
- _row_block->clear();
- select_size = _row_block->selected_size();
- vec_block = tablet_schema->create_block(return_columns);
- _row_block->convert_to_vec_block(&vec_block);
- vec_col = vec_block.get_columns()[0];
- select_size = pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
- _row_block->selection_vector(), select_size);
- EXPECT_EQ(select_size, 2);
-}
-
-} // namespace doris
diff --git a/be/test/olap/row_block_v2_test.cpp b/be/test/olap/row_block_v2_test.cpp
deleted file mode 100644
index 217a3871fb..0000000000
--- a/be/test/olap/row_block_v2_test.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-
-#include "olap/row_block2.h"
-
-namespace doris {
-
-class TestRowBlockV2 : public testing::Test {
-public:
- TestRowBlockV2() {}
- void SetUp() {}
- void TearDown() {}
-};
-
-void init_tablet_schema(TabletSchemaSPtr tablet_schema, bool is_nullable) {
- TabletSchemaPB tablet_schema_pb;
- {
- // k1: bigint
- {
- ColumnPB* column_1 = tablet_schema_pb.add_column();
- column_1->set_unique_id(1);
- column_1->set_name("k1");
- column_1->set_type("BIGINT");
- column_1->set_is_key(true);
- column_1->set_length(8);
- column_1->set_is_nullable(is_nullable);
- column_1->set_aggregation("NONE");
- }
- // k2: char
- {
- ColumnPB* column_2 = tablet_schema_pb.add_column();
- column_2->set_unique_id(2);
- column_2->set_name("k2");
- column_2->set_type("CHAR");
- column_2->set_is_key(true);
- column_2->set_length(10);
- column_2->set_is_nullable(is_nullable);
- column_2->set_aggregation("NONE");
- }
- // k3: varchar
- {
- ColumnPB* column_3 = tablet_schema_pb.add_column();
- column_3->set_unique_id(3);
- column_3->set_name("k3");
- column_3->set_type("VARCHAR");
- column_3->set_is_key(true);
- column_3->set_length(20);
- column_3->set_is_nullable(is_nullable);
- column_3->set_aggregation("NONE");
- }
- // v1: int
- {
- ColumnPB* column_4 = tablet_schema_pb.add_column();
- column_4->set_unique_id(3);
- column_4->set_name("v1");
- column_4->set_type("INT");
- column_4->set_is_key(false);
- column_4->set_length(4);
- column_4->set_is_nullable(false);
- column_4->set_aggregation("SUM");
- }
- }
- tablet_schema->init_from_pb(tablet_schema_pb);
-}
-
-TEST_F(TestRowBlockV2, test_convert) {
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- init_tablet_schema(tablet_schema, true);
- Schema schema(tablet_schema);
- RowBlockV2 input_block(schema, 1024);
- RowBlock output_block(tablet_schema);
- RowBlockInfo block_info;
- block_info.row_num = 1024;
- block_info.null_supported = true;
- output_block.init(block_info);
- MemPool pool;
- for (int i = 0; i < input_block.capacity(); ++i) {
- RowBlockRow row = input_block.row(i);
-
- // column_1
- row.set_is_null(0, false);
- uint8_t* cell0 = row.mutable_cell_ptr(0);
- (*(uint64_t*)cell0) = i;
-
- // column_2
- uint8_t* buf = pool.allocate(10);
- memset(buf, 'a' + (i % 10), 10);
- Slice str1(buf, 10);
- row.set_is_null(1, false);
- uint8_t* cell1 = row.mutable_cell_ptr(1);
- (*(Slice*)cell1) = str1;
-
- // column_3
- uint8_t* buf2 = pool.allocate(10);
- memset(buf2, 'A' + (i % 10), 10);
- Slice str2(buf2, 10);
- row.set_is_null(2, false);
- uint8_t* cell3 = row.mutable_cell_ptr(2);
- (*(Slice*)cell3) = str2;
-
- // column_4
- row.set_is_null(3, false);
- uint8_t* cell4 = row.mutable_cell_ptr(3);
- (*(uint32_t*)cell4) = 10 * i;
- }
-
- input_block.set_selected_size(5);
- uint16_t* select_vector = input_block.selection_vector();
- for (int i = 0; i < input_block.selected_size(); ++i) {
- // 10, 20, 30, 40, 50
- select_vector[i] = (i + 1) * 10;
- }
-
- RowCursor helper;
- helper.init(tablet_schema);
- auto st = input_block.convert_to_row_block(&helper, &output_block);
- EXPECT_TRUE(st.ok());
- EXPECT_EQ(5, output_block.limit());
- for (int i = 0; i < 5; ++i) {
- char* field1 = output_block.field_ptr(i, 0);
- char* field2 = output_block.field_ptr(i, 1);
- char* field3 = output_block.field_ptr(i, 2);
- char* field4 = output_block.field_ptr(i, 3);
- // test null bit
- EXPECT_FALSE(*reinterpret_cast<bool*>(field1));
- EXPECT_FALSE(*reinterpret_cast<bool*>(field2));
- EXPECT_FALSE(*reinterpret_cast<bool*>(field3));
- EXPECT_FALSE(*reinterpret_cast<bool*>(field4));
-
- uint64_t k1 = *reinterpret_cast<uint64_t*>(field1 + 1);
- EXPECT_EQ((i + 1) * 10, k1);
-
- Slice k2 = *reinterpret_cast<Slice*>(field2 + 1);
- char buf[10];
- memset(buf, 'a' + ((i + 1) * 10) % 10, 10);
- Slice k2_v(buf, 10);
- EXPECT_EQ(k2_v, k2);
-
- Slice k3 = *reinterpret_cast<Slice*>(field3 + 1);
- char buf2[10];
- memset(buf2, 'A' + ((i + 1) * 10) % 10, 10);
- Slice k3_v(buf2, 10);
- EXPECT_EQ(k3_v, k3);
-
- uint32_t v1 = *reinterpret_cast<uint32_t*>(field4 + 1);
- EXPECT_EQ((i + 1) * 10 * 10, v1);
- }
-}
-
-} // namespace doris
diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp
index c880564542..077d46399c 100644
--- a/be/test/olap/rowset/beta_rowset_test.cpp
+++ b/be/test/olap/rowset/beta_rowset_test.cpp
@@ -169,225 +169,6 @@ private:
std::unique_ptr<DataDir> _data_dir;
};
-TEST_F(BetaRowsetTest, BasicFunctionTest) {
- Status s;
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- create_tablet_schema(tablet_schema);
-
- RowsetSharedPtr rowset;
- const int num_segments = 3;
- const uint32_t rows_per_segment = 4096;
- std::vector<uint32_t> segment_num_rows;
- { // write `num_segments * rows_per_segment` rows to rowset
- RowsetWriterContext writer_context;
- create_rowset_writer_context(tablet_schema, &writer_context);
-
- std::unique_ptr<RowsetWriter> rowset_writer;
- s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
- EXPECT_EQ(Status::OK(), s);
-
- RowCursor input_row;
- input_row.init(tablet_schema);
-
- // for segment "i", row "rid"
- // k1 := rid*10 + i
- // k2 := k1 * 10
- // k3 := 4096 * i + rid
- for (int i = 0; i < num_segments; ++i) {
- MemPool mem_pool;
- for (int rid = 0; rid < rows_per_segment; ++rid) {
- uint32_t k1 = rid * 10 + i;
- uint32_t k2 = k1 * 10;
- uint32_t k3 = rows_per_segment * i + rid;
- input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool);
- input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool);
- input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool);
- s = rowset_writer->add_row(input_row);
- EXPECT_EQ(Status::OK(), s);
- }
- s = rowset_writer->flush();
- EXPECT_EQ(Status::OK(), s);
- }
-
- rowset = rowset_writer->build();
- EXPECT_TRUE(rowset != nullptr);
- EXPECT_EQ(num_segments, rowset->rowset_meta()->num_segments());
- EXPECT_EQ(num_segments * rows_per_segment, rowset->rowset_meta()->num_rows());
- }
-
- { // test return ordered results and return k1 and k2
- RowsetReaderContext reader_context;
- reader_context.tablet_schema = tablet_schema;
- reader_context.need_ordered_result = true;
- std::vector<uint32_t> return_columns = {0, 1};
- reader_context.return_columns = &return_columns;
- reader_context.stats = &_stats;
-
- // without predicates
- {
- RowsetReaderSharedPtr rowset_reader;
- create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
- RowBlock* output_block;
- uint32_t num_rows_read = 0;
- while ((s = rowset_reader->next_block(&output_block)) == Status::OK()) {
- EXPECT_TRUE(output_block != nullptr);
- EXPECT_GT(output_block->row_num(), 0);
- EXPECT_EQ(0, output_block->pos());
- EXPECT_EQ(output_block->row_num(), output_block->limit());
- EXPECT_EQ(return_columns, output_block->row_block_info().column_ids);
- // after sort merge segments, k1 will be 0, 1, 2, 10, 11, 12, 20, 21, 22, ..., 40950, 40951, 40952
- for (int i = 0; i < output_block->row_num(); ++i) {
- char* field1 = output_block->field_ptr(i, 0);
- char* field2 = output_block->field_ptr(i, 1);
- // test null bit
- EXPECT_FALSE(*reinterpret_cast<bool*>(field1));
- EXPECT_FALSE(*reinterpret_cast<bool*>(field2));
- uint32_t k1 = *reinterpret_cast<uint32_t*>(field1 + 1);
- uint32_t k2 = *reinterpret_cast<uint32_t*>(field2 + 1);
- EXPECT_EQ(k1 * 10, k2);
-
- int rid = num_rows_read / 3;
- int seg_id = num_rows_read % 3;
- EXPECT_EQ(rid * 10 + seg_id, k1);
- num_rows_read++;
- }
- }
- EXPECT_EQ(Status::Error<END_OF_FILE>(), s);
- EXPECT_TRUE(output_block == nullptr);
- EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
- EXPECT_TRUE(rowset_reader->get_segment_num_rows(&segment_num_rows).ok());
- EXPECT_EQ(segment_num_rows.size(), num_segments);
- for (auto i = 0; i < num_segments; i++) {
- EXPECT_EQ(segment_num_rows[i], rows_per_segment);
- }
- }
-
- // merge segments with predicates
- {
- std::vector<ColumnPredicate*> column_predicates;
- // column predicate: k1 = 10
- std::unique_ptr<ColumnPredicate> predicate(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(0, 10));
- column_predicates.emplace_back(predicate.get());
- reader_context.predicates = &column_predicates;
- RowsetReaderSharedPtr rowset_reader;
- create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
- RowBlock* output_block;
- uint32_t num_rows_read = 0;
- while ((s = rowset_reader->next_block(&output_block)) == Status::OK()) {
- EXPECT_TRUE(output_block != nullptr);
- EXPECT_EQ(1, output_block->row_num());
- EXPECT_EQ(0, output_block->pos());
- EXPECT_EQ(output_block->row_num(), output_block->limit());
- EXPECT_EQ(return_columns, output_block->row_block_info().column_ids);
- // after sort merge segments, k1 will be 10
- for (int i = 0; i < output_block->row_num(); ++i) {
- char* field1 = output_block->field_ptr(i, 0);
- char* field2 = output_block->field_ptr(i, 1);
- // test null bit
- EXPECT_FALSE(*reinterpret_cast<bool*>(field1));
- EXPECT_FALSE(*reinterpret_cast<bool*>(field2));
- uint32_t k1 = *reinterpret_cast<uint32_t*>(field1 + 1);
- uint32_t k2 = *reinterpret_cast<uint32_t*>(field2 + 1);
- EXPECT_EQ(10, k1);
- EXPECT_EQ(k1 * 10, k2);
- num_rows_read++;
- }
- }
- EXPECT_EQ(Status::Error<END_OF_FILE>(), s);
- EXPECT_TRUE(output_block == nullptr);
- EXPECT_EQ(1, num_rows_read);
- EXPECT_TRUE(rowset_reader->get_segment_num_rows(&segment_num_rows).ok());
- EXPECT_EQ(segment_num_rows.size(), num_segments);
- for (auto i = 0; i < num_segments; i++) {
- EXPECT_EQ(segment_num_rows[i], rows_per_segment);
- }
- }
- }
-
- { // test return unordered data and only k3
- RowsetReaderContext reader_context;
- reader_context.tablet_schema = tablet_schema;
- reader_context.need_ordered_result = false;
- std::vector<uint32_t> return_columns = {2};
- reader_context.return_columns = &return_columns;
- reader_context.stats = &_stats;
-
- // without predicate
- {
- RowsetReaderSharedPtr rowset_reader;
- create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
-
- RowBlock* output_block;
- uint32_t num_rows_read = 0;
- while ((s = rowset_reader->next_block(&output_block)) == Status::OK()) {
- EXPECT_TRUE(output_block != nullptr);
- EXPECT_GT(output_block->row_num(), 0);
- EXPECT_EQ(0, output_block->pos());
- EXPECT_EQ(output_block->row_num(), output_block->limit());
- EXPECT_EQ(return_columns, output_block->row_block_info().column_ids);
- // for unordered result, k3 will be 0, 1, 2, ..., 4096*3-1
- for (int i = 0; i < output_block->row_num(); ++i) {
- char* field3 = output_block->field_ptr(i, 2);
- // test null bit
- EXPECT_FALSE(*reinterpret_cast<bool*>(field3));
- uint32_t k3 = *reinterpret_cast<uint32_t*>(field3 + 1);
- EXPECT_EQ(num_rows_read, k3);
- num_rows_read++;
- }
- }
- EXPECT_EQ(Status::Error<END_OF_FILE>(), s);
- EXPECT_TRUE(output_block == nullptr);
- EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
- EXPECT_TRUE(rowset_reader->get_segment_num_rows(&segment_num_rows).ok());
- EXPECT_EQ(segment_num_rows.size(), num_segments);
- for (auto i = 0; i < num_segments; i++) {
- EXPECT_EQ(segment_num_rows[i], rows_per_segment);
- }
- }
-
- // with predicate
- {
- std::vector<ColumnPredicate*> column_predicates;
- // column predicate: k3 < 100
- ColumnPredicate* predicate =
- new ComparisonPredicateBase<TYPE_INT, PredicateType::LT>(2, 100);
- column_predicates.emplace_back(predicate);
- reader_context.predicates = &column_predicates;
- RowsetReaderSharedPtr rowset_reader;
- create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
-
- RowBlock* output_block;
- uint32_t num_rows_read = 0;
- while ((s = rowset_reader->next_block(&output_block)) == Status::OK()) {
- EXPECT_TRUE(output_block != nullptr);
- EXPECT_LE(output_block->row_num(), 100);
- EXPECT_EQ(0, output_block->pos());
- EXPECT_EQ(output_block->row_num(), output_block->limit());
- EXPECT_EQ(return_columns, output_block->row_block_info().column_ids);
- // for unordered result, k3 will be 0, 1, 2, ..., 99
- for (int i = 0; i < output_block->row_num(); ++i) {
- char* field3 = output_block->field_ptr(i, 2);
- // test null bit
- EXPECT_FALSE(*reinterpret_cast<bool*>(field3));
- uint32_t k3 = *reinterpret_cast<uint32_t*>(field3 + 1);
- EXPECT_EQ(num_rows_read, k3);
- num_rows_read++;
- }
- }
- EXPECT_EQ(Status::Error<END_OF_FILE>(), s);
- EXPECT_TRUE(output_block == nullptr);
- EXPECT_EQ(100, num_rows_read);
- delete predicate;
- EXPECT_TRUE(rowset_reader->get_segment_num_rows(&segment_num_rows).ok());
- EXPECT_EQ(segment_num_rows.size(), num_segments);
- for (auto i = 0; i < num_segments; i++) {
- EXPECT_EQ(segment_num_rows[i], rows_per_segment);
- }
- }
- }
-}
-
class S3ClientMock : public Aws::S3::S3Client {
S3ClientMock() {}
S3ClientMock(const Aws::Auth::AWSCredentials& credentials,
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp
deleted file mode 100644
index 9ce5b31415..0000000000
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ /dev/null
@@ -1,1176 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/rowset/segment_v2/segment.h"
-
-#include <gtest/gtest.h>
-
-#include <filesystem>
-#include <functional>
-#include <iostream>
-#include <memory>
-#include <vector>
-
-#include "io/fs/file_system.h"
-#include "io/fs/file_writer.h"
-#include "io/fs/local_file_system.h"
-#include "olap/comparison_predicate.h"
-#include "olap/data_dir.h"
-#include "olap/in_list_predicate.h"
-#include "olap/olap_common.h"
-#include "olap/row_block2.h"
-#include "olap/row_cursor.h"
-#include "olap/rowset/segment_v2/segment_writer.h"
-#include "olap/storage_engine.h"
-#include "olap/tablet_schema.h"
-#include "olap/tablet_schema_helper.h"
-#include "runtime/mem_pool.h"
-#include "testutil/test_util.h"
-#include "util/file_utils.h"
-#include "util/key_util.h"
-
-namespace doris {
-using namespace ErrorCode;
-namespace segment_v2 {
-
-using std::string;
-using std::shared_ptr;
-
-using std::vector;
-
-using ValueGenerator = std::function<void(size_t rid, int cid, int block_id, RowCursorCell& cell)>;
-
-// 0, 1, 2, 3
-// 10, 11, 12, 13
-// 20, 21, 22, 23
-static void DefaultIntGenerator(size_t rid, int cid, int block_id, RowCursorCell& cell) {
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = rid * 10 + cid;
-}
-
-static bool column_contains_index(ColumnMetaPB column_meta, ColumnIndexTypePB type) {
- for (int i = 0; i < column_meta.indexes_size(); ++i) {
- if (column_meta.indexes(i).type() == type) {
- return true;
- }
- }
- return false;
-}
-
-static StorageEngine* k_engine = nullptr;
-
-class SegmentReaderWriterTest : public ::testing::Test {
-protected:
- void SetUp() override {
- if (FileUtils::check_exist(kSegmentDir)) {
- EXPECT_TRUE(FileUtils::remove_all(kSegmentDir).ok());
- }
- EXPECT_TRUE(FileUtils::create_dir(kSegmentDir).ok());
-
- doris::EngineOptions options;
- k_engine = new StorageEngine(options);
- StorageEngine::_s_instance = k_engine;
- }
-
- void TearDown() override {
- if (FileUtils::check_exist(kSegmentDir)) {
- EXPECT_TRUE(FileUtils::remove_all(kSegmentDir).ok());
- }
- if (k_engine != nullptr) {
- k_engine->stop();
- delete k_engine;
- k_engine = nullptr;
- }
- }
-
- TabletSchemaSPtr create_schema(const std::vector<TabletColumn>& columns,
- KeysType keys_type = DUP_KEYS, int num_custom_key_columns = -1) {
- TabletSchemaSPtr res = std::make_shared<TabletSchema>();
-
- for (auto& col : columns) {
- res->append_column(col);
- }
- res->_num_short_key_columns =
- num_custom_key_columns != -1 ? num_custom_key_columns : res->num_key_columns();
- res->_keys_type = keys_type;
- return res;
- }
-
- void build_segment(SegmentWriterOptions opts, TabletSchemaSPtr build_schema,
- TabletSchemaSPtr query_schema, size_t nrows, const ValueGenerator& generator,
- shared_ptr<Segment>* res) {
- static int seg_id = 0;
- // must use unique filename for each segment, otherwise page cache kicks in and produces
- // the wrong answer (it use (filename,offset) as cache key)
- std::string filename = fmt::format("seg_{}.dat", seg_id++);
- std::string path = fmt::format("{}/{}", kSegmentDir, filename);
- auto fs = io::global_local_filesystem();
-
- io::FileWriterPtr file_writer;
- Status st = fs->create_file(path, &file_writer);
- EXPECT_TRUE(st.ok());
- DataDir data_dir(kSegmentDir);
- data_dir.init();
- SegmentWriter writer(file_writer.get(), 0, build_schema, &data_dir, INT32_MAX, opts);
- st = writer.init();
- EXPECT_TRUE(st.ok());
-
- RowCursor row;
- auto olap_st = row.init(build_schema);
- EXPECT_EQ(Status::OK(), olap_st);
-
- for (size_t rid = 0; rid < nrows; ++rid) {
- for (int cid = 0; cid < build_schema->num_columns(); ++cid) {
- int row_block_id = rid / opts.num_rows_per_block;
- RowCursorCell cell = row.cell(cid);
- generator(rid, cid, row_block_id, cell);
- }
- EXPECT_TRUE(writer.append_row(row).ok());
- }
-
- uint64_t file_size, index_size;
- st = writer.finalize(&file_size, &index_size);
- EXPECT_TRUE(st.ok());
- EXPECT_TRUE(file_writer->close().ok());
- // Check min/max key generation
- // Create min row
- for (int cid = 0; cid < build_schema->num_key_columns(); ++cid) {
- RowCursorCell cell = row.cell(cid);
- generator(0, cid, 0 / opts.num_rows_per_block, cell);
- }
- std::string min_encoded_key;
- encode_key<RowCursor, true, true>(&min_encoded_key, row, build_schema->num_key_columns());
- EXPECT_EQ(min_encoded_key, writer.min_encoded_key().to_string());
- // Create max row
- for (int cid = 0; cid < build_schema->num_key_columns(); ++cid) {
- RowCursorCell cell = row.cell(cid);
- generator(nrows - 1, cid, (nrows - 1) / opts.num_rows_per_block, cell);
- }
- std::string max_encoded_key;
- encode_key<RowCursor, true, true>(&max_encoded_key, row, build_schema->num_key_columns());
- EXPECT_EQ(max_encoded_key, writer.max_encoded_key().to_string());
-
- st = Segment::open(fs, path, "", 0, {}, query_schema, res);
- EXPECT_TRUE(st.ok());
- EXPECT_EQ(nrows, (*res)->num_rows());
- }
-
-public:
- const std::string kSegmentDir = "./ut_dir/segment_test";
-};
-
-TEST_F(SegmentReaderWriterTest, normal) {
- std::vector<KeysType> keys_type_vec = {DUP_KEYS, AGG_KEYS, UNIQUE_KEYS};
- std::vector<bool> enable_unique_key_merge_on_write_vec = {false, true};
- for (auto keys_type : keys_type_vec) {
- for (auto enable_unique_key_merge_on_write : enable_unique_key_merge_on_write_vec) {
- TabletSchemaSPtr tablet_schema =
- create_schema({create_int_key(1), create_int_key(2), create_int_value(3),
- create_int_value(4)},
- keys_type);
- SegmentWriterOptions opts;
- opts.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write;
- opts.num_rows_per_block = 10;
-
- shared_ptr<Segment> segment;
- build_segment(opts, tablet_schema, tablet_schema, 4096, DefaultIntGenerator, &segment);
-
- // reader
- {
- Schema schema(tablet_schema);
- OlapReaderStatistics stats;
- // scan all rows
- {
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 1024);
-
- int left = 4096;
-
- int rowid = 0;
- while (left > 0) {
- int rows_read = left > 1024 ? 1024 : left;
- block.clear();
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
- EXPECT_EQ(rows_read, block.num_rows());
- left -= rows_read;
-
- for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
- auto cid = block.schema()->column_ids()[j];
- auto column_block = block.column_block(j);
- for (int i = 0; i < rows_read; ++i) {
- int rid = rowid + i;
- EXPECT_FALSE(column_block.is_null(i));
- EXPECT_EQ(rid * 10 + cid, *(int*)column_block.cell_ptr(i));
- }
- }
- rowid += rows_read;
- }
- }
- // test seek, key, not exits
- {
- // lower bound
- std::unique_ptr<RowCursor> lower_bound(new RowCursor());
- lower_bound->init(tablet_schema, 2);
- {
- auto cell = lower_bound->cell(0);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = 100;
- }
- {
- auto cell = lower_bound->cell(1);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = 100;
- }
-
- // upper bound
- std::unique_ptr<RowCursor> upper_bound(new RowCursor());
- upper_bound->init(tablet_schema, 1);
- {
- auto cell = upper_bound->cell(0);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = 200;
- }
-
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
- read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(),
- true);
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 100);
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
- EXPECT_EQ(11, block.num_rows());
- auto column_block = block.column_block(0);
- for (int i = 0; i < 11; ++i) {
- EXPECT_EQ(100 + i * 10, *(int*)column_block.cell_ptr(i));
- }
- }
- // test seek, existing key
- {
- // lower bound
- std::unique_ptr<RowCursor> lower_bound(new RowCursor());
- lower_bound->init(tablet_schema, 2);
- {
- auto cell = lower_bound->cell(0);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = 100;
- }
- {
- auto cell = lower_bound->cell(1);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = 101;
- }
-
- // upper bound
- std::unique_ptr<RowCursor> upper_bound(new RowCursor());
- upper_bound->init(tablet_schema, 2);
- {
- auto cell = upper_bound->cell(0);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = 200;
- }
- {
- auto cell = upper_bound->cell(1);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = 201;
- }
-
- // include upper key
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
- read_opts.key_ranges.emplace_back(lower_bound.get(), true, upper_bound.get(),
- true);
- std::unique_ptr<RowwiseIterator> iter;
- segment->new_iterator(schema, read_opts, &iter);
-
- RowBlockV2 block(schema, 100);
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
- EXPECT_EQ(11, block.num_rows());
- auto column_block = block.column_block(0);
- for (int i = 0; i < 11; ++i) {
- EXPECT_EQ(100 + i * 10, *(int*)column_block.cell_ptr(i));
- }
-
- // not include upper key
- StorageReadOptions read_opts1;
- read_opts1.stats = &stats;
- read_opts1.tablet_schema = tablet_schema;
- read_opts1.key_ranges.emplace_back(lower_bound.get(), true, upper_bound.get(),
- false);
- std::unique_ptr<RowwiseIterator> iter1;
- segment->new_iterator(schema, read_opts1, &iter1);
-
- RowBlockV2 block1(schema, 100);
- EXPECT_TRUE(iter1->next_batch(&block1).ok());
- EXPECT_EQ(DEL_NOT_SATISFIED, block1.delete_state());
- EXPECT_EQ(10, block1.num_rows());
- }
- // test seek, key
- {
- // lower bound
- std::unique_ptr<RowCursor> lower_bound(new RowCursor());
- lower_bound->init(tablet_schema, 1);
- {
- auto cell = lower_bound->cell(0);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = 40970;
- }
-
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
- read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 100);
- EXPECT_TRUE(iter->next_batch(&block).is<END_OF_FILE>());
- EXPECT_EQ(0, block.num_rows());
- }
- // test seek, key (-2, -1)
- {
- // lower bound
- std::unique_ptr<RowCursor> lower_bound(new RowCursor());
- lower_bound->init(tablet_schema, 1);
- {
- auto cell = lower_bound->cell(0);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = -2;
- }
-
- std::unique_ptr<RowCursor> upper_bound(new RowCursor());
- upper_bound->init(tablet_schema, 1);
- {
- auto cell = upper_bound->cell(0);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = -1;
- }
-
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
- read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(),
- false);
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 100);
- EXPECT_TRUE(iter->next_batch(&block).is<END_OF_FILE>());
- EXPECT_EQ(0, block.num_rows());
- }
- }
- }
- }
-}
-
-TEST_F(SegmentReaderWriterTest, LazyMaterialization) {
- TabletSchemaSPtr tablet_schema = create_schema({create_int_key(1), create_int_value(2)});
- ValueGenerator data_gen = [](size_t rid, int cid, int block_id, RowCursorCell& cell) {
- cell.set_not_null();
- if (cid == 0) {
- *(int*)(cell.mutable_cell_ptr()) = rid;
- } else if (cid == 1) {
- *(int*)(cell.mutable_cell_ptr()) = rid * 10;
- }
- };
-
- {
- shared_ptr<Segment> segment;
- build_segment(SegmentWriterOptions(), tablet_schema, tablet_schema, 100, data_gen,
- &segment);
- {
- // lazy enabled when predicate is subset of returned columns:
- // select c1, c2 where c2 = 30;
- Schema read_schema(tablet_schema);
- std::unique_ptr<ColumnPredicate> predicate(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(1, 30));
- const std::vector<ColumnPredicate*> predicates = {predicate.get()};
-
- OlapReaderStatistics stats;
- StorageReadOptions read_opts;
- read_opts.column_predicates = predicates;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
-
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, &iter).ok());
-
- RowBlockV2 block(read_schema, 1024);
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_TRUE(iter->is_lazy_materialization_read());
- EXPECT_EQ(1, block.selected_size());
- EXPECT_EQ(99, stats.rows_vec_cond_filtered);
- auto row = block.row(block.selection_vector()[0]);
- EXPECT_EQ("[3,30]", row.debug_string());
- }
- {
- // lazy disabled when all return columns have predicates:
- // select c1, c2 where c1 = 10 and c2 = 100;
- Schema read_schema(tablet_schema);
- std::unique_ptr<ColumnPredicate> p0(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(0, 10));
- std::unique_ptr<ColumnPredicate> p1(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(1, 100));
- const std::vector<ColumnPredicate*> predicates = {p0.get(), p1.get()};
-
- OlapReaderStatistics stats;
- StorageReadOptions read_opts;
- read_opts.column_predicates = predicates;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
-
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, &iter).ok());
-
- RowBlockV2 block(read_schema, 1024);
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_FALSE(iter->is_lazy_materialization_read());
- EXPECT_EQ(1, block.selected_size());
- EXPECT_EQ(99, stats.rows_vec_cond_filtered);
- auto row = block.row(block.selection_vector()[0]);
- EXPECT_EQ("[10,100]", row.debug_string());
- }
- {
- // lazy disabled when no predicate:
- // select c2
- std::vector<ColumnId> read_cols = {1};
- Schema read_schema(tablet_schema->columns(), read_cols);
- OlapReaderStatistics stats;
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
-
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, &iter).ok());
-
- RowBlockV2 block(read_schema, 1024);
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_FALSE(iter->is_lazy_materialization_read());
- EXPECT_EQ(100, block.selected_size());
- for (int i = 0; i < block.selected_size(); ++i) {
- auto row = block.row(block.selection_vector()[i]);
- EXPECT_EQ(strings::Substitute("[$0]", i * 10), row.debug_string());
- }
- }
- }
-
- {
- tablet_schema = create_schema({create_int_key(1, true, false, true), create_int_value(2)});
- shared_ptr<Segment> segment;
- SegmentWriterOptions write_opts;
- build_segment(write_opts, tablet_schema, tablet_schema, 100, data_gen, &segment);
- EXPECT_TRUE(column_contains_index(segment->footer().columns(0), BITMAP_INDEX));
- {
- // lazy disabled when all predicates are removed by bitmap index:
- // select c1, c2 where c2 = 30;
- Schema read_schema(tablet_schema);
- std::unique_ptr<ColumnPredicate> predicate(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(0, 20));
- const std::vector<ColumnPredicate*> predicates = {predicate.get()};
-
- OlapReaderStatistics stats;
- StorageReadOptions read_opts;
- read_opts.column_predicates = predicates;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
-
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(read_schema, read_opts, &iter).ok());
-
- RowBlockV2 block(read_schema, 1024);
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_FALSE(iter->is_lazy_materialization_read());
- EXPECT_EQ(1, block.selected_size());
- EXPECT_EQ(99, stats.rows_bitmap_index_filtered);
- EXPECT_EQ(0, stats.rows_vec_cond_filtered);
- auto row = block.row(block.selection_vector()[0]);
- EXPECT_EQ("[20,200]", row.debug_string());
- }
- }
-}
-
-TEST_F(SegmentReaderWriterTest, TestIndex) {
- TabletSchemaSPtr tablet_schema =
- create_schema({create_int_key(1), create_int_key(2, true, true), create_int_key(3),
- create_int_value(4)});
-
- SegmentWriterOptions opts;
- opts.num_rows_per_block = 10;
-
- std::shared_ptr<Segment> segment;
- // 0, 1, 2, 3
- // 10, 11, 12, 13
- // 20, 21, 22, 23
- // ...
- // 64k int will generate 4 pages
- build_segment(
- opts, tablet_schema, tablet_schema, 64 * 1024,
- [](size_t rid, int cid, int block_id, RowCursorCell& cell) {
- cell.set_not_null();
- if (rid >= 16 * 1024 && rid < 32 * 1024) {
- // make second page all rows equal
- *(int*)cell.mutable_cell_ptr() = 164000 + cid;
-
- } else {
- *(int*)cell.mutable_cell_ptr() = rid * 10 + cid;
- }
- },
- &segment);
-}
-
-TEST_F(SegmentReaderWriterTest, estimate_segment_size) {
- size_t num_rows_per_block = 10;
-
- std::shared_ptr<TabletSchema> tablet_schema(new TabletSchema());
- tablet_schema->_num_columns = 4;
- tablet_schema->_num_key_columns = 3;
- tablet_schema->_num_short_key_columns = 2;
- tablet_schema->_num_rows_per_row_block = num_rows_per_block;
- tablet_schema->_cols.push_back(create_int_key(1));
- tablet_schema->_cols.push_back(create_int_key(2));
- tablet_schema->_cols.push_back(create_int_key(3));
- tablet_schema->_cols.push_back(create_int_value(4));
-
- SegmentWriterOptions opts;
- opts.num_rows_per_block = num_rows_per_block;
-
- std::string fname = kSegmentDir + "/int_case";
- auto fs = io::global_local_filesystem();
-
- io::FileWriterPtr file_writer;
- Status st = fs->create_file(fname, &file_writer);
- EXPECT_TRUE(st.ok()) << st.to_string();
- DataDir data_dir(kSegmentDir);
- data_dir.init();
- SegmentWriter writer(file_writer.get(), 0, tablet_schema, &data_dir, INT32_MAX, opts);
- st = writer.init();
- EXPECT_TRUE(st.ok()) << st.to_string();
-
- RowCursor row;
- auto olap_st = row.init(tablet_schema);
- EXPECT_EQ(Status::OK(), olap_st);
-
- // 0, 1, 2, 3
- // 10, 11, 12, 13
- // 20, 21, 22, 23
- for (int i = 0; i < LOOP_LESS_OR_MORE(1024, 1048576); ++i) {
- for (int j = 0; j < 4; ++j) {
- auto cell = row.cell(j);
- cell.set_not_null();
- *(int*)cell.mutable_cell_ptr() = i * 10 + j;
- }
- writer.append_row(row);
- }
-
- uint32_t segment_size = writer.estimate_segment_size();
- LOG(INFO) << "estimate segment size is:" << segment_size;
-
- uint64_t file_size = 0;
- uint64_t index_size;
- EXPECT_TRUE(writer.finalize(&file_size, &index_size).ok());
- EXPECT_TRUE(file_writer->close().ok());
-
- file_size = std::filesystem::file_size(fname);
- LOG(INFO) << "segment file size is:" << file_size;
-
- EXPECT_NE(segment_size, 0);
-}
-
-TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) {
- std::vector<TabletColumn> columns = {create_int_key(1), create_int_key(2), create_int_value(3),
- create_int_value(4)};
- TabletSchemaSPtr build_schema = create_schema(columns);
-
- // add a column with null default value
- {
- std::vector<TabletColumn> read_columns = columns;
- read_columns.push_back(create_int_value(5, OLAP_FIELD_AGGREGATION_SUM, true, "NULL"));
- TabletSchemaSPtr query_schema = create_schema(read_columns);
-
- std::shared_ptr<Segment> segment;
- build_segment(SegmentWriterOptions(), build_schema, query_schema, 4096, DefaultIntGenerator,
- &segment);
-
- Schema schema(query_schema);
- OlapReaderStatistics stats;
- // scan all rows
- {
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = query_schema;
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 1024);
-
- int left = 4096;
-
- int rowid = 0;
- while (left > 0) {
- int rows_read = left > 1024 ? 1024 : left;
- block.clear();
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
- EXPECT_EQ(rows_read, block.num_rows());
- left -= rows_read;
-
- for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
- auto cid = block.schema()->column_ids()[j];
- auto column_block = block.column_block(j);
- for (int i = 0; i < rows_read; ++i) {
- int rid = rowid + i;
- if (cid == 4) {
- EXPECT_TRUE(column_block.is_null(i));
- } else {
- EXPECT_FALSE(column_block.is_null(i));
- EXPECT_EQ(rid * 10 + cid, *(int*)column_block.cell_ptr(i));
- }
- }
- }
- rowid += rows_read;
- }
- }
- }
-
- // add a column with non-null default value
- {
- std::vector<TabletColumn> read_columns = columns;
- read_columns.push_back(create_int_value(5, OLAP_FIELD_AGGREGATION_SUM, true, "10086"));
- TabletSchemaSPtr query_schema = create_schema(read_columns);
-
- std::shared_ptr<Segment> segment;
- build_segment(SegmentWriterOptions(), build_schema, query_schema, 4096, DefaultIntGenerator,
- &segment);
-
- Schema schema(query_schema);
- OlapReaderStatistics stats;
- // scan all rows
- {
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = query_schema;
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 1024);
-
- int left = 4096;
-
- int rowid = 0;
- while (left > 0) {
- int rows_read = left > 1024 ? 1024 : left;
- block.clear();
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_EQ(rows_read, block.num_rows());
- left -= rows_read;
-
- for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
- auto cid = block.schema()->column_ids()[j];
- auto column_block = block.column_block(j);
- for (int i = 0; i < rows_read; ++i) {
- int rid = rowid + i;
- if (cid == 4) {
- EXPECT_FALSE(column_block.is_null(i));
- EXPECT_EQ(10086, *(int*)column_block.cell_ptr(i));
- } else {
- EXPECT_FALSE(column_block.is_null(i));
- EXPECT_EQ(rid * 10 + cid, *(int*)column_block.cell_ptr(i));
- }
- }
- }
- rowid += rows_read;
- }
- }
- }
-}
-
-TEST_F(SegmentReaderWriterTest, TestStringDict) {
- size_t num_rows_per_block = 10;
- MemPool pool;
-
- std::shared_ptr<TabletSchema> tablet_schema(new TabletSchema());
- tablet_schema->_num_short_key_columns = 2;
- tablet_schema->_num_rows_per_row_block = num_rows_per_block;
- tablet_schema->append_column(create_char_key(1));
- tablet_schema->append_column(create_char_key(2));
- tablet_schema->append_column(create_varchar_key(3));
- tablet_schema->append_column(create_varchar_key(4));
-
- SegmentWriterOptions opts;
- opts.num_rows_per_block = num_rows_per_block;
-
- std::string fname = kSegmentDir + "/string_case";
- auto fs = io::global_local_filesystem();
-
- io::FileWriterPtr file_writer;
- Status st = fs->create_file(fname, &file_writer);
- EXPECT_TRUE(st.ok());
- DataDir data_dir(kSegmentDir);
- data_dir.init();
- SegmentWriter writer(file_writer.get(), 0, tablet_schema, &data_dir, INT32_MAX, opts);
- st = writer.init();
- EXPECT_TRUE(st.ok());
-
- RowCursor row;
- auto olap_st = row.init(tablet_schema);
- EXPECT_EQ(Status::OK(), olap_st);
-
- // 0, 1, 2, 3
- // 10, 11, 12, 13
- // 20, 21, 22, 23
- // convert int to string
- for (int i = 0; i < 4096; ++i) {
- for (int j = 0; j < 4; ++j) {
- auto cell = row.cell(j);
- cell.set_not_null();
- set_column_value_by_type(tablet_schema->_cols[j]._type, i * 10 + j,
- (char*)cell.mutable_cell_ptr(), &pool,
- tablet_schema->_cols[j]._length);
- }
- Status status = writer.append_row(row);
- EXPECT_TRUE(status.ok());
- }
-
- uint64_t file_size = 0;
- uint64_t index_size;
- EXPECT_TRUE(writer.finalize(&file_size, &index_size).ok());
- EXPECT_TRUE(file_writer->close().ok());
-
- {
- std::shared_ptr<Segment> segment;
- st = Segment::open(fs, fname, "", 0, {}, tablet_schema, &segment);
- EXPECT_TRUE(st.ok());
- EXPECT_EQ(4096, segment->num_rows());
- Schema schema(tablet_schema);
- OlapReaderStatistics stats;
- // scan all rows
- {
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 1024);
-
- int left = 4096;
- int rowid = 0;
-
- while (left > 0) {
- int rows_read = left > 1024 ? 1024 : left;
- block.clear();
- st = iter->next_batch(&block);
- EXPECT_TRUE(st.ok());
- EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
- EXPECT_EQ(rows_read, block.num_rows());
- left -= rows_read;
-
- for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
- auto cid = block.schema()->column_ids()[j];
- auto column_block = block.column_block(j);
- for (int i = 0; i < rows_read; ++i) {
- int rid = rowid + i;
- EXPECT_FALSE(column_block.is_null(i));
- const Slice* actual =
- reinterpret_cast<const Slice*>(column_block.cell_ptr(i));
-
- Slice expect;
- set_column_value_by_type(tablet_schema->_cols[j]._type, rid * 10 + cid,
- reinterpret_cast<char*>(&expect), &pool,
- tablet_schema->_cols[j]._length);
- EXPECT_EQ(expect.to_string(), actual->to_string());
- }
- }
- rowid += rows_read;
- }
- }
-
- // test seek, key
- {
- // lower bound
- std::unique_ptr<RowCursor> lower_bound(new RowCursor());
- lower_bound->init(tablet_schema, 1);
- {
- auto cell = lower_bound->cell(0);
- cell.set_not_null();
- set_column_value_by_type(OLAP_FIELD_TYPE_CHAR, 40970,
- (char*)cell.mutable_cell_ptr(), &pool,
- tablet_schema->_cols[0]._length);
- }
-
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
- read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 100);
- st = iter->next_batch(&block);
- EXPECT_TRUE(st.is<END_OF_FILE>());
- EXPECT_EQ(0, block.num_rows());
- }
-
- // test seek, key (-2, -1)
- {
- // lower bound
- std::unique_ptr<RowCursor> lower_bound(new RowCursor());
- lower_bound->init(tablet_schema, 1);
- {
- auto cell = lower_bound->cell(0);
- cell.set_not_null();
- set_column_value_by_type(OLAP_FIELD_TYPE_CHAR, -2, (char*)cell.mutable_cell_ptr(),
- &pool, tablet_schema->_cols[0]._length);
- }
-
- std::unique_ptr<RowCursor> upper_bound(new RowCursor());
- upper_bound->init(tablet_schema, 1);
- {
- auto cell = upper_bound->cell(0);
- cell.set_not_null();
- set_column_value_by_type(OLAP_FIELD_TYPE_CHAR, -1, (char*)cell.mutable_cell_ptr(),
- &pool, tablet_schema->_cols[0]._length);
- }
-
- StorageReadOptions read_opts;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
- read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false);
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 100);
- st = iter->next_batch(&block);
- EXPECT_TRUE(st.is<END_OF_FILE>());
- EXPECT_EQ(0, block.num_rows());
- }
- }
-}
-
-TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) {
- TabletSchemaSPtr tablet_schema = create_schema({create_int_key(1, true, false, true),
- create_int_key(2, true, false, true),
- create_int_value(3), create_int_value(4)});
-
- SegmentWriterOptions opts;
- shared_ptr<Segment> segment;
- build_segment(opts, tablet_schema, tablet_schema, 4096, DefaultIntGenerator, &segment);
- EXPECT_TRUE(column_contains_index(segment->footer().columns(0), BITMAP_INDEX));
- EXPECT_TRUE(column_contains_index(segment->footer().columns(1), BITMAP_INDEX));
-
- {
- Schema schema(tablet_schema);
-
- // test where v1=10
- {
- std::vector<ColumnPredicate*> column_predicates;
- std::unique_ptr<ColumnPredicate> predicate(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(0, 10));
- column_predicates.emplace_back(predicate.get());
-
- StorageReadOptions read_opts;
- OlapReaderStatistics stats;
- read_opts.column_predicates = column_predicates;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
-
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 1024);
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_EQ(block.num_rows(), 1);
- EXPECT_EQ(read_opts.stats->raw_rows_read, 1);
- }
-
- // test where v1=10 and v2=11
- {
- std::vector<ColumnPredicate*> column_predicates;
- std::unique_ptr<ColumnPredicate> predicate(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(0, 10));
- std::unique_ptr<ColumnPredicate> predicate2(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(1, 11));
- column_predicates.emplace_back(predicate.get());
- column_predicates.emplace_back(predicate2.get());
-
- StorageReadOptions read_opts;
- OlapReaderStatistics stats;
- read_opts.column_predicates = column_predicates;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
-
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 1024);
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_EQ(block.num_rows(), 1);
- EXPECT_EQ(read_opts.stats->raw_rows_read, 1);
- }
-
- // test where v1=10 and v2=15
- {
- std::vector<ColumnPredicate*> column_predicates;
- std::unique_ptr<ColumnPredicate> predicate(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(0, 10));
- std::unique_ptr<ColumnPredicate> predicate2(
- new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(1, 15));
- column_predicates.emplace_back(predicate.get());
- column_predicates.emplace_back(predicate2.get());
-
- StorageReadOptions read_opts;
- OlapReaderStatistics stats;
- read_opts.column_predicates = column_predicates;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
-
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 1024);
- EXPECT_FALSE(iter->next_batch(&block).ok());
- EXPECT_EQ(read_opts.stats->raw_rows_read, 0);
- }
-
- // test where v1 in (10,20,1)
- {
- std::vector<ColumnPredicate*> column_predicates;
- phmap::flat_hash_set<int32_t> values;
- values.insert(10);
- values.insert(20);
- values.insert(1);
- std::unique_ptr<ColumnPredicate> predicate(
- new InListPredicateBase<TYPE_INT, PredicateType::IN_LIST>(0, values));
- column_predicates.emplace_back(predicate.get());
-
- StorageReadOptions read_opts;
- OlapReaderStatistics stats;
- read_opts.column_predicates = column_predicates;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
-
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 1024);
- EXPECT_TRUE(iter->next_batch(&block).ok());
- EXPECT_EQ(read_opts.stats->raw_rows_read, 2);
- }
-
- // test where v1 not in (10,20)
- {
- std::vector<ColumnPredicate*> column_predicates;
- phmap::flat_hash_set<int32_t> values;
- values.insert(10);
- values.insert(20);
- std::unique_ptr<ColumnPredicate> predicate(
- new InListPredicateBase<TYPE_INT, PredicateType::NOT_IN_LIST>(0, values));
- column_predicates.emplace_back(predicate.get());
-
- StorageReadOptions read_opts;
- OlapReaderStatistics stats;
- read_opts.column_predicates = column_predicates;
- read_opts.stats = &stats;
- read_opts.tablet_schema = tablet_schema;
-
- std::unique_ptr<RowwiseIterator> iter;
- ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
-
- RowBlockV2 block(schema, 1024);
-
- Status st;
- do {
- block.clear();
- st = iter->next_batch(&block);
- } while (st.ok());
- EXPECT_EQ(read_opts.stats->raw_rows_read, 4094);
- }
- }
-}
-
-TEST_F(SegmentReaderWriterTest, TestBloomFilterIndexUniqueModel) {
- TabletSchemaSPtr schema =
- create_schema({create_int_key(1), create_int_key(2), create_int_key(3),
- create_int_value(4, OLAP_FIELD_AGGREGATION_REPLACE, true, "", true)});
-
- // for not base segment
- SegmentWriterOptions opts1;
- shared_ptr<Segment> seg1;
- build_segment(opts1, schema, schema, 100, DefaultIntGenerator, &seg1);
- EXPECT_TRUE(column_contains_index(seg1->footer().columns(3), BLOOM_FILTER_INDEX));
-
- // for base segment
- SegmentWriterOptions opts2;
- shared_ptr<Segment> seg2;
- build_segment(opts2, schema, schema, 100, DefaultIntGenerator, &seg2);
- EXPECT_TRUE(column_contains_index(seg2->footer().columns(3), BLOOM_FILTER_INDEX));
-}
-
-TEST_F(SegmentReaderWriterTest, TestLookupRowKey) {
- TabletSchemaSPtr tablet_schema = create_schema(
- {create_int_key(1), create_int_key(2), create_int_value(3), create_int_value(4)},
- UNIQUE_KEYS);
- SegmentWriterOptions opts;
- opts.enable_unique_key_merge_on_write = true;
- opts.num_rows_per_block = 10;
-
- shared_ptr<Segment> segment;
- build_segment(opts, tablet_schema, tablet_schema, 4096, DefaultIntGenerator, &segment);
-
- // key exist
- {
- RowCursor row;
- auto olap_st = row.init(tablet_schema);
- EXPECT_EQ(Status::OK(), olap_st);
- for (size_t rid = 0; rid < 4096; ++rid) {
- for (int cid = 0; cid < tablet_schema->num_columns(); ++cid) {
- int row_block_id = rid / opts.num_rows_per_block;
- RowCursorCell cell = row.cell(cid);
- DefaultIntGenerator(rid, cid, row_block_id, cell);
- }
- std::string encoded_key;
- encode_key<RowCursor, true, true>(&encoded_key, row, tablet_schema->num_key_columns());
- RowLocation row_location;
- Status st = segment->lookup_row_key(encoded_key, &row_location);
- EXPECT_EQ(row_location.row_id, rid);
- EXPECT_EQ(st, Status::OK());
- }
- }
-
- // key not exist
- {
- RowCursor row;
- auto olap_st = row.init(tablet_schema);
- EXPECT_EQ(Status::OK(), olap_st);
- for (size_t rid = 4096; rid < 4100; ++rid) {
- for (int cid = 0; cid < tablet_schema->num_columns(); ++cid) {
- int row_block_id = rid / opts.num_rows_per_block;
- RowCursorCell cell = row.cell(cid);
- DefaultIntGenerator(rid, cid, row_block_id, cell);
- }
- std::string encoded_key;
- encode_key<RowCursor, true, true>(&encoded_key, row, tablet_schema->num_key_columns());
- RowLocation row_location;
- Status st = segment->lookup_row_key(encoded_key, &row_location);
- EXPECT_EQ(st.is<NOT_FOUND>(), true);
- }
- }
-}
-
-TEST_F(SegmentReaderWriterTest, TestLookupRowKeyWithSequenceCol) {
- TabletSchemaSPtr tablet_schema = create_schema(
- {create_int_key(1), create_int_key(2), create_int_value(3), create_int_value(4)},
- UNIQUE_KEYS);
- tablet_schema->_sequence_col_idx = 3;
- SegmentWriterOptions opts;
- opts.enable_unique_key_merge_on_write = true;
- opts.num_rows_per_block = 10;
-
- shared_ptr<Segment> segment;
- build_segment(opts, tablet_schema, tablet_schema, 4096, DefaultIntGenerator, &segment);
-
- // key exist
- {
- RowCursor row;
- auto olap_st = row.init(tablet_schema);
- EXPECT_EQ(Status::OK(), olap_st);
- for (size_t rid = 0; rid < 4096; ++rid) {
- for (int cid = 0; cid < tablet_schema->num_columns(); ++cid) {
- int row_block_id = rid / opts.num_rows_per_block;
- RowCursorCell cell = row.cell(cid);
- DefaultIntGenerator(rid, cid, row_block_id, cell);
- }
- std::string encoded_key;
- encode_key<RowCursor, true, true>(&encoded_key, row, tablet_schema->num_key_columns());
- encoded_key.push_back(KEY_NORMAL_MARKER);
- auto cid = tablet_schema->sequence_col_idx();
- auto cell = row.cell(cid);
- row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), &encoded_key);
-
- RowLocation row_location;
- Status st = segment->lookup_row_key(encoded_key, &row_location);
- EXPECT_EQ(row_location.row_id, rid);
- EXPECT_EQ(st, Status::OK());
- }
- }
-
- // key not exist
- {
- RowCursor row;
- auto olap_st = row.init(tablet_schema);
- EXPECT_EQ(Status::OK(), olap_st);
- for (size_t rid = 4096; rid < 4100; ++rid) {
- for (int cid = 0; cid < tablet_schema->num_columns(); ++cid) {
- int row_block_id = rid / opts.num_rows_per_block;
- RowCursorCell cell = row.cell(cid);
- DefaultIntGenerator(rid, cid, row_block_id, cell);
- }
- std::string encoded_key;
- encode_key<RowCursor, true, true>(&encoded_key, row, tablet_schema->num_key_columns());
-
- encoded_key.push_back(KEY_NORMAL_MARKER);
- auto cid = tablet_schema->sequence_col_idx();
- auto cell = row.cell(cid);
- row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), &encoded_key);
-
- RowLocation row_location;
- Status st = segment->lookup_row_key(encoded_key, &row_location);
- EXPECT_EQ(st.is<NOT_FOUND>(), true);
- }
- }
-
- // key exist, sequence id is smaller
- {
- RowCursor row;
- auto olap_st = row.init(tablet_schema);
- EXPECT_EQ(Status::OK(), olap_st);
- for (int cid = 0; cid < tablet_schema->num_columns(); ++cid) {
- RowCursorCell cell = row.cell(cid);
- cell.set_not_null();
- if (cid == tablet_schema->sequence_col_idx()) {
- *(int*)cell.mutable_cell_ptr() = 100 + cid - 3;
- } else {
- *(int*)cell.mutable_cell_ptr() = 100 + cid;
- }
- }
- std::string encoded_key;
- encode_key<RowCursor, true, true>(&encoded_key, row, tablet_schema->num_key_columns());
-
- encoded_key.push_back(KEY_NORMAL_MARKER);
- auto cid = tablet_schema->sequence_col_idx();
- auto cell = row.cell(cid);
- row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), &encoded_key);
-
- RowLocation row_location;
- Status st = segment->lookup_row_key(encoded_key, &row_location);
- EXPECT_EQ(st.is<ALREADY_EXIST>(), true);
- }
-}
-
-} // namespace segment_v2
-} // namespace doris
diff --git a/be/test/olap/segcompaction_test.cpp b/be/test/olap/segcompaction_test.cpp
index 80af09f65f..2e50172f5b 100644
--- a/be/test/olap/segcompaction_test.cpp
+++ b/be/test/olap/segcompaction_test.cpp
@@ -201,115 +201,6 @@ private:
std::unique_ptr<DataDir> _data_dir;
};
-TEST_F(SegCompactionTest, SegCompactionThenRead) {
- config::enable_segcompaction = true;
- config::enable_storage_vectorization = true;
- Status s;
- TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
- create_tablet_schema(tablet_schema);
-
- RowsetSharedPtr rowset;
- const int num_segments = 15;
- const uint32_t rows_per_segment = 4096;
- config::segcompaction_small_threshold = 6000; // set threshold above
- // rows_per_segment
- std::vector<uint32_t> segment_num_rows;
- { // write `num_segments * rows_per_segment` rows to rowset
- RowsetWriterContext writer_context;
- create_rowset_writer_context(10047, tablet_schema, &writer_context);
-
- std::unique_ptr<RowsetWriter> rowset_writer;
- s = RowsetFactory::create_rowset_writer(writer_context, false, &rowset_writer);
- EXPECT_EQ(Status::OK(), s);
-
- RowCursor input_row;
- input_row.init(tablet_schema);
-
- // for segment "i", row "rid"
- // k1 := rid*10 + i
- // k2 := k1 * 10
- // k3 := rid
- for (int i = 0; i < num_segments; ++i) {
- MemPool mem_pool;
- for (int rid = 0; rid < rows_per_segment; ++rid) {
- uint32_t k1 = rid * 100 + i;
- uint32_t k2 = i;
- uint32_t k3 = rid;
- input_row.set_field_content(0, reinterpret_cast<char*>(&k1), &mem_pool);
- input_row.set_field_content(1, reinterpret_cast<char*>(&k2), &mem_pool);
- input_row.set_field_content(2, reinterpret_cast<char*>(&k3), &mem_pool);
- s = rowset_writer->add_row(input_row);
- EXPECT_EQ(Status::OK(), s);
- }
- s = rowset_writer->flush();
- EXPECT_EQ(Status::OK(), s);
- }
-
- rowset = rowset_writer->build();
- std::vector<std::string> ls;
- ls.push_back("10047_0.dat");
- ls.push_back("10047_1.dat");
- ls.push_back("10047_2.dat");
- ls.push_back("10047_3.dat");
- ls.push_back("10047_4.dat");
- ls.push_back("10047_5.dat");
- ls.push_back("10047_6.dat");
- EXPECT_TRUE(check_dir(ls));
- }
-
- { // read
- RowsetReaderContext reader_context;
- reader_context.tablet_schema = tablet_schema;
- // use this type to avoid cache from other ut
- reader_context.reader_type = READER_CUMULATIVE_COMPACTION;
- reader_context.need_ordered_result = true;
- std::vector<uint32_t> return_columns = {0, 1, 2};
- reader_context.return_columns = &return_columns;
- reader_context.stats = &_stats;
-
- // without predicates
- {
- RowsetReaderSharedPtr rowset_reader;
- create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader);
- RowBlock* output_block;
- uint32_t num_rows_read = 0;
- while ((s = rowset_reader->next_block(&output_block)) == Status::OK()) {
- EXPECT_TRUE(output_block != nullptr);
- EXPECT_GT(output_block->row_num(), 0);
- EXPECT_EQ(0, output_block->pos());
- EXPECT_EQ(output_block->row_num(), output_block->limit());
- EXPECT_EQ(return_columns, output_block->row_block_info().column_ids);
- // after sort merge segments, k1 will be 0, 1, 2, 10, 11, 12, 20, 21, 22, ..., 40950, 40951, 40952
- for (int i = 0; i < output_block->row_num(); ++i) {
- char* field1 = output_block->field_ptr(i, 0);
- char* field2 = output_block->field_ptr(i, 1);
- char* field3 = output_block->field_ptr(i, 2);
- // test null bit
- EXPECT_FALSE(*reinterpret_cast<bool*>(field1));
- EXPECT_FALSE(*reinterpret_cast<bool*>(field2));
- EXPECT_FALSE(*reinterpret_cast<bool*>(field3));
- uint32_t k1 = *reinterpret_cast<uint32_t*>(field1 + 1);
- uint32_t k2 = *reinterpret_cast<uint32_t*>(field2 + 1);
- uint32_t k3 = *reinterpret_cast<uint32_t*>(field3 + 1);
- EXPECT_EQ(100 * k3 + k2, k1);
-
- num_rows_read++;
- }
- }
- EXPECT_EQ(Status::Error<END_OF_FILE>(), s);
- EXPECT_TRUE(output_block == nullptr);
- EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read);
- EXPECT_TRUE(rowset_reader->get_segment_num_rows(&segment_num_rows).ok());
- size_t total_num_rows = 0;
- //EXPECT_EQ(segment_num_rows.size(), num_segments);
- for (const auto& i : segment_num_rows) {
- total_num_rows += i;
- }
- EXPECT_EQ(total_num_rows, num_rows_read);
- }
- }
-}
-
TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
config::enable_segcompaction = true;
config::enable_storage_vectorization = true;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org