You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/01 07:02:28 UTC
[doris] 02/02: [Enhancement](bitmapfilter) Support bitmap filter to apply zone_map index to filter pages (#14635)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-unstable
in repository https://gitbox.apache.org/repos/asf/doris.git
commit d85c1e98b472bd915f68c66e9e407b4c17e6d3ee
Author: luozenglin <37...@users.noreply.github.com>
AuthorDate: Thu Dec 1 10:41:09 2022 +0800
[Enhancement](bitmapfilter) Support bitmap filter to apply zone_map index to filter pages (#14635)
---
be/src/exprs/bitmapfilter_predicate.h | 35 ++++++----
be/src/exprs/runtime_filter.cpp | 21 ++++--
be/src/exprs/runtime_filter.h | 3 +
be/src/olap/bitmap_filter_predicate.h | 45 +++++++------
be/src/olap/olap_common.h | 2 +
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 1 +
be/src/util/bitmap_value.h | 76 +++++++++++++++++++++-
be/src/vec/exec/scan/new_olap_scan_node.cpp | 1 +
be/src/vec/exec/scan/new_olap_scan_node.h | 1 +
be/src/vec/exec/scan/new_olap_scanner.cpp | 2 +
be/src/vec/runtime/shared_hash_table_controller.h | 2 +-
11 files changed, 151 insertions(+), 38 deletions(-)
diff --git a/be/src/exprs/bitmapfilter_predicate.h b/be/src/exprs/bitmapfilter_predicate.h
index c9a2efc621..dbc9447034 100644
--- a/be/src/exprs/bitmapfilter_predicate.h
+++ b/be/src/exprs/bitmapfilter_predicate.h
@@ -17,7 +17,8 @@
#pragma once
-#include "common/object_pool.h"
+#include <algorithm>
+
#include "gutil/integral_types.h"
#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
@@ -30,14 +31,15 @@ class BitmapFilterFuncBase {
public:
virtual void insert(const void* data) = 0;
virtual void insert_many(const std::vector<const BitmapValue*> bitmaps) = 0;
- virtual bool find(uint64 data) const = 0;
- virtual bool is_empty() = 0;
+ virtual bool empty() = 0;
virtual Status assign(BitmapValue* bitmap_value) = 0;
virtual void light_copy(BitmapFilterFuncBase* other) { _not_in = other->_not_in; };
virtual uint16_t find_fixed_len_olap_engine(const char* data, const uint8* nullmap,
uint16_t* offsets, int number) = 0;
virtual void find_batch(const char* data, const uint8* nullmap, int number,
uint8* results) const = 0;
+ virtual size_t size() const = 0;
+ bool is_not_in() const { return _not_in; }
void set_not_in(bool not_in) { _not_in = not_in; }
virtual ~BitmapFilterFuncBase() = default;
@@ -51,7 +53,7 @@ class BitmapFilterFunc : public BitmapFilterFuncBase {
public:
using CppType = typename PrimitiveTypeTraits<type>::CppType;
- BitmapFilterFunc() : _bitmap_value(std::make_shared<BitmapValue>()), _empty(true) {}
+ BitmapFilterFunc() : _bitmap_value(std::make_shared<BitmapValue>()) {}
~BitmapFilterFunc() override = default;
@@ -59,15 +61,13 @@ public:
void insert_many(const std::vector<const BitmapValue*> bitmaps) override;
- bool find(uint64 data) const override { return _not_in ^ _bitmap_value->contains(data); }
-
uint16_t find_fixed_len_olap_engine(const char* data, const uint8* nullmap, uint16_t* offsets,
int number) override;
void find_batch(const char* data, const uint8* nullmap, int number,
uint8* results) const override;
- bool is_empty() override { return _empty; }
+ bool empty() override { return _bitmap_value->empty(); }
Status assign(BitmapValue* bitmap_value) override {
*_bitmap_value = *bitmap_value;
@@ -76,9 +76,25 @@ public:
void light_copy(BitmapFilterFuncBase* bloomfilter_func) override;
+ size_t size() const override { return _bitmap_value->cardinality(); }
+
+ uint64_t max() { return _bitmap_value->max(nullptr); }
+
+ uint64_t min() { return _bitmap_value->min(nullptr); }
+
+ bool contains_any(CppType left, CppType right) {
+ if (right < 0) {
+ return false;
+ }
+ return _bitmap_value->contains_any(std::max(left, (CppType)0), right);
+ }
+
+ std::shared_ptr<BitmapValue> get_inner_bitmap() { return _bitmap_value; }
+
private:
std::shared_ptr<BitmapValue> _bitmap_value;
- bool _empty;
+
+ bool find(CppType data) const { return _not_in ^ (data >= 0 && _bitmap_value->contains(data)); }
};
template <PrimitiveType type>
@@ -88,7 +104,6 @@ void BitmapFilterFunc<type>::insert(const void* data) {
}
*_bitmap_value |= *reinterpret_cast<const BitmapValue*>(data);
- _empty = false;
}
template <PrimitiveType type>
@@ -97,7 +112,6 @@ void BitmapFilterFunc<type>::insert_many(const std::vector<const BitmapValue*> b
return;
}
_bitmap_value->fastunion(bitmaps);
- _empty = false;
}
template <PrimitiveType type>
@@ -136,7 +150,6 @@ template <PrimitiveType type>
void BitmapFilterFunc<type>::light_copy(BitmapFilterFuncBase* bitmapfilter_func) {
BitmapFilterFuncBase::light_copy(bitmapfilter_func);
auto other_func = reinterpret_cast<BitmapFilterFunc*>(bitmapfilter_func);
- _empty = other_func->_empty;
_bitmap_value = other_func->_bitmap_value;
}
diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index 39760c7c3d..d584f06d17 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -17,6 +17,8 @@
#include "runtime_filter.h"
+#include <memory>
+
#include "common/object_pool.h"
#include "common/status.h"
#include "exprs/binary_predicate.h"
@@ -457,8 +459,8 @@ public:
return Status::OK();
}
case RuntimeFilterType::BITMAP_FILTER: {
- _context._bitmap_filter_func.reset(create_bitmap_filter(_column_return_type));
- _context._bitmap_filter_func->set_not_in(params->bitmap_filter_not_in);
+ _context.bitmap_filter_func.reset(create_bitmap_filter(_column_return_type));
+ _context.bitmap_filter_func->set_not_in(params->bitmap_filter_not_in);
return Status::OK();
}
default:
@@ -524,7 +526,7 @@ public:
break;
}
case RuntimeFilterType::BITMAP_FILTER: {
- _context._bitmap_filter_func->insert(data);
+ _context.bitmap_filter_func->insert(data);
break;
}
default:
@@ -602,7 +604,7 @@ public:
for (int index : rows) {
bitmaps.push_back(&(col->get_data()[index]));
}
- _context._bitmap_filter_func->insert_many(bitmaps);
+ _context.bitmap_filter_func->insert_many(bitmaps);
}
RuntimeFilterType get_real_type() {
@@ -1086,6 +1088,10 @@ public:
size_t get_in_filter_size() const { return _context.hybrid_set->size(); }
+ std::shared_ptr<BitmapFilterFuncBase> get_bitmap_filter() const {
+ return _context.bitmap_filter_func;
+ }
+
friend class IRuntimeFilter;
private:
@@ -1260,6 +1266,11 @@ void IRuntimeFilter::signal() {
if (_wrapper->get_real_type() == RuntimeFilterType::IN_FILTER) {
_profile->add_info_string("InFilterSize", std::to_string(_wrapper->get_in_filter_size()));
}
+ if (_wrapper->get_real_type() == RuntimeFilterType::BITMAP_FILTER) {
+ auto bitmap_filter = _wrapper->get_bitmap_filter();
+ _profile->add_info_string("BitmapSize", std::to_string(bitmap_filter->size()));
+ _profile->add_info_string("IsNotIn", bitmap_filter->is_not_in() ? "true" : "false");
+ }
}
BloomFilterFuncBase* IRuntimeFilter::get_bloomfilter() const {
@@ -1935,7 +1946,7 @@ Status RuntimePredicateWrapper::get_push_vexprs(std::vector<doris::vectorized::V
node.__set_vector_opcode(to_in_opcode(_column_return_type));
node.__set_is_nullable(false);
auto bitmap_pred = _pool->add(new vectorized::VBitmapPredicate(node));
- bitmap_pred->set_filter(_context._bitmap_filter_func);
+ bitmap_pred->set_filter(_context.bitmap_filter_func);
auto cloned_vexpr = vprob_expr->root()->clone(_pool);
bitmap_pred->add_child(cloned_vexpr);
auto wrapper = _pool->add(new vectorized::VRuntimeFilterWrapper(node, bitmap_pred));
diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h
index 09afe6c640..5cd08aca46 100644
--- a/be/src/exprs/runtime_filter.h
+++ b/be/src/exprs/runtime_filter.h
@@ -43,6 +43,7 @@ class PMinMaxFilter;
class HashJoinNode;
class RuntimeProfile;
class BloomFilterFuncBase;
+class BitmapFilterFuncBase;
namespace vectorized {
class VExpr;
@@ -252,6 +253,8 @@ public:
void ready_for_publish();
+ std::shared_ptr<BitmapFilterFuncBase> get_bitmap_filter() const;
+
static bool enable_use_batch(int be_exec_version, PrimitiveType type) {
return be_exec_version > 0 && (is_int_or_bool(type) || is_float_or_double(type));
}
diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h
index be3dc23cc9..053cdb748d 100644
--- a/be/src/olap/bitmap_filter_predicate.h
+++ b/be/src/olap/bitmap_filter_predicate.h
@@ -17,9 +17,14 @@
#pragma once
+#include <cstdint>
+
#include "exprs/bitmapfilter_predicate.h"
+#include "exprs/cast_functions.h"
#include "exprs/runtime_filter.h"
#include "olap/column_predicate.h"
+#include "olap/wrapper_field.h"
+#include "util/bitmap_value.h"
#include "vec/columns/column_dictionary.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_vector.h"
@@ -51,6 +56,26 @@ public:
void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size,
bool* flags) const override {};
+ bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
+ if (_specific_filter->is_not_in()) {
+ return true;
+ }
+
+ CppType max_value;
+ if (statistic.second->is_null()) {
+ // no non-null values
+ return false;
+ } else {
+ max_value = *reinterpret_cast<const CppType*>(statistic.second->cell_ptr());
+ }
+
+ CppType min_value =
+ statistic.first->is_null() /* contains null values */
+ ? 0
+ : *reinterpret_cast<const CppType*>(statistic.first->cell_ptr());
+ return _specific_filter->contains_any(min_value, max_value);
+ }
+
Status evaluate(BitmapIndexIterator*, uint32_t, roaring::Roaring*) const override {
return Status::OK();
}
@@ -84,25 +109,7 @@ private:
};
template <PrimitiveType T>
-void BitmapFilterColumnPredicate<T>::evaluate(ColumnBlock* block, uint16_t* sel,
- uint16_t* size) const {
- uint16_t new_size = 0;
- if (block->is_nullable()) {
- for (uint16_t i = 0; i < *size; ++i) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- new_size += (!block->cell(idx).is_null() &&
- _specific_filter->find(*block->cell(idx).cell_ptr()));
- }
- } else {
- for (uint16_t i = 0; i < *size; ++i) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- new_size += _specific_filter->find(*block->cell(idx).cell_ptr());
- }
- }
- *size = new_size;
-}
+void BitmapFilterColumnPredicate<T>::evaluate(ColumnBlock*, uint16_t*, uint16_t*) const {}
template <PrimitiveType T>
uint16_t BitmapFilterColumnPredicate<T>::evaluate(const vectorized::IColumn& column, uint16_t* sel,
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 2ca2c37982..41f9c2132a 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -286,6 +286,7 @@ struct OlapReaderStatistics {
// block_load_ns
// block_init_ns
// block_init_seek_ns
+ // block_conditions_filtered_ns
// first_read_ns
// block_first_read_seek_ns
// lazy_read_ns
@@ -322,6 +323,7 @@ struct OlapReaderStatistics {
int64_t rows_del_by_bitmap = 0;
// the number of rows filtered by various column indexes.
int64_t rows_conditions_filtered = 0;
+ int64_t block_conditions_filtered_ns = 0;
int64_t index_load_ns = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 1caa62f638..5f5419073a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -282,6 +282,7 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra
}
Status SegmentIterator::_get_row_ranges_by_column_conditions() {
+ SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_ns);
if (_row_bitmap.isEmpty()) {
return Status::OK();
}
diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h
index e8f10a4f39..2717f18895 100644
--- a/be/src/util/bitmap_value.h
+++ b/be/src/util/bitmap_value.h
@@ -21,6 +21,7 @@
#include <algorithm>
#include <cstdarg>
+#include <cstdint>
#include <cstdio>
#include <limits>
#include <map>
@@ -32,6 +33,7 @@
#include <utility>
#include "common/logging.h"
+#include "gutil/integral_types.h"
#include "udf/udf.h"
#include "util/coding.h"
#include "vec/common/pod_array.h"
@@ -1430,6 +1432,9 @@ public:
return false;
}
+ // true if contains a value that belongs to the range [left, right].
+ bool contains_any(uint64_t left, uint64_t right) const;
+
uint64_t cardinality() const {
switch (_type) {
case EMPTY:
@@ -1677,6 +1682,16 @@ public:
}
}
+ uint64_t max(bool* empty) const {
+ return min_or_max(empty, [&]() { return _bitmap.maximum(); });
+ }
+
+ uint64_t min(bool* empty) const {
+ return min_or_max(empty, [&]() { return _bitmap.minimum(); });
+ }
+
+ bool empty() const { return _type == EMPTY; }
+
/**
* Return new set with specified range (not include the range_end)
*/
@@ -1823,6 +1838,7 @@ public:
b_iterator begin() const;
b_iterator end() const;
+ b_iterator lower_bound(uint64_t val) const;
private:
void _convert_to_smaller_type() {
@@ -1839,6 +1855,25 @@ private:
}
}
+ uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const {
+ bool is_empty = false;
+ uint64_t result = 0;
+ switch (_type) {
+ case SINGLE:
+ result = _sv;
+ break;
+ case BITMAP:
+ result = func();
+ break;
+ default:
+ is_empty = true;
+ }
+ if (empty) {
+ *empty = is_empty;
+ }
+ return result;
+ }
+
enum BitmapDataType {
EMPTY = 0,
SINGLE = 1, // single element
@@ -1876,7 +1911,9 @@ public:
}
BitmapValueIterator(const BitmapValueIterator& other)
- : _bitmap(other._bitmap), _iter(other._iter), _sv(other._sv), _end(other._end) {}
+ : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) {
+ _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr;
+ }
~BitmapValueIterator() {
if (_iter != nullptr) {
@@ -1930,7 +1967,9 @@ public:
}
bool operator==(const BitmapValueIterator& other) const {
- if (_end && other._end) return true;
+ if (_end && other._end) {
+ return true;
+ }
switch (_bitmap._type) {
case BitmapValue::BitmapDataType::EMPTY:
@@ -1947,6 +1986,27 @@ public:
bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); }
+ /**
+ * Move the iterator to the first value >= `val`.
+ */
+ BitmapValueIterator& move(uint64_t val) {
+ switch (_bitmap._type) {
+ case BitmapValue::BitmapDataType::SINGLE:
+ if (_sv < val) {
+ _end = true;
+ }
+ break;
+ case BitmapValue::BitmapDataType::BITMAP:
+ if (!_iter->move(val)) {
+ _end = true;
+ }
+ break;
+ default:
+ break;
+ }
+ return *this;
+ }
+
private:
const BitmapValue& _bitmap;
detail::Roaring64MapSetBitForwardIterator* _iter = nullptr;
@@ -1962,4 +2022,16 @@ inline BitmapValueIterator BitmapValue::end() const {
return BitmapValueIterator(*this, true);
}
+inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const {
+ return BitmapValueIterator(*this).move(val);
+}
+
+inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const {
+ if (left > right) {
+ return false;
+ }
+ auto it = lower_bound(left);
+ return it != end() && *it <= right;
+}
+
} // namespace doris
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 46cb029d38..ad978de41f 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -66,6 +66,7 @@ Status NewOlapScanNode::_init_profile() {
_block_init_timer = ADD_TIMER(_segment_profile, "BlockInitTime");
_block_init_seek_timer = ADD_TIMER(_segment_profile, "BlockInitSeekTime");
_block_init_seek_counter = ADD_COUNTER(_segment_profile, "BlockInitSeekCount", TUnit::UNIT);
+ _block_conditions_filtered_timer = ADD_TIMER(_segment_profile, "BlockConditionsFilteredTime");
_rows_vec_cond_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT);
_vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime");
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h
index f55e715659..dc882aa6a9 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.h
+++ b/be/src/vec/exec/scan/new_olap_scan_node.h
@@ -95,6 +95,7 @@ private:
RuntimeProfile::Counter* _block_init_timer = nullptr;
RuntimeProfile::Counter* _block_init_seek_timer = nullptr;
RuntimeProfile::Counter* _block_init_seek_counter = nullptr;
+ RuntimeProfile::Counter* _block_conditions_filtered_timer = nullptr;
RuntimeProfile::Counter* _first_read_timer = nullptr;
RuntimeProfile::Counter* _first_read_seek_timer = nullptr;
RuntimeProfile::Counter* _first_read_seek_counter = nullptr;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp
index 3ddccdf460..224affa4bc 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -386,6 +386,8 @@ void NewOlapScanner::_update_counters_before_close() {
COUNTER_UPDATE(olap_parent->_block_init_timer, stats.block_init_ns);
COUNTER_UPDATE(olap_parent->_block_init_seek_timer, stats.block_init_seek_ns);
COUNTER_UPDATE(olap_parent->_block_init_seek_counter, stats.block_init_seek_num);
+ COUNTER_UPDATE(olap_parent->_block_conditions_filtered_timer,
+ stats.block_conditions_filtered_ns);
COUNTER_UPDATE(olap_parent->_first_read_timer, stats.first_read_ns);
COUNTER_UPDATE(olap_parent->_first_read_seek_timer, stats.block_first_read_seek_ns);
COUNTER_UPDATE(olap_parent->_first_read_seek_counter, stats.block_first_read_seek_num);
diff --git a/be/src/vec/runtime/shared_hash_table_controller.h b/be/src/vec/runtime/shared_hash_table_controller.h
index 0a308f8aea..e2c54f533d 100644
--- a/be/src/vec/runtime/shared_hash_table_controller.h
+++ b/be/src/vec/runtime/shared_hash_table_controller.h
@@ -42,7 +42,7 @@ struct SharedRuntimeFilterContext {
std::shared_ptr<MinMaxFuncBase> minmax_func;
std::shared_ptr<HybridSetBase> hybrid_set;
std::shared_ptr<BloomFilterFuncBase> bloom_filter_func;
- std::shared_ptr<BitmapFilterFuncBase> _bitmap_filter_func;
+ std::shared_ptr<BitmapFilterFuncBase> bitmap_filter_func;
};
struct SharedHashTableContext {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org