You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/25 13:29:39 UTC
[doris] branch master updated: [Bug][Vectorized] Fix core dump of BloomFilter not support DATE type (#10417)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f12b22a51e [Bug][Vectorized] Fix core dump of BloomFilter not support DATE type (#10417)
f12b22a51e is described below
commit f12b22a51ef73604bb934d0ad6668376b3fe8c5b
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Sat Jun 25 21:29:32 2022 +0800
[Bug][Vectorized] Fix core dump of BloomFilter not support DATE type (#10417)
---
be/src/olap/bloom_filter_predicate.h | 104 +++++++++++++++++++++--------------
1 file changed, 63 insertions(+), 41 deletions(-)
diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h
index 9e6901cbfb..52ec6c920c 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -68,6 +68,65 @@ public:
uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t size) const override;
private:
+ // Compacts the selection vector `sel` in place against the bloom filter and returns the
+ // number of row indices that were kept.
+ //
+ // Template parameters:
+ //   is_nullable - when true, `null_map` must be non-null and any row whose null_map entry
+ //                 is non-zero is dropped without probing the filter.
+ //   file_type   - element type used to interpret a non-dictionary column. uint24_t values
+ //                 are read from a PredicateColumnType<uint32_t>, see get_column_data below.
+ //
+ // Parameters:
+ //   column   - the column to probe (for nullable input the caller passes the nested column).
+ //   null_map - per-row null flags indexed by row id; only read when is_nullable is true.
+ //   sel      - in/out array of row indices; surviving indices are packed at the front.
+ //   size     - number of valid entries in `sel`.
+ template <bool is_nullable, typename file_type = void>
+ uint16_t evaluate(vectorized::IColumn& column, uint8_t* null_map, uint16_t* sel,
+                   uint16_t size) const {
+     if constexpr (is_nullable) DCHECK(null_map);
+
+     uint16_t new_size = 0;
+     if (column.is_column_dictionary()) {
+         // Dictionary columns are probed through their per-row hash values rather than
+         // through the raw cell bytes.
+         auto* dict_col = reinterpret_cast<vectorized::ColumnDictI32*>(&column);
+         dict_col->generate_hash_values_for_runtime_filter();
+         for (uint16_t i = 0; i < size; i++) {
+             uint16_t idx = sel[i];
+             // Always write the candidate; it only "sticks" if new_size is advanced below.
+             sel[new_size] = idx;
+             if constexpr (is_nullable) {
+                 new_size += !null_map[idx] &&
+                             _specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
+             } else {
+                 new_size += _specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
+             }
+         }
+     } else {
+         // Scratch buffer holding the 3-byte form of a value when file_type is uint24_t.
+         uint24_t tmp_uint24_value;
+         auto get_column_data = [](vectorized::IColumn& column) {
+             if constexpr (std::is_same_v<file_type, uint24_t>) {
+                 // uint24_t values live in a uint32_t predicate column.
+                 return reinterpret_cast<vectorized::PredicateColumnType<uint32_t>*>(&column)
+                         ->get_data()
+                         .data();
+             } else {
+                 return reinterpret_cast<vectorized::PredicateColumnType<file_type>*>(&column)
+                         ->get_data()
+                         .data();
+             }
+         };
+
+         auto get_cell_value = [&tmp_uint24_value](auto& data) {
+             // Key the narrowing on file_type, not on the element type: only uint24_t
+             // columns are widened to uint32_t storage, and a column whose file_type is
+             // genuinely uint32_t must be probed with all of its bytes.
+             if constexpr (std::is_same_v<file_type, uint24_t>) {
+                 memcpy((char*)(&tmp_uint24_value), (char*)(&data), sizeof(uint24_t));
+                 return (const char*)&tmp_uint24_value;
+             } else {
+                 return (const char*)&data;
+             }
+         };
+
+         auto pred_col_data = get_column_data(column);
+         for (uint16_t i = 0; i < size; i++) {
+             uint16_t idx = sel[i];
+             // Always write the candidate; it only "sticks" if new_size is advanced below.
+             sel[new_size] = idx;
+
+             if constexpr (is_nullable) {
+                 new_size += !null_map[idx] && _specific_filter->find_olap_engine(
+                                                       get_cell_value(pred_col_data[idx]));
+             } else {
+                 new_size +=
+                         _specific_filter->find_olap_engine(get_cell_value(pred_col_data[idx]));
+             }
+         }
+     }
+     return new_size;
+ }
+
+
std::shared_ptr<IBloomFilterFuncBase> _filter;
SpecificFilter* _specific_filter; // owned by _filter
mutable uint64_t _evaluated_rows = 1;
@@ -119,49 +178,12 @@ uint16_t BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, ui
return size;
}
if (column.is_nullable()) {
- auto* nullable_col = vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
+ auto* nullable_col = reinterpret_cast<vectorized::ColumnNullable*>(&column);
auto& null_map_data = nullable_col->get_null_map_column().get_data();
- // deal ColumnDict
- if (nullable_col->get_nested_column().is_column_dictionary()) {
- auto* dict_col = vectorized::check_and_get_column<vectorized::ColumnDictI32>(
- nullable_col->get_nested_column());
- const_cast<vectorized::ColumnDictI32*>(dict_col)
- ->generate_hash_values_for_runtime_filter();
- for (uint16_t i = 0; i < size; i++) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- new_size += (!null_map_data[idx]) &&
- _specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
- }
- } else {
- auto* pred_col = vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(
- nullable_col->get_nested_column());
- auto& pred_col_data = pred_col->get_data();
- for (uint16_t i = 0; i < size; i++) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[idx]));
- new_size += (!null_map_data[idx]) && _specific_filter->find_olap_engine(cell_value);
- }
- }
- } else if (column.is_column_dictionary()) {
- auto* dict_col = vectorized::check_and_get_column<vectorized::ColumnDictI32>(column);
- const_cast<vectorized::ColumnDictI32*>(dict_col)->generate_hash_values_for_runtime_filter();
- for (uint16_t i = 0; i < size; i++) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- new_size += _specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
- }
+ new_size = evaluate<true, FT>(nullable_col->get_nested_column(), null_map_data.data(), sel,
+ size);
} else {
- auto* pred_col =
- vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(column);
- auto& pred_col_data = pred_col->get_data();
- for (uint16_t i = 0; i < size; i++) {
- uint16_t idx = sel[i];
- sel[new_size] = idx;
- const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[idx]));
- new_size += _specific_filter->find_olap_engine(cell_value);
- }
+ new_size = evaluate<false, FT>(column, nullptr, sel, size);
}
// If the pass rate is very high, for example > 50%, then the bloomfilter is useless.
// Some bloomfilter is useless, for example ssb 4.3, it consumes a lot of cpu but it is
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org