You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/06/26 08:39:52 UTC
[doris] branch master updated: [Bug](RuntimeFilter) Fix bf error change the murmurhash to crc32 in regression test p2 (#21167)
This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5fdd9b9254 [Bug](RuntimeFilter) Fix bf error change the murmurhash to crc32 in regression test p2 (#21167)
5fdd9b9254 is described below
commit 5fdd9b9254ce8e797922754106062d258c0a56fe
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon Jun 26 16:39:45 2023 +0800
[Bug](RuntimeFilter) Fix bf error change the murmurhash to crc32 in regression test p2 (#21167)
---
be/src/agent/be_exec_version_manager.h | 1 +
be/src/olap/bloom_filter_predicate.h | 44 +++++++++++++++++++++++++---------
2 files changed, 34 insertions(+), 11 deletions(-)
diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h
index 0491a038c8..657ebab02d 100644
--- a/be/src/agent/be_exec_version_manager.h
+++ b/be/src/agent/be_exec_version_manager.h
@@ -55,6 +55,7 @@ private:
* 2: start from doris 2.0
* a. function month/day/hour/minute/second's return type is changed to smaller type.
* b. in order to solve the incompatibility of sum/count aggregation during the upgrade process
+ * c. change the string hash method in runtime filter
*
*/
inline const int BeExecVersionManager::max_be_exec_version = 2;
diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h
index 99debfa94b..885927d3f5 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -63,6 +63,17 @@ private:
DCHECK(null_map);
}
+ uint24_t tmp_uint24_value;
+ auto get_cell_value = [&tmp_uint24_value](auto& data) {
+ if constexpr (std::is_same_v<std::decay_t<decltype(data)>, uint32_t> &&
+ T == PrimitiveType::TYPE_DATE) {
+ memcpy((char*)(&tmp_uint24_value), (char*)(&data), sizeof(uint24_t));
+ return (const char*)&tmp_uint24_value;
+ } else {
+ return (const char*)&data;
+ }
+ };
+
uint16_t new_size = 0;
if (column.is_column_dictionary()) {
auto* dict_col = reinterpret_cast<const vectorized::ColumnDictI32*>(&column);
@@ -90,6 +101,28 @@ private:
}
}
}
+ } else if (is_string_type(T) && _be_exec_version >= 2) {
+ auto& pred_col =
+ reinterpret_cast<
+ const vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>(
+ &column)
+ ->get_data();
+
+ auto pred_col_data = pred_col.data();
+ const bool is_dense_column = pred_col.size() == size;
+ for (uint16_t i = 0; i < size; i++) {
+ uint16_t idx = is_dense_column ? i : sel[i];
+ if constexpr (is_nullable) {
+ if (!null_map[idx] &&
+ _specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) {
+ sel[new_size++] = idx;
+ }
+ } else {
+ if (_specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) {
+ sel[new_size++] = idx;
+ }
+ }
+ }
} else if (IRuntimeFilter::enable_use_batch(_be_exec_version > 0, T)) {
const auto& data =
reinterpret_cast<
@@ -99,17 +132,6 @@ private:
new_size = _specific_filter->find_fixed_len_olap_engine((char*)data.data(), null_map,
sel, size, data.size() != size);
} else {
- uint24_t tmp_uint24_value;
- auto get_cell_value = [&tmp_uint24_value](auto& data) {
- if constexpr (std::is_same_v<std::decay_t<decltype(data)>, uint32_t> &&
- T == PrimitiveType::TYPE_DATE) {
- memcpy((char*)(&tmp_uint24_value), (char*)(&data), sizeof(uint24_t));
- return (const char*)&tmp_uint24_value;
- } else {
- return (const char*)&data;
- }
- };
-
auto& pred_col =
reinterpret_cast<
const vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org