You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/06/26 08:39:52 UTC

[doris] branch master updated: [Bug](RuntimeFiter) Fix bf error change the murmurhash to crc32 in regression test p2 (#21167)

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5fdd9b9254 [Bug](RuntimeFiter) Fix bf error change the murmurhash to crc32 in regression test p2 (#21167)
5fdd9b9254 is described below

commit 5fdd9b9254ce8e797922754106062d258c0a56fe
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Mon Jun 26 16:39:45 2023 +0800

    [Bug](RuntimeFiter) Fix bf error change the murmurhash to crc32 in regression test p2 (#21167)
---
 be/src/agent/be_exec_version_manager.h |  1 +
 be/src/olap/bloom_filter_predicate.h   | 44 +++++++++++++++++++++++++---------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h
index 0491a038c8..657ebab02d 100644
--- a/be/src/agent/be_exec_version_manager.h
+++ b/be/src/agent/be_exec_version_manager.h
@@ -55,6 +55,7 @@ private:
  * 2: start from doris 2.0
  *    a. function month/day/hour/minute/second's return type is changed to smaller type.
  *    b. in order to solve agg of sum/count is not compatibility during the upgrade process
+ *    c. change the string hash method in runtime filter from murmurhash to crc32
  *
 */
 inline const int BeExecVersionManager::max_be_exec_version = 2;
diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h
index 99debfa94b..885927d3f5 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -63,6 +63,17 @@ private:
             DCHECK(null_map);
         }
 
+        uint24_t tmp_uint24_value;
+        auto get_cell_value = [&tmp_uint24_value](auto& data) {
+            if constexpr (std::is_same_v<std::decay_t<decltype(data)>, uint32_t> &&
+                          T == PrimitiveType::TYPE_DATE) {
+                memcpy((char*)(&tmp_uint24_value), (char*)(&data), sizeof(uint24_t));
+                return (const char*)&tmp_uint24_value;
+            } else {
+                return (const char*)&data;
+            }
+        };
+
         uint16_t new_size = 0;
         if (column.is_column_dictionary()) {
             auto* dict_col = reinterpret_cast<const vectorized::ColumnDictI32*>(&column);
@@ -90,6 +101,28 @@ private:
                     }
                 }
             }
+        } else if (is_string_type(T) && _be_exec_version >= 2) {
+            auto& pred_col =
+                    reinterpret_cast<
+                            const vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>(
+                            &column)
+                            ->get_data();
+
+            auto pred_col_data = pred_col.data();
+            const bool is_dense_column = pred_col.size() == size;
+            for (uint16_t i = 0; i < size; i++) {
+                uint16_t idx = is_dense_column ? i : sel[i];
+                if constexpr (is_nullable) {
+                    if (!null_map[idx] &&
+                        _specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) {
+                        sel[new_size++] = idx;
+                    }
+                } else {
+                    if (_specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) {
+                        sel[new_size++] = idx;
+                    }
+                }
+            }
         } else if (IRuntimeFilter::enable_use_batch(_be_exec_version > 0, T)) {
             const auto& data =
                     reinterpret_cast<
@@ -99,17 +132,6 @@ private:
             new_size = _specific_filter->find_fixed_len_olap_engine((char*)data.data(), null_map,
                                                                     sel, size, data.size() != size);
         } else {
-            uint24_t tmp_uint24_value;
-            auto get_cell_value = [&tmp_uint24_value](auto& data) {
-                if constexpr (std::is_same_v<std::decay_t<decltype(data)>, uint32_t> &&
-                              T == PrimitiveType::TYPE_DATE) {
-                    memcpy((char*)(&tmp_uint24_value), (char*)(&data), sizeof(uint24_t));
-                    return (const char*)&tmp_uint24_value;
-                } else {
-                    return (const char*)&data;
-                }
-            };
-
             auto& pred_col =
                     reinterpret_cast<
                             const vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org