You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/25 13:29:39 UTC

[doris] branch master updated: [Bug][Vectorized] Fix core dump of BloomFilter not support DATE type (#10417)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f12b22a51e [Bug][Vectorized] Fix core dump of BloomFilter not support DATE type (#10417)
f12b22a51e is described below

commit f12b22a51ef73604bb934d0ad6668376b3fe8c5b
Author: HappenLee <ha...@hotmail.com>
AuthorDate: Sat Jun 25 21:29:32 2022 +0800

    [Bug][Vectorized] Fix core dump of BloomFilter not support DATE type (#10417)
---
 be/src/olap/bloom_filter_predicate.h | 104 +++++++++++++++++++++--------------
 1 file changed, 63 insertions(+), 41 deletions(-)

diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h
index 9e6901cbfb..52ec6c920c 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -68,6 +68,65 @@ public:
     uint16_t evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t size) const override;
 
 private:
+    // Keeps the row ids in sel[0, size) that may match the bloom filter, compacting them to
+    // the front of sel, and returns how many were kept. With is_nullable, null_map is indexed
+    // by row id and a non-zero entry drops the row without probing the filter.
+    // file_type is the element type probed; uint24_t is read from PredicateColumnType<uint32_t>.
+    template <bool is_nullable, typename file_type = void>
+    uint16_t evaluate(vectorized::IColumn& column, uint8_t* null_map, uint16_t* sel,
+                      uint16_t size) const {
+        if constexpr (is_nullable) DCHECK(null_map);
+
+        uint16_t new_size = 0;
+        if (column.is_column_dictionary()) {
+            // Dictionary columns: probe with each row's hash value instead of its cell bytes.
+            auto* dict_col = reinterpret_cast<vectorized::ColumnDictI32*>(&column);
+            dict_col->generate_hash_values_for_runtime_filter();
+            for (uint16_t i = 0; i < size; i++) {
+                uint16_t idx = sel[i];
+                // Written unconditionally; the slot is kept only when new_size advances.
+                sel[new_size] = idx;
+                if constexpr (is_nullable) {
+                    new_size += !null_map[idx] &&
+                                _specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
+                } else {
+                    new_size += _specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
+                }
+            }
+        } else {
+            using column_type =
+                    std::conditional_t<std::is_same_v<file_type, uint24_t>, uint32_t, file_type>;
+            auto* pred_col_data =
+                    reinterpret_cast<vectorized::PredicateColumnType<column_type>*>(&column)
+                            ->get_data()
+                            .data();
+            uint24_t tmp_uint24_value;
+            // Narrow only when file_type is uint24_t. Keying on the element type instead would
+            // also cut a real uint32_t file_type down to sizeof(uint24_t) bytes.
+            auto get_cell_value = [&tmp_uint24_value](auto& data) {
+                if constexpr (std::is_same_v<file_type, uint24_t>) {
+                    memcpy((char*)(&tmp_uint24_value), (char*)(&data), sizeof(uint24_t));
+                    return (const char*)&tmp_uint24_value;
+                } else {
+                    return (const char*)&data;
+                }
+            };
+
+            for (uint16_t i = 0; i < size; i++) {
+                uint16_t idx = sel[i];
+                sel[new_size] = idx;
+                if constexpr (is_nullable) {
+                    new_size += !null_map[idx] && _specific_filter->find_olap_engine(
+                                                          get_cell_value(pred_col_data[idx]));
+                } else {
+                    new_size +=
+                            _specific_filter->find_olap_engine(get_cell_value(pred_col_data[idx]));
+                }
+            }
+        }
+        return new_size;
+    }
+
     std::shared_ptr<IBloomFilterFuncBase> _filter;
     SpecificFilter* _specific_filter; // owned by _filter
     mutable uint64_t _evaluated_rows = 1;
@@ -119,49 +178,12 @@ uint16_t BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, ui
         return size;
     }
     if (column.is_nullable()) {
-        auto* nullable_col = vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
+        auto* nullable_col = reinterpret_cast<vectorized::ColumnNullable*>(&column);
         auto& null_map_data = nullable_col->get_null_map_column().get_data();
-        // deal ColumnDict
-        if (nullable_col->get_nested_column().is_column_dictionary()) {
-            auto* dict_col = vectorized::check_and_get_column<vectorized::ColumnDictI32>(
-                    nullable_col->get_nested_column());
-            const_cast<vectorized::ColumnDictI32*>(dict_col)
-                    ->generate_hash_values_for_runtime_filter();
-            for (uint16_t i = 0; i < size; i++) {
-                uint16_t idx = sel[i];
-                sel[new_size] = idx;
-                new_size += (!null_map_data[idx]) &&
-                            _specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
-            }
-        } else {
-            auto* pred_col = vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(
-                    nullable_col->get_nested_column());
-            auto& pred_col_data = pred_col->get_data();
-            for (uint16_t i = 0; i < size; i++) {
-                uint16_t idx = sel[i];
-                sel[new_size] = idx;
-                const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[idx]));
-                new_size += (!null_map_data[idx]) && _specific_filter->find_olap_engine(cell_value);
-            }
-        }
-    } else if (column.is_column_dictionary()) {
-        auto* dict_col = vectorized::check_and_get_column<vectorized::ColumnDictI32>(column);
-        const_cast<vectorized::ColumnDictI32*>(dict_col)->generate_hash_values_for_runtime_filter();
-        for (uint16_t i = 0; i < size; i++) {
-            uint16_t idx = sel[i];
-            sel[new_size] = idx;
-            new_size += _specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
-        }
+        new_size = evaluate<true, FT>(nullable_col->get_nested_column(), null_map_data.data(), sel,
+                                      size);
     } else {
-        auto* pred_col =
-                vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(column);
-        auto& pred_col_data = pred_col->get_data();
-        for (uint16_t i = 0; i < size; i++) {
-            uint16_t idx = sel[i];
-            sel[new_size] = idx;
-            const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[idx]));
-            new_size += _specific_filter->find_olap_engine(cell_value);
-        }
+        new_size = evaluate<false, FT>(column, nullptr, sel, size);
     }
     // If the pass rate is very high, for example > 50%, then the bloomfilter is useless.
     // Some bloomfilter is useless, for example ssb 4.3, it consumes a lot of cpu but it is


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org