You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/16 10:09:38 UTC
[incubator-doris] branch master updated: [Enhancement][Storage] refactor InListPredicate/NotInListPredicate (#10139)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new ae9c231925 [Enhancement][Storage] refactor InListPredicate/NotInListPredicate (#10139)
ae9c231925 is described below
commit ae9c231925d7b4dd76acac2f924b550217f64b28
Author: Pxl <95...@qq.com>
AuthorDate: Thu Jun 16 18:09:29 2022 +0800
[Enhancement][Storage] refactor InListPredicate/NotInListPredicate (#10139)
* refactor in_list_pred
* update
---
be/src/olap/CMakeLists.txt | 1 -
be/src/olap/in_list_predicate.cpp | 391 ---------------------------------
be/src/olap/in_list_predicate.h | 313 +++++++++++++++++++++++---
be/src/vec/columns/column_dictionary.h | 18 +-
4 files changed, 289 insertions(+), 434 deletions(-)
diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
index 2c02122883..615a9fff44 100644
--- a/be/src/olap/CMakeLists.txt
+++ b/be/src/olap/CMakeLists.txt
@@ -47,7 +47,6 @@ add_library(Olap STATIC
file_stream.cpp
generic_iterators.cpp
hll.cpp
- in_list_predicate.cpp
bloom_filter_predicate.cpp
in_stream.cpp
key_coder.cpp
diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp
deleted file mode 100644
index 1b2ab20f3b..0000000000
--- a/be/src/olap/in_list_predicate.cpp
+++ /dev/null
@@ -1,391 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/in_list_predicate.h"
-
-#include "olap/field.h"
-#include "runtime/string_value.hpp"
-#include "runtime/vectorized_row_batch.h"
-#include "vec/columns/column_dictionary.h"
-#include "vec/columns/column_nullable.h"
-#include "vec/columns/predicate_column.h"
-
-namespace doris {
-
-#define IN_LIST_PRED_CONSTRUCTOR(CLASS) \
- template <class T> \
- CLASS<T>::CLASS(uint32_t column_id, phmap::flat_hash_set<T>&& values, bool opposite) \
- : ColumnPredicate(column_id, opposite), _values(std::move(values)) {}
-
-IN_LIST_PRED_CONSTRUCTOR(InListPredicate)
-IN_LIST_PRED_CONSTRUCTOR(NotInListPredicate)
-
-#define IN_LIST_PRED_EVALUATE(CLASS, OP) \
- template <class T> \
- void CLASS<T>::evaluate(VectorizedRowBatch* batch) const { \
- uint16_t n = batch->size(); \
- if (n == 0) { \
- return; \
- } \
- uint16_t* sel = batch->selected(); \
- const T* col_vector = reinterpret_cast<const T*>(batch->column(_column_id)->col_data()); \
- uint16_t new_size = 0; \
- if (batch->column(_column_id)->no_nulls()) { \
- if (batch->selected_in_use()) { \
- for (uint16_t j = 0; j != n; ++j) { \
- uint16_t i = sel[j]; \
- sel[new_size] = i; \
- new_size += (_values.find(col_vector[i]) OP _values.end()); \
- } \
- batch->set_size(new_size); \
- } else { \
- for (uint16_t i = 0; i != n; ++i) { \
- sel[new_size] = i; \
- new_size += (_values.find(col_vector[i]) OP _values.end()); \
- } \
- if (new_size < n) { \
- batch->set_size(new_size); \
- batch->set_selected_in_use(true); \
- } \
- } \
- } else { \
- bool* is_null = batch->column(_column_id)->is_null(); \
- if (batch->selected_in_use()) { \
- for (uint16_t j = 0; j != n; ++j) { \
- uint16_t i = sel[j]; \
- sel[new_size] = i; \
- new_size += (!is_null[i] && _values.find(col_vector[i]) OP _values.end()); \
- } \
- batch->set_size(new_size); \
- } else { \
- for (int i = 0; i != n; ++i) { \
- sel[new_size] = i; \
- new_size += (!is_null[i] && _values.find(col_vector[i]) OP _values.end()); \
- } \
- if (new_size < n) { \
- batch->set_size(new_size); \
- batch->set_selected_in_use(true); \
- } \
- } \
- } \
- }
-
-IN_LIST_PRED_EVALUATE(InListPredicate, !=)
-IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
-
-#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \
- template <class T> \
- void CLASS<T>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \
- uint16_t new_size = 0; \
- if (block->is_nullable()) { \
- for (uint16_t i = 0; i < *size; ++i) { \
- uint16_t idx = sel[i]; \
- sel[new_size] = idx; \
- const T* cell_value = reinterpret_cast<const T*>(block->cell(idx).cell_ptr()); \
- auto result = (!block->cell(idx).is_null() && _values.find(*cell_value) \
- OP _values.end()); \
- new_size += _opposite ? !result : result; \
- } \
- } else { \
- for (uint16_t i = 0; i < *size; ++i) { \
- uint16_t idx = sel[i]; \
- sel[new_size] = idx; \
- const T* cell_value = reinterpret_cast<const T*>(block->cell(idx).cell_ptr()); \
- auto result = (_values.find(*cell_value) OP _values.end()); \
- new_size += _opposite ? !result : result; \
- } \
- } \
- *size = new_size; \
- }
-
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, !=)
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
-
-// todo(zeno) define interface in IColumn to simplify code
-#define IN_LIST_PRED_COLUMN_EVALUATE(CLASS, OP) \
- template <class T> \
- void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \
- uint16_t new_size = 0; \
- if (column.is_nullable()) { \
- auto* nullable_col = \
- vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \
- auto& null_bitmap = reinterpret_cast<const vectorized::ColumnUInt8&>( \
- nullable_col->get_null_map_column()) \
- .get_data(); \
- auto& nested_col = nullable_col->get_nested_column(); \
- if (nested_col.is_column_dictionary()) { \
- if constexpr (std::is_same_v<T, StringValue>) { \
- auto* nested_col_ptr = vectorized::check_and_get_column< \
- vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
- auto& data_array = nested_col_ptr->get_data(); \
- std::vector<bool> selected; \
- nested_col_ptr->find_codes(_values, selected); \
- for (uint16_t i = 0; i < *size; i++) { \
- uint16_t idx = sel[i]; \
- sel[new_size] = idx; \
- const auto& cell_value = data_array[idx]; \
- DCHECK(cell_value < (int64_t)selected.size()); \
- bool ret = !null_bitmap[idx] && (selected[cell_value] OP false); \
- new_size += _opposite ? !ret : ret; \
- } \
- } \
- } else { \
- auto* nested_col_ptr = \
- vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>( \
- nested_col); \
- auto& data_array = nested_col_ptr->get_data(); \
- for (uint16_t i = 0; i < *size; i++) { \
- uint16_t idx = sel[i]; \
- sel[new_size] = idx; \
- const auto& cell_value = reinterpret_cast<const T&>(data_array[idx]); \
- bool ret = !null_bitmap[idx] && (_values.find(cell_value) OP _values.end()); \
- new_size += _opposite ? !ret : ret; \
- } \
- } \
- } else if (column.is_column_dictionary()) { \
- if constexpr (std::is_same_v<T, StringValue>) { \
- auto& dict_col = \
- reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
- column); \
- auto& data_array = dict_col.get_data(); \
- std::vector<bool> selected; \
- dict_col.find_codes(_values, selected); \
- for (uint16_t i = 0; i < *size; i++) { \
- uint16_t idx = sel[i]; \
- sel[new_size] = idx; \
- const auto& cell_value = data_array[idx]; \
- DCHECK(cell_value < (int64_t)selected.size()); \
- auto result = (selected[cell_value] OP false); \
- new_size += _opposite ? !result : result; \
- } \
- } \
- } else { \
- auto& number_column = reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
- auto& data_array = number_column.get_data(); \
- for (uint16_t i = 0; i < *size; i++) { \
- uint16_t idx = sel[i]; \
- sel[new_size] = idx; \
- const auto& cell_value = reinterpret_cast<const T&>(data_array[idx]); \
- auto result = (_values.find(cell_value) OP _values.end()); \
- new_size += _opposite ? !result : result; \
- } \
- } \
- *size = new_size; \
- }
-
-IN_LIST_PRED_COLUMN_EVALUATE(InListPredicate, !=)
-IN_LIST_PRED_COLUMN_EVALUATE(NotInListPredicate, ==)
-
-#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP) \
- template <class T> \
- void CLASS<T>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \
- const { \
- if (block->is_nullable()) { \
- for (uint16_t i = 0; i < size; ++i) { \
- if (flags[i]) continue; \
- uint16_t idx = sel[i]; \
- const T* cell_value = reinterpret_cast<const T*>(block->cell(idx).cell_ptr()); \
- auto result = (!block->cell(idx).is_null() && _values.find(*cell_value) \
- OP _values.end()); \
- flags[i] |= _opposite ? !result : result; \
- } \
- } else { \
- for (uint16_t i = 0; i < size; ++i) { \
- if (flags[i]) continue; \
- uint16_t idx = sel[i]; \
- const T* cell_value = reinterpret_cast<const T*>(block->cell(idx).cell_ptr()); \
- auto result = (_values.find(*cell_value) OP _values.end()); \
- flags[i] |= _opposite ? !result : result; \
- } \
- } \
- }
-
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(InListPredicate, !=)
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(NotInListPredicate, ==)
-
-#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP) \
- template <class T> \
- void CLASS<T>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \
- const { \
- if (block->is_nullable()) { \
- for (uint16_t i = 0; i < size; ++i) { \
- if (!flags[i]) continue; \
- uint16_t idx = sel[i]; \
- const T* cell_value = reinterpret_cast<const T*>(block->cell(idx).cell_ptr()); \
- auto result = (!block->cell(idx).is_null() && _values.find(*cell_value) \
- OP _values.end()); \
- flags[i] &= _opposite ? !result : result; \
- } \
- } else { \
- for (uint16_t i = 0; i < size; ++i) { \
- if (!flags[i]) continue; \
- uint16_t idx = sel[i]; \
- const T* cell_value = reinterpret_cast<const T*>(block->cell(idx).cell_ptr()); \
- auto result = (_values.find(*cell_value) OP _values.end()); \
- flags[i] &= _opposite ? !result : result; \
- } \
- } \
- }
-
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(InListPredicate, !=)
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==)
-
-#define IN_LIST_PRED_BITMAP_EVALUATE(CLASS, OP) \
- template <class T> \
- Status CLASS<T>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* result) const { \
- BitmapIndexIterator* iterator = iterators[_column_id]; \
- if (iterator == nullptr) { \
- return Status::OK(); \
- } \
- if (iterator->has_null_bitmap()) { \
- roaring::Roaring null_bitmap; \
- RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap)); \
- *result -= null_bitmap; \
- } \
- roaring::Roaring indices; \
- for (auto value : _values) { \
- bool exact_match; \
- Status s = iterator->seek_dictionary(&value, &exact_match); \
- rowid_t seeked_ordinal = iterator->current_ordinal(); \
- if (!s.is_not_found()) { \
- if (!s.ok()) { \
- return s; \
- } \
- if (exact_match) { \
- roaring::Roaring index; \
- RETURN_IF_ERROR(iterator->read_bitmap(seeked_ordinal, &index)); \
- indices |= index; \
- } \
- } \
- } \
- *result OP indices; \
- return Status::OK(); \
- }
-
-IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=)
-IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=)
-
-#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \
- template CLASS<int8_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int8_t>&& values, \
- bool opposite); \
- template CLASS<int16_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int16_t>&& values, \
- bool opposite); \
- template CLASS<int32_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int32_t>&& values, \
- bool opposite); \
- template CLASS<int64_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int64_t>&& values, \
- bool opposite); \
- template CLASS<int128_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int128_t>&& values, \
- bool opposite); \
- template CLASS<float>::CLASS(uint32_t column_id, phmap::flat_hash_set<float>&& values, \
- bool opposite); \
- template CLASS<double>::CLASS(uint32_t column_id, phmap::flat_hash_set<double>&& values, \
- bool opposite); \
- template CLASS<decimal12_t>::CLASS(uint32_t column_id, \
- phmap::flat_hash_set<decimal12_t>&& values, bool opposite); \
- template CLASS<StringValue>::CLASS(uint32_t column_id, \
- phmap::flat_hash_set<StringValue>&& values, bool opposite); \
- template CLASS<uint24_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<uint24_t>&& values, \
- bool opposite); \
- template CLASS<uint64_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<uint64_t>&& values, \
- bool opposite);
-
-IN_LIST_PRED_CONSTRUCTOR_DECLARATION(InListPredicate)
-IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate)
-
-#define IN_LIST_PRED_EVALUATE_DECLARATION(CLASS) \
- template void CLASS<int8_t>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<int16_t>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<int32_t>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<int64_t>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<int128_t>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<float>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<double>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<decimal12_t>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<StringValue>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<uint24_t>::evaluate(VectorizedRowBatch* batch) const; \
- template void CLASS<uint64_t>::evaluate(VectorizedRowBatch* batch) const;
-
-IN_LIST_PRED_EVALUATE_DECLARATION(InListPredicate)
-IN_LIST_PRED_EVALUATE_DECLARATION(NotInListPredicate)
-
-#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS) \
- template void CLASS<int8_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const; \
- template void CLASS<int16_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const; \
- template void CLASS<int32_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const; \
- template void CLASS<int64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const; \
- template void CLASS<int128_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const; \
- template void CLASS<float>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \
- template void CLASS<double>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const; \
- template void CLASS<decimal12_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const; \
- template void CLASS<StringValue>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const; \
- template void CLASS<uint24_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const; \
- template void CLASS<uint64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \
- const;
-
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(InListPredicate)
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate)
-
-#define IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(CLASS) \
- template Status CLASS<int8_t>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<int16_t>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<int32_t>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<int64_t>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<int128_t>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<float>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<double>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<decimal12_t>::evaluate( \
- const Schema& schema, const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<StringValue>::evaluate( \
- const Schema& schema, const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<uint24_t>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const; \
- template Status CLASS<uint64_t>::evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const;
-
-IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate)
-IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate)
-
-} //namespace doris
\ No newline at end of file
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index cf2f4b2cdc..f41a7f51c5 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -21,11 +21,15 @@
#include <stdint.h>
#include <roaring/roaring.hh>
+#include <type_traits>
#include "decimal12.h"
#include "olap/column_predicate.h"
#include "runtime/string_value.h"
+#include "runtime/vectorized_row_batch.h"
#include "uint24.h"
+#include "vec/columns/column_dictionary.h"
+#include "vec/core/types.h"
namespace std {
// for string value
@@ -75,34 +79,289 @@ namespace doris {
class VectorizedRowBatch;
-// todo(wb) support evaluate_and,evaluate_or
-
-#define IN_LIST_PRED_CLASS_DEFINE(CLASS, PT) \
- template <class T> \
- class CLASS : public ColumnPredicate { \
- public: \
- CLASS(uint32_t column_id, phmap::flat_hash_set<T>&& values, bool is_opposite = false); \
- PredicateType type() const override { return PredicateType::PT; } \
- virtual void evaluate(VectorizedRowBatch* batch) const override; \
- void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override; \
- void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, \
- bool* flags) const override; \
- void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, \
- bool* flags) const override; \
- virtual Status evaluate(const Schema& schema, \
- const std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const override; \
- void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \
- void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \
- bool* flags) const override {} \
- void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \
- bool* flags) const override {} \
- \
- private: \
- phmap::flat_hash_set<T> _values; \
+template <class T, PredicateType PT>
+class InListPredicateBase : public ColumnPredicate {
+public:
+ // Creates a predicate on column `column_id`. The candidate set is moved into
+ // `_values`; `is_opposite` is forwarded unchanged to the ColumnPredicate base.
+ InListPredicateBase(uint32_t column_id, phmap::flat_hash_set<T>&& values,
+ bool is_opposite = false)
+ : ColumnPredicate(column_id, is_opposite), _values(std::move(values)) {}
+
+ // Reports the predicate kind, which is the compile-time template argument PT.
+ PredicateType type() const override { return PT; }
+
+    // Row-batch path: compacts the batch's selection vector in place so that it
+    // lists only the rows accepted by _operator (IN_LIST keeps values found in
+    // _values, NOT_IN_LIST keeps values that are absent). Rows flagged as null
+    // are always dropped. `_opposite` is not consulted on this path.
+    void evaluate(VectorizedRowBatch* batch) const override {
+        const uint16_t row_count = batch->size();
+        if (row_count == 0) {
+            return;
+        }
+
+        uint16_t* sel = batch->selected();
+        const T* col_vector = reinterpret_cast<const T*>(batch->column(_column_id)->col_data());
+        const bool has_nulls = !batch->column(_column_id)->no_nulls();
+        const bool via_selection = batch->selected_in_use();
+        // The null flags are only fetched when the column reports that it has nulls.
+        const bool* is_null = has_nulls ? batch->column(_column_id)->is_null() : nullptr;
+
+        uint16_t kept = 0;
+        for (uint16_t pos = 0; pos != row_count; ++pos) {
+            // With an active selection vector the row id is read from it;
+            // otherwise the position itself is the row id.
+            const uint16_t row = via_selection ? sel[pos] : pos;
+            // The slot is written unconditionally and only claimed on a match.
+            sel[kept] = row;
+            if (has_nulls && is_null[row]) {
+                continue;
+            }
+            if (_operator(_values.find(col_vector[row]), _values.end())) {
+                ++kept;
+            }
+        }
+
+        if (via_selection) {
+            batch->set_size(kept);
+        } else if (kept < row_count) {
+            // Selection mode is only switched on when at least one row was dropped.
+            batch->set_size(kept);
+            batch->set_selected_in_use(true);
+        }
     };
-IN_LIST_PRED_CLASS_DEFINE(InListPredicate, IN_LIST)
-IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NOT_IN_LIST)
+    // Column-block path: filters `sel` in place and stores the surviving count
+    // in `*size`. Nullability is turned into a template argument so the helper
+    // decides at compile time whether to test cells for null.
+    void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const override {
+        if (!block->is_nullable()) {
+            _base_evaluate<false>(block, sel, size);
+            return;
+        }
+        _base_evaluate<true>(block, sel, size);
+    }
+
+    // Column-block OR path: delegates to the flag-based _base_evaluate helper
+    // with is_and = false, picking the nullable or non-nullable instantiation
+    // from the block.
+    void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) const override {
+        if (!block->is_nullable()) {
+            _base_evaluate<false, false>(block, sel, size, flags);
+            return;
+        }
+        _base_evaluate<true, false>(block, sel, size, flags);
+    }
+
+    // Column-block AND path: delegates to the flag-based _base_evaluate helper
+    // with is_and = true, picking the nullable or non-nullable instantiation
+    // from the block.
+    void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size,
+                      bool* flags) const override {
+        if (!block->is_nullable()) {
+            _base_evaluate<false, true>(block, sel, size, flags);
+            return;
+        }
+        _base_evaluate<true, true>(block, sel, size, flags);
+    }
+
+    // Bitmap-index path: narrows `*result` using the bitmap index iterator
+    // registered for this column. When no iterator is registered, `*result` is
+    // left untouched and OK is returned. Any non-OK status other than
+    // "not found" from the dictionary seek, and any bitmap read error, is
+    // returned to the caller.
+    Status evaluate(const Schema& schema, const std::vector<BitmapIndexIterator*>& iterators,
+                    uint32_t num_rows, roaring::Roaring* result) const override {
+        BitmapIndexIterator* iterator = iterators[_column_id];
+        if (iterator == nullptr) {
+            return Status::OK();
+        }
+
+        // Rows recorded in the null bitmap are removed for both predicate kinds.
+        if (iterator->has_null_bitmap()) {
+            roaring::Roaring null_bitmap;
+            RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap));
+            *result -= null_bitmap;
+        }
+
+        // Union of the row bitmaps of every listed value that has an exact
+        // match in the index dictionary.
+        roaring::Roaring matched_rows;
+        for (auto value : _values) {
+            bool exact_match;
+            Status seek_status = iterator->seek_dictionary(&value, &exact_match);
+            rowid_t seeked_ordinal = iterator->current_ordinal();
+            if (seek_status.is_not_found()) {
+                continue;
+            }
+            if (!seek_status.ok()) {
+                return seek_status;
+            }
+            if (!exact_match) {
+                continue;
+            }
+            roaring::Roaring value_rows;
+            RETURN_IF_ERROR(iterator->read_bitmap(seeked_ordinal, &value_rows));
+            matched_rows |= value_rows;
+        }
+
+        // IN_LIST intersects with the matched rows, NOT_IN_LIST subtracts them.
+        if constexpr (PT == PredicateType::IN_LIST) {
+            *result &= matched_rows;
+        } else {
+            *result -= matched_rows;
+        }
+        return Status::OK();
+    }
+
+    // Vectorized-column path: filters `sel` in place and stores the surviving
+    // count in `*size`. Nullability and the runtime `_opposite` flag are both
+    // turned into template arguments of the _base_evaluate helper. For a
+    // nullable column the helper receives the nested column together with the
+    // null map; otherwise it receives the column itself and no null map.
+    void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override {
+        if (!column.is_nullable()) {
+            if (_opposite) {
+                _base_evaluate<false, true>(&column, nullptr, sel, size);
+            } else {
+                _base_evaluate<false, false>(&column, nullptr, sel, size);
+            }
+            return;
+        }
+
+        auto* nullable_wrapper =
+                vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
+        auto& null_flags = reinterpret_cast<const vectorized::ColumnUInt8&>(
+                                   nullable_wrapper->get_null_map_column())
+                                   .get_data();
+        auto& inner_column = nullable_wrapper->get_nested_column();
+
+        if (_opposite) {
+            _base_evaluate<true, true>(&inner_column, &null_flags, sel, size);
+        } else {
+            _base_evaluate<true, false>(&inner_column, &null_flags, sel, size);
+        }
+    }
+
+ // todo(wb) support evaluate_and,evaluate_or
+ // Not implemented for vectorized columns: the body only emits a FATAL-severity
+ // log message and never touches `sel` or `flags`.
+ void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size,
+ bool* flags) const override {
+ LOG(FATAL) << "IColumn not support in_list_predicate.evaluate_and now.";
+ }
+ // Not implemented for vectorized columns: the body only emits a FATAL-severity
+ // log message and never touches `sel` or `flags`.
+ void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size,
+ bool* flags) const override {
+ LOG(FATAL) << "IColumn not support in_list_predicate.evaluate_or now.";
+ }
+
+private:
+    // Comparison selected by the predicate kind: `lhs != rhs` for IN_LIST and
+    // `lhs == rhs` otherwise. Callers pass the result of `_values.find(x)` and
+    // `_values.end()`, so the result is true when x is in the set for IN_LIST
+    // and true when x is absent for NOT_IN_LIST.
+    template <typename LeftT, typename RightT>
+    bool _operator(const LeftT& lhs, const RightT& rhs) const {
+        if constexpr (PT == PredicateType::IN_LIST) {
+            return lhs != rhs;
+        } else {
+            return lhs == rhs;
+        }
+    }
+
+    // Selection-vector filter over a ColumnBlock. Walks the first `*size`
+    // entries of `sel`, keeps the row ids whose outcome differs from
+    // `_opposite`, compacts them to the front of `sel` and writes the new count
+    // to `*size`. With is_nullable, a null cell counts as "no match" and its
+    // value is not read.
+    template <bool is_nullable>
+    void _base_evaluate(const ColumnBlock* block, uint16_t* sel, uint16_t* size) const {
+        const uint16_t candidates = *size;
+        uint16_t kept = 0;
+        for (uint16_t pos = 0; pos < candidates; ++pos) {
+            const uint16_t row = sel[pos];
+            // The slot is written unconditionally and only claimed on a match.
+            sel[kept] = row;
+            const T* cell_value = reinterpret_cast<const T*>(block->cell(row).cell_ptr());
+            bool matched;
+            if constexpr (is_nullable) {
+                matched = !block->cell(row).is_null() &&
+                          _operator(_values.find(*cell_value), _values.end());
+            } else {
+                matched = _operator(_values.find(*cell_value), _values.end());
+            }
+            if (matched != _opposite) {
+                ++kept;
+            }
+        }
+        *size = kept;
+    }
+
+    // Flag-combining filter over a ColumnBlock, shared by evaluate_and
+    // (is_and = true) and evaluate_or (is_and = false).
+    //
+    // For every position i in [0, size) the row sel[i] is tested and the
+    // outcome, XOR-ed with `_opposite`, is AND-ed or OR-ed into flags[i].
+    // With is_nullable, a null cell produces result == false before the
+    // `_opposite` inversion and its value is never read.
+    //
+    // Positions whose flag is already settled are skipped: a false flag cannot
+    // become true under AND, and a true flag cannot become false under OR.
+    // The skip test therefore depends on is_and; skipping on `!flags[i]` in
+    // both modes would turn the OR path into a no-op.
+    template <bool is_nullable, bool is_and>
+    void _base_evaluate(const ColumnBlock* block, const uint16_t* sel, uint16_t size,
+                        bool* flags) const {
+        for (uint16_t i = 0; i < size; ++i) {
+            // AND: skip when the flag is false. OR: skip when the flag is true.
+            if (flags[i] != is_and) {
+                continue;
+            }
+
+            uint16_t idx = sel[i];
+            bool result = true;
+            if constexpr (is_nullable) {
+                result = !block->cell(idx).is_null();
+            }
+            // Only look the value up when the cell is not null.
+            if (result) {
+                const T* cell_value = reinterpret_cast<const T*>(block->cell(idx).cell_ptr());
+                result = _operator(_values.find(*cell_value), _values.end());
+            }
+
+            if constexpr (is_and) {
+                flags[i] &= _opposite ^ result;
+            } else {
+                flags[i] |= _opposite ^ result;
+            }
+        }
+    }
+
+    // Selection-vector filter over a vectorized column. Walks the first `*size`
+    // entries of `sel`, compacts the accepted row ids to the front of `sel` and
+    // writes the new count to `*size`.
+    //
+    // Acceptance rule, identical for both column layouts:
+    //   - with is_nullable, a row whose null-map entry is set is kept only when
+    //     is_opposite is true, and its value is not inspected;
+    //   - any other row is kept when (predicate outcome) != is_opposite, where
+    //     the outcome is "value in _values" for IN_LIST and "value not in
+    //     _values" for NOT_IN_LIST.
+    //
+    // `null_map` is only dereferenced when is_nullable is true.
+    template <bool is_nullable, bool is_opposite>
+    void _base_evaluate(const vectorized::IColumn* column,
+                        const vectorized::PaddedPODArray<vectorized::UInt8>* null_map,
+                        uint16_t* sel, uint16_t* size) const {
+        uint16_t new_size = 0;
+
+        if (column->is_column_dictionary()) {
+            if constexpr (std::is_same_v<T, StringValue>) {
+                auto* nested_col_ptr = vectorized::check_and_get_column<
+                        vectorized::ColumnDictionary<vectorized::Int32>>(column);
+                auto& data_array = nested_col_ptr->get_data();
+                // selected[code] is non-zero when that dictionary word is in
+                // _values. This is raw membership, not yet the predicate outcome.
+                std::vector<vectorized::UInt8> selected;
+                nested_col_ptr->find_codes(_values, selected);
+
+                for (uint16_t i = 0; i < *size; i++) {
+                    uint16_t idx = sel[i];
+                    if constexpr (is_nullable) {
+                        if ((*null_map)[idx]) {
+                            if constexpr (is_opposite) {
+                                sel[new_size++] = idx;
+                            }
+                            continue;
+                        }
+                    }
+
+                    // Membership must be turned into the predicate outcome
+                    // here, so both PT and is_opposite take part in the choice.
+                    if constexpr (is_opposite != (PT == PredicateType::IN_LIST)) {
+                        if (selected[data_array[idx]]) {
+                            sel[new_size++] = idx;
+                        }
+                    } else {
+                        if (!selected[data_array[idx]]) {
+                            sel[new_size++] = idx;
+                        }
+                    }
+                }
+            } else {
+                LOG(FATAL) << "column_dictionary must use StringValue predicate.";
+            }
+        } else {
+            auto* nested_col_ptr =
+                    vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(column);
+            auto& data_array = nested_col_ptr->get_data();
+
+            for (uint16_t i = 0; i < *size; i++) {
+                uint16_t idx = sel[i];
+                if constexpr (is_nullable) {
+                    if ((*null_map)[idx]) {
+                        if constexpr (is_opposite) {
+                            sel[new_size++] = idx;
+                        }
+                        continue;
+                    }
+                }
+
+                // _operator already encodes IN versus NOT IN, so only
+                // is_opposite may invert it. Folding PT into the choice again
+                // would negate NOT_IN_LIST twice.
+                const bool matched = _operator(
+                        _values.find(reinterpret_cast<const T&>(data_array[idx])), _values.end());
+                if (matched != is_opposite) {
+                    sel[new_size++] = idx;
+                }
+            }
+        }
+
+        *size = new_size;
+    }
+
+ phmap::flat_hash_set<T> _values;
+};
+
+// IN predicate: InListPredicateBase instantiated with PredicateType::IN_LIST.
+template <class T>
+using InListPredicate = InListPredicateBase<T, PredicateType::IN_LIST>;
+
+// NOT IN predicate: InListPredicateBase instantiated with PredicateType::NOT_IN_LIST.
+template <class T>
+using NotInListPredicate = InListPredicateBase<T, PredicateType::NOT_IN_LIST>;
} //namespace doris
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 29db3a334c..d38bc4e049 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -21,21 +21,10 @@
#include <algorithm>
-#include "gutil/hash/string_hash.h"
-#include "olap/column_predicate.h"
-#include "olap/comparison_predicate.h"
-#include "olap/decimal12.h"
-#include "olap/in_list_predicate.h"
-#include "olap/uint24.h"
#include "runtime/string_value.h"
-#include "util/slice.h"
#include "vec/columns/column.h"
-#include "vec/columns/column_decimal.h"
-#include "vec/columns/column_impl.h"
#include "vec/columns/column_string.h"
-#include "vec/columns/column_vector.h"
#include "vec/columns/predicate_column.h"
-#include "vec/common/typeid_cast.h"
#include "vec/core/types.h"
namespace doris::vectorized {
@@ -259,7 +248,7 @@ public:
uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); }
void find_codes(const phmap::flat_hash_set<StringValue>& values,
- std::vector<bool>& selected) const {
+ std::vector<vectorized::UInt8>& selected) const {
return _dict.find_codes(values, selected);
}
@@ -363,13 +352,12 @@ public:
}
void find_codes(const phmap::flat_hash_set<StringValue>& values,
- std::vector<bool>& selected) const {
+ std::vector<vectorized::UInt8>& selected) const {
size_t dict_word_num = _dict_data.size();
selected.resize(dict_word_num);
selected.assign(dict_word_num, false);
for (const auto& value : values) {
- auto it = _inverted_index.find(value);
- if (it != _inverted_index.end()) {
+ if (auto it = _inverted_index.find(value); it != _inverted_index.end()) {
selected[it->second] = true;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org