You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/07/27 08:12:29 UTC

[doris] branch master updated: [enhancement][Storage] refactor create predicate (#11017)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 461a31b1f6 [enhancement][Storage] refactor create predicate (#11017)
461a31b1f6 is described below

commit 461a31b1f6fe43d7ac2dc8b797f1a96d935689ca
Author: Pxl <95...@qq.com>
AuthorDate: Wed Jul 27 16:12:23 2022 +0800

    [enhancement][Storage] refactor create predicate (#11017)
---
 be/src/olap/column_predicate.h                     |  11 +
 be/src/olap/predicate_creator.h                    | 260 ++++++++++++
 be/src/olap/reader.cpp                             | 437 ++-------------------
 be/src/olap/reader.h                               |  21 +-
 be/src/vec/runtime/vdatetime_value.h               |   2 +-
 .../java/org/apache/doris/udf/UdfExecutorTest.java |   4 +-
 6 files changed, 311 insertions(+), 424 deletions(-)

diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index b2480fe672..2edf19d6da 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -51,7 +51,18 @@ struct PredicateTypeTraits {
         return (type == PredicateType::LT || type == PredicateType::LE ||
                 type == PredicateType::GT || type == PredicateType::GE);
     }
+
     static constexpr bool is_bloom_filter(PredicateType type) { return type == PredicateType::BF; }
+
+    // True when `type` is one of the two value-list predicate kinds
+    // (IN_LIST or NOT_IN_LIST).
+    static constexpr bool is_list(PredicateType type) {
+        return type == PredicateType::NOT_IN_LIST || type == PredicateType::IN_LIST;
+    }
+
+    // True for the six single-value comparison predicate kinds:
+    // EQ, NE, LT, LE, GT and GE.
+    static constexpr bool is_comparison(PredicateType type) {
+        const bool is_equality = type == PredicateType::EQ || type == PredicateType::NE;
+        const bool is_ordering = type == PredicateType::LT || type == PredicateType::LE ||
+                                 type == PredicateType::GT || type == PredicateType::GE;
+        return is_equality || is_ordering;
+    }
 };
 
 class ColumnPredicate {
diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h
new file mode 100644
index 0000000000..1605d34337
--- /dev/null
+++ b/be/src/olap/predicate_creator.h
@@ -0,0 +1,260 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <charconv>
+
+#include "olap/column_predicate.h"
+#include "olap/comparison_predicate.h"
+#include "olap/in_list_predicate.h"
+#include "olap/olap_cond.h"
+#include "olap/tablet_schema.h"
+#include "util/date_func.h"
+
+namespace doris {
+
+/// Abstract factory that turns textual condition value(s) into a ColumnPredicate.
+///
+/// ConditionType is the shape the condition text arrives in; the helpers in
+/// this file instantiate it with std::string (one comparison value) and
+/// std::vector<std::string> (a list of values).
+template <class ConditionType>
+class PredicateCreator {
+public:
+    virtual ~PredicateCreator() = default;
+
+    /// Builds a predicate from `conditions` for `column`.
+    ///
+    /// The implementations in this file forward `index` and `opposite` to the
+    /// predicate constructor, use `pool` only when value bytes must be copied,
+    /// and return an object allocated with `new`.
+    virtual ColumnPredicate* create(const TabletColumn& column, int index,
+                                    const ConditionType& conditions, bool opposite,
+                                    MemPool* pool) = 0;
+};
+
+/// Creator for integer-typed columns.
+///
+/// Each condition string is parsed into CppType with std::from_chars. When PT
+/// is a list kind the parsed values are collected into a hash set for an
+/// InListPredicateBase; otherwise a single value feeds a ComparisonPredicateBase.
+template <typename CppType, PredicateType PT, typename ConditionType>
+class IntegerPredicateCreator : public PredicateCreator<ConditionType> {
+public:
+    /// `column` and `pool` are not used by this creator.
+    /// Returns a predicate allocated with `new`.
+    ColumnPredicate* create(const TabletColumn& column, int index, const ConditionType& conditions,
+                            bool opposite, MemPool* pool) override {
+        if constexpr (PredicateTypeTraits::is_list(PT)) {
+            phmap::flat_hash_set<CppType> values;
+            for (const auto& condition : conditions) {
+                values.insert(convert(condition));
+            }
+            return new InListPredicateBase<CppType, PT>(index, std::move(values), opposite);
+        } else {
+            static_assert(PredicateTypeTraits::is_comparison(PT));
+            return new ComparisonPredicateBase<CppType, PT>(index, convert(conditions), opposite);
+        }
+    }
+
+private:
+    /// Parses `condition` as a base-10 integer.
+    ///
+    /// The returned value is exactly what std::from_chars left in `value`:
+    /// 0 when nothing could be parsed or the number is out of range, or the
+    /// leading numeric prefix when trailing characters remain. Those cases used
+    /// to pass silently; they are now reported so a bad literal is visible
+    /// in the log instead of quietly becoming a different predicate.
+    static CppType convert(const std::string& condition) {
+        CppType value = 0;
+        const char* const end = condition.data() + condition.size();
+        const auto [ptr, ec] = std::from_chars(condition.data(), end, value);
+        if (ec != std::errc() || ptr != end) {
+            LOG(WARNING) << "Failed to fully parse integer predicate value: '" << condition
+                         << "'";
+        }
+        return value;
+    }
+};
+
+/// Creator for the DECIMAL32 / DECIMAL64 / DECIMAL128 column types.
+///
+/// Condition strings are parsed with StringParser::string_to_decimal using the
+/// column's precision and scale, then converted to CppType.
+template <typename CppType, PredicateType PT, typename ConditionType>
+class DecimalPredicateCreator : public PredicateCreator<ConditionType> {
+public:
+    /// `pool` is not used by this creator.
+    /// Returns a predicate allocated with `new`.
+    ColumnPredicate* create(const TabletColumn& column, int index, const ConditionType& conditions,
+                            bool opposite, MemPool* pool) override {
+        if constexpr (!PredicateTypeTraits::is_list(PT)) {
+            static_assert(PredicateTypeTraits::is_comparison(PT));
+            return new ComparisonPredicateBase<CppType, PT>(index, convert(column, conditions),
+                                                            opposite);
+        } else {
+            phmap::flat_hash_set<CppType> parsed_values;
+            for (const auto& text : conditions) {
+                parsed_values.insert(convert(column, text));
+            }
+            return new InListPredicateBase<CppType, PT>(index, std::move(parsed_values),
+                                                        opposite);
+        }
+    }
+
+private:
+    /// Parses `condition` as an int128_t decimal and converts it to CppType.
+    /// The parse status is written to a local and not inspected.
+    CppType convert(const TabletColumn& column, const std::string& condition) {
+        StringParser::ParseResult parse_status = StringParser::ParseResult::PARSE_SUCCESS;
+        const auto wide_value = StringParser::string_to_decimal<int128_t>(
+                condition.data(), condition.size(), column.precision(), column.frac(),
+                &parse_status);
+        return static_cast<CppType>(wide_value);
+    }
+};
+
+/// Creator for string-like columns; values are StringValue views over bytes
+/// copied into the caller-supplied MemPool.
+///
+/// With `should_padding` set, each value buffer is at least column.length()
+/// bytes long and zero-filled after the condition's own bytes.
+template <PredicateType PT, typename ConditionType>
+class StringPredicateCreator : public PredicateCreator<ConditionType> {
+public:
+    StringPredicateCreator(bool should_padding) : _should_padding(should_padding) {}
+
+    /// Returns a predicate allocated with `new`; the value bytes it refers to
+    /// are allocated from `pool`.
+    ColumnPredicate* create(const TabletColumn& column, int index, const ConditionType& conditions,
+                            bool opposite, MemPool* pool) override {
+        if constexpr (!PredicateTypeTraits::is_list(PT)) {
+            static_assert(PredicateTypeTraits::is_comparison(PT));
+            return new ComparisonPredicateBase<StringValue, PT>(
+                    index, convert(column, conditions, pool), opposite);
+        } else {
+            phmap::flat_hash_set<StringValue> string_values;
+            for (const auto& text : conditions) {
+                string_values.insert(convert(column, text, pool));
+            }
+            return new InListPredicateBase<StringValue, PT>(index, std::move(string_values),
+                                                            opposite);
+        }
+    }
+
+private:
+    bool _should_padding;
+
+    /// Copies `condition` into a zeroed buffer taken from `pool` and wraps it.
+    StringValue convert(const TabletColumn& column, const std::string& condition, MemPool* pool) {
+        const size_t text_len = condition.length();
+        // Padded values never shrink below the declared column length.
+        const size_t length =
+                _should_padding ? std::max(static_cast<size_t>(column.length()), text_len)
+                                : text_len;
+
+        char* buffer = reinterpret_cast<char*>(pool->allocate(length));
+        memset(buffer, 0, length);
+        memory_copy(buffer, condition.data(), text_len);
+
+        return StringValue(buffer, length);
+    }
+};
+
+/// Creator whose string-to-value conversion is supplied by the caller as a
+/// callable taking the condition text and returning CppType.
+template <typename CppType, PredicateType PT, typename ConditionType>
+struct CustomPredicateCreator : public PredicateCreator<ConditionType> {
+public:
+    /// Stores a copy of `convert`; it is invoked once per condition value.
+    CustomPredicateCreator(const std::function<CppType(const std::string& condition)>& convert)
+            : _convert(convert) {}
+
+    /// `column` and `pool` are not used by this creator.
+    /// Returns a predicate allocated with `new`.
+    ColumnPredicate* create(const TabletColumn& column, int index, const ConditionType& conditions,
+                            bool opposite, MemPool* pool) override {
+        if constexpr (!PredicateTypeTraits::is_list(PT)) {
+            static_assert(PredicateTypeTraits::is_comparison(PT));
+            return new ComparisonPredicateBase<CppType, PT>(index, _convert(conditions), opposite);
+        } else {
+            phmap::flat_hash_set<CppType> converted;
+            for (const auto& text : conditions) {
+                converted.insert(_convert(text));
+            }
+            return new InListPredicateBase<CppType, PT>(index, std::move(converted), opposite);
+        }
+    }
+
+private:
+    std::function<CppType(const std::string& condition)> _convert;
+};
+
+/// Picks the PredicateCreator implementation for a storage field type.
+///
+/// Returns nullptr for any field type that has no case below.
+template <PredicateType PT, typename ConditionType>
+inline std::unique_ptr<PredicateCreator<ConditionType>> get_creator(const FieldType& type) {
+    switch (type) {
+    // Integer family: parsed with std::from_chars.
+    case OLAP_FIELD_TYPE_TINYINT:
+        return std::make_unique<IntegerPredicateCreator<int8_t, PT, ConditionType>>();
+    case OLAP_FIELD_TYPE_SMALLINT:
+        return std::make_unique<IntegerPredicateCreator<int16_t, PT, ConditionType>>();
+    case OLAP_FIELD_TYPE_INT:
+        return std::make_unique<IntegerPredicateCreator<int32_t, PT, ConditionType>>();
+    case OLAP_FIELD_TYPE_BIGINT:
+        return std::make_unique<IntegerPredicateCreator<int64_t, PT, ConditionType>>();
+    case OLAP_FIELD_TYPE_LARGEINT:
+        return std::make_unique<IntegerPredicateCreator<int128_t, PT, ConditionType>>();
+
+    // decimal12_t parses itself from the condition text.
+    case OLAP_FIELD_TYPE_DECIMAL: {
+        auto parse_decimal12 = [](const std::string& text) {
+            decimal12_t parsed = {0, 0};
+            parsed.from_string(text);
+            return parsed;
+        };
+        return std::make_unique<CustomPredicateCreator<decimal12_t, PT, ConditionType>>(
+                parse_decimal12);
+    }
+
+    // Decimal types parsed with the column's precision and scale.
+    case OLAP_FIELD_TYPE_DECIMAL32:
+        return std::make_unique<DecimalPredicateCreator<int32_t, PT, ConditionType>>();
+    case OLAP_FIELD_TYPE_DECIMAL64:
+        return std::make_unique<DecimalPredicateCreator<int64_t, PT, ConditionType>>();
+    case OLAP_FIELD_TYPE_DECIMAL128:
+        return std::make_unique<DecimalPredicateCreator<int128_t, PT, ConditionType>>();
+
+    // Strings: only CHAR asks for padding up to the column length.
+    case OLAP_FIELD_TYPE_CHAR:
+        return std::make_unique<StringPredicateCreator<PT, ConditionType>>(true);
+    case OLAP_FIELD_TYPE_VARCHAR:
+    case OLAP_FIELD_TYPE_STRING:
+        return std::make_unique<StringPredicateCreator<PT, ConditionType>>(false);
+
+    // Date and time types: conversion is delegated to the timestamp_from_* helpers.
+    case OLAP_FIELD_TYPE_DATE:
+        return std::make_unique<CustomPredicateCreator<uint24_t, PT, ConditionType>>(
+                timestamp_from_date);
+    case OLAP_FIELD_TYPE_DATEV2:
+        return std::make_unique<CustomPredicateCreator<uint32_t, PT, ConditionType>>(
+                timestamp_from_date_v2);
+    case OLAP_FIELD_TYPE_DATETIME:
+        return std::make_unique<CustomPredicateCreator<uint64_t, PT, ConditionType>>(
+                timestamp_from_datetime);
+    case OLAP_FIELD_TYPE_DATETIMEV2:
+        return std::make_unique<CustomPredicateCreator<uint64_t, PT, ConditionType>>(
+                timestamp_from_datetime_v2);
+
+    // BOOL: try a numeric literal first (non-zero is true); if that fails,
+    // fall back to StringParser::string_to_bool.
+    case OLAP_FIELD_TYPE_BOOL: {
+        auto parse_bool = [](const std::string& text) {
+            int32_t as_int = 0;
+            const auto status =
+                    std::from_chars(text.data(), text.data() + text.size(), as_int);
+            if (status.ec != std::errc()) {
+                StringParser::ParseResult parse_result;
+                const bool parsed =
+                        StringParser::string_to_bool(text.data(), text.size(), &parse_result);
+                return parsed;
+            }
+            return as_int != 0;
+        };
+        return std::make_unique<CustomPredicateCreator<bool, PT, ConditionType>>(parse_bool);
+    }
+
+    default:
+        return nullptr;
+    }
+}
+
+/// Creates a predicate of kind PT on `column` from `conditions`.
+///
+/// Looks up the creator for column.type() and delegates to it. Returns nullptr
+/// when get_creator() has no creator for that type, instead of dereferencing a
+/// null pointer. Otherwise returns the predicate produced by the creator's
+/// create().
+template <PredicateType PT, typename ConditionType>
+inline ColumnPredicate* create_predicate(const TabletColumn& column, int index,
+                                         const ConditionType& conditions, bool opposite,
+                                         MemPool* pool) {
+    const auto creator = get_creator<PT, ConditionType>(column.type());
+    if (creator == nullptr) {
+        // get_creator() returns nullptr from its default case.
+        LOG(WARNING) << "Unsupported column type for predicate creation, type="
+                     << static_cast<int>(column.type());
+        return nullptr;
+    }
+    return creator->create(column, index, conditions, opposite, pool);
+}
+
+/// Creates a single-value comparison predicate on `column`.
+///
+/// PT must satisfy PredicateTypeTraits::is_comparison (checked at compile
+/// time). All arguments are passed straight through to create_predicate.
+template <PredicateType PT>
+inline ColumnPredicate* create_comparison_predicate(const TabletColumn& column, int index,
+                                                    const std::string& condition, bool opposite,
+                                                    MemPool* pool) {
+    static_assert(PredicateTypeTraits::is_comparison(PT));
+    using SingleCondition = std::string;
+    return create_predicate<PT, SingleCondition>(column, index, condition, opposite, pool);
+}
+
+/// Creates a value-list predicate on `column` from `conditions`.
+///
+/// PT must satisfy PredicateTypeTraits::is_list (checked at compile time).
+/// BOOL columns are rejected with a fatal log; every other column type is
+/// passed through to create_predicate.
+template <PredicateType PT>
+inline ColumnPredicate* create_list_predicate(const TabletColumn& column, int index,
+                                              const std::vector<std::string>& conditions,
+                                              bool opposite, MemPool* pool) {
+    static_assert(PredicateTypeTraits::is_list(PT));
+    if (column.type() == OLAP_FIELD_TYPE_BOOL) {
+        LOG(FATAL) << "Failed to create list predicate! input column type is invalid";
+        // LOG(FATAL) is expected to abort; the return keeps every path
+        // returning a value.
+        return nullptr;
+    }
+    return create_predicate<PT, std::vector<std::string>>(column, index, conditions, opposite,
+                                                          pool);
+}
+
+} //namespace doris
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index b098e24a70..4e14e5daaf 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -19,10 +19,6 @@
 
 #include <parallel_hashmap/phmap.h>
 
-#include <boost/algorithm/string/case_conv.hpp>
-#include <charconv>
-#include <unordered_set>
-
 #include "common/status.h"
 #include "olap/bloom_filter_predicate.h"
 #include "olap/collect_iterator.h"
@@ -31,19 +27,16 @@
 #include "olap/like_column_predicate.h"
 #include "olap/null_predicate.h"
 #include "olap/olap_common.h"
+#include "olap/predicate_creator.h"
 #include "olap/row.h"
 #include "olap/row_cursor.h"
 #include "olap/schema.h"
 #include "olap/tablet.h"
 #include "runtime/mem_pool.h"
-#include "util/date_func.h"
 #include "util/mem_util.hpp"
+#include "util/string_util.h"
 #include "vec/data_types/data_type_decimal.h"
 
-using std::nothrow;
-using std::set;
-using std::vector;
-
 namespace doris {
 
 void TabletReader::ReaderParams::check_validation() const {
@@ -294,7 +287,7 @@ Status TabletReader::_init_return_columns(const ReaderParams& read_params) {
 
         if (!_delete_handler.empty()) {
             // We need to fetch columns which there are deletion conditions on them.
-            set<uint32_t> column_set(_return_columns.begin(), _return_columns.end());
+            std::set<uint32_t> column_set(_return_columns.begin(), _return_columns.end());
             for (const auto& conds : _delete_handler.get_delete_conditions()) {
                 for (const auto& cond_column : conds.del_cond->columns()) {
                     if (column_set.find(cond_column.first) == column_set.end()) {
@@ -479,146 +472,6 @@ void TabletReader::_init_conditions_param(const ReaderParams& read_params) {
     }
 }
 
-#define COMPARISON_PREDICATE_CONDITION_VALUE(NAME, PREDICATE)                                      \
-    ColumnPredicate* TabletReader::_new_##NAME##_pred(                                             \
-            const TabletColumn& column, int index, const std::string& cond, bool opposite) const { \
-        ColumnPredicate* predicate = nullptr;                                                      \
-        switch (column.type()) {                                                                   \
-        case OLAP_FIELD_TYPE_TINYINT: {                                                            \
-            int8_t value = 0;                                                                      \
-            std::from_chars(cond.data(), cond.data() + cond.size(), value);                        \
-            predicate = new PREDICATE<int8_t>(index, value, opposite);                             \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_SMALLINT: {                                                           \
-            int16_t value = 0;                                                                     \
-            std::from_chars(cond.data(), cond.data() + cond.size(), value);                        \
-            predicate = new PREDICATE<int16_t>(index, value, opposite);                            \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_DECIMAL32: {                                                          \
-            int32_t value = 0;                                                                     \
-            StringParser::ParseResult result = StringParser::ParseResult::PARSE_SUCCESS;           \
-            value = (int32_t)StringParser::string_to_decimal<int128_t>(                            \
-                    cond.data(), cond.size(), column.precision(), column.frac(), &result);         \
-                                                                                                   \
-            predicate = new PREDICATE<int32_t>(index, value, opposite);                            \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_DECIMAL64: {                                                          \
-            int64_t value = 0;                                                                     \
-            StringParser::ParseResult result = StringParser::ParseResult::PARSE_SUCCESS;           \
-            value = (int64_t)StringParser::string_to_decimal<int128_t>(                            \
-                    cond.data(), cond.size(), column.precision(), column.frac(), &result);         \
-            predicate = new PREDICATE<int64_t>(index, value, opposite);                            \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_DECIMAL128: {                                                         \
-            int128_t value = 0;                                                                    \
-            StringParser::ParseResult result;                                                      \
-            value = StringParser::string_to_decimal<int128_t>(                                     \
-                    cond.data(), cond.size(), column.precision(), column.frac(), &result);         \
-            predicate = new PREDICATE<int128_t>(index, value, opposite);                           \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_INT: {                                                                \
-            int32_t value = 0;                                                                     \
-            std::from_chars(cond.data(), cond.data() + cond.size(), value);                        \
-            predicate = new PREDICATE<int32_t>(index, value, opposite);                            \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_BIGINT: {                                                             \
-            int64_t value = 0;                                                                     \
-            std::from_chars(cond.data(), cond.data() + cond.size(), value);                        \
-            predicate = new PREDICATE<int64_t>(index, value, opposite);                            \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_LARGEINT: {                                                           \
-            int128_t value = 0;                                                                    \
-            StringParser::ParseResult result;                                                      \
-            value = StringParser::string_to_int<__int128>(cond.data(), cond.size(), &result);      \
-            predicate = new PREDICATE<int128_t>(index, value, opposite);                           \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_DECIMAL: {                                                            \
-            decimal12_t value = {0, 0};                                                            \
-            value.from_string(cond);                                                               \
-            predicate = new PREDICATE<decimal12_t>(index, value, opposite);                        \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_CHAR: {                                                               \
-            StringValue value;                                                                     \
-            size_t length = std::max(static_cast<size_t>(column.length()), cond.length());         \
-            char* buffer = reinterpret_cast<char*>(_predicate_mem_pool->allocate(length));         \
-            memset(buffer, 0, length);                                                             \
-            memory_copy(buffer, cond.c_str(), cond.length());                                      \
-            value.len = length;                                                                    \
-            value.ptr = buffer;                                                                    \
-            predicate = new PREDICATE<StringValue>(index, value, opposite);                        \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_VARCHAR:                                                              \
-        case OLAP_FIELD_TYPE_STRING: {                                                             \
-            StringValue value;                                                                     \
-            int32_t length = cond.length();                                                        \
-            char* buffer = reinterpret_cast<char*>(_predicate_mem_pool->allocate(length));         \
-            memory_copy(buffer, cond.c_str(), length);                                             \
-            value.len = length;                                                                    \
-            value.ptr = buffer;                                                                    \
-            predicate = new PREDICATE<StringValue>(index, value, opposite);                        \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_DATE: {                                                               \
-            uint24_t value = timestamp_from_date(cond);                                            \
-            predicate = new PREDICATE<uint24_t>(index, value, opposite);                           \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_DATEV2: {                                                             \
-            uint32_t value = timestamp_from_date_v2(cond);                                         \
-            predicate = new PREDICATE<uint32_t>(index, value, opposite);                           \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_DATETIMEV2: {                                                         \
-            uint64_t value = timestamp_from_datetime_v2(cond);                                     \
-            predicate = new PREDICATE<uint64_t>(index, value, opposite);                           \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_DATETIME: {                                                           \
-            uint64_t value = timestamp_from_datetime(cond);                                        \
-            predicate = new PREDICATE<uint64_t>(index, value, opposite);                           \
-            break;                                                                                 \
-        }                                                                                          \
-        case OLAP_FIELD_TYPE_BOOL: {                                                               \
-            int32_t ivalue = 0;                                                                    \
-            auto result = std::from_chars(cond.data(), cond.data() + cond.size(), ivalue);         \
-            bool value = false;                                                                    \
-            if (result.ec == std::errc()) {                                                        \
-                if (ivalue == 0) {                                                                 \
-                    value = false;                                                                 \
-                } else {                                                                           \
-                    value = true;                                                                  \
-                }                                                                                  \
-            } else {                                                                               \
-                StringParser::ParseResult parse_result;                                            \
-                value = StringParser::string_to_bool(cond.data(), cond.size(), &parse_result);     \
-            }                                                                                      \
-            predicate = new PREDICATE<bool>(index, value, opposite);                               \
-            break;                                                                                 \
-        }                                                                                          \
-        default:                                                                                   \
-            break;                                                                                 \
-        }                                                                                          \
-                                                                                                   \
-        return predicate;                                                                          \
-    }
-
-COMPARISON_PREDICATE_CONDITION_VALUE(eq, EqualPredicate)
-COMPARISON_PREDICATE_CONDITION_VALUE(ne, NotEqualPredicate)
-COMPARISON_PREDICATE_CONDITION_VALUE(lt, LessPredicate)
-COMPARISON_PREDICATE_CONDITION_VALUE(le, LessEqualPredicate)
-COMPARISON_PREDICATE_CONDITION_VALUE(gt, GreaterPredicate)
-COMPARISON_PREDICATE_CONDITION_VALUE(ge, GreaterEqualPredicate)
-
 ColumnPredicate* TabletReader::_parse_to_predicate(
         const std::pair<std::string, std::shared_ptr<IBloomFilterFuncBase>>& bloom_filter) {
     int32_t index = _tablet_schema->field_index(bloom_filter.first);
@@ -650,265 +503,41 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition,
     }
 
     const TabletColumn& column = _tablet_schema->column(index);
-    ColumnPredicate* predicate = nullptr;
-
-    if ((condition.condition_op == "*=" || condition.condition_op == "!*=" ||
-         condition.condition_op == "=" || condition.condition_op == "!=") &&
-        condition.condition_values.size() == 1) {
-        predicate = condition.condition_op == "*=" || condition.condition_op == "="
-                            ? _new_eq_pred(column, index, condition.condition_values[0], opposite)
-                            : _new_ne_pred(column, index, condition.condition_values[0], opposite);
+
+    if (to_lower(condition.condition_op) == "is") {
+        return new NullPredicate(index, to_lower(condition.condition_values[0]) == "null",
+                                 opposite);
+    }
+
+    if ((condition.condition_op == "*=" || condition.condition_op == "!*=") &&
+        condition.condition_values.size() > 1) {
+        decltype(create_list_predicate<PredicateType::UNKNOWN>)* create = nullptr;
+
+        if (condition.condition_op == "*=") {
+            create = create_list_predicate<PredicateType::IN_LIST>;
+        } else {
+            create = create_list_predicate<PredicateType::NOT_IN_LIST>;
+        }
+        return create(column, index, condition.condition_values, opposite,
+                      _predicate_mem_pool.get());
+    }
+
+    decltype(create_comparison_predicate<PredicateType::UNKNOWN>)* create = nullptr;
+    if (condition.condition_op == "*=" || condition.condition_op == "=") {
+        create = create_comparison_predicate<PredicateType::EQ>;
+    } else if (condition.condition_op == "!*=" || condition.condition_op == "!=") {
+        create = create_comparison_predicate<PredicateType::NE>;
     } else if (condition.condition_op == "<<") {
-        predicate = _new_lt_pred(column, index, condition.condition_values[0], opposite);
+        create = create_comparison_predicate<PredicateType::LT>;
     } else if (condition.condition_op == "<=") {
-        predicate = _new_le_pred(column, index, condition.condition_values[0], opposite);
+        create = create_comparison_predicate<PredicateType::LE>;
     } else if (condition.condition_op == ">>") {
-        predicate = _new_gt_pred(column, index, condition.condition_values[0], opposite);
+        create = create_comparison_predicate<PredicateType::GT>;
     } else if (condition.condition_op == ">=") {
-        predicate = _new_ge_pred(column, index, condition.condition_values[0], opposite);
-    } else if ((condition.condition_op == "*=" || condition.condition_op == "!*=") &&
-               condition.condition_values.size() > 1) {
-        switch (column.type()) {
-        case OLAP_FIELD_TYPE_TINYINT: {
-            phmap::flat_hash_set<int8_t> values;
-            int8_t value = 0;
-            for (auto& cond_val : condition.condition_values) {
-                std::from_chars(cond_val.data(), cond_val.data() + cond_val.size(), value);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<int8_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<int8_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_SMALLINT: {
-            phmap::flat_hash_set<int16_t> values;
-            int16_t value = 0;
-            for (auto& cond_val : condition.condition_values) {
-                std::from_chars(cond_val.data(), cond_val.data() + cond_val.size(), value);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<int16_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<int16_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_DECIMAL32: {
-            phmap::flat_hash_set<int32_t> values;
-            for (auto& cond_val : condition.condition_values) {
-                StringParser::ParseResult result = StringParser::ParseResult::PARSE_SUCCESS;
-                int128_t val = StringParser::string_to_decimal<int128_t>(
-                        cond_val.data(), cond_val.size(), column.precision(), column.frac(),
-                        &result);
-                if (result == StringParser::ParseResult::PARSE_SUCCESS) {
-                    values.insert((int32_t)val);
-                }
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<int32_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<int32_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_DECIMAL64: {
-            phmap::flat_hash_set<int64_t> values;
-            for (auto& cond_val : condition.condition_values) {
-                StringParser::ParseResult result;
-                int128_t val = StringParser::string_to_decimal<int128_t>(
-                        cond_val.data(), cond_val.size(), column.precision(), column.frac(),
-                        &result);
-                if (result == StringParser::ParseResult::PARSE_SUCCESS) {
-                    values.insert((int64_t)val);
-                }
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<int64_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<int64_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_DECIMAL128: {
-            phmap::flat_hash_set<int128_t> values;
-            int128_t val;
-            for (auto& cond_val : condition.condition_values) {
-                StringParser::ParseResult result = StringParser::ParseResult::PARSE_SUCCESS;
-                val = StringParser::string_to_decimal<int128_t>(cond_val.data(), cond_val.size(),
-                                                                column.precision(), column.frac(),
-                                                                &result);
-                if (result == StringParser::ParseResult::PARSE_SUCCESS) {
-                    values.insert(val);
-                }
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<int128_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<int128_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_INT: {
-            phmap::flat_hash_set<int32_t> values;
-            int32_t value = 0;
-            for (auto& cond_val : condition.condition_values) {
-                std::from_chars(cond_val.data(), cond_val.data() + cond_val.size(), value);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<int32_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<int32_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_BIGINT: {
-            phmap::flat_hash_set<int64_t> values;
-            int64_t value = 0;
-            for (auto& cond_val : condition.condition_values) {
-                std::from_chars(cond_val.data(), cond_val.data() + cond_val.size(), value);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<int64_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<int64_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_LARGEINT: {
-            phmap::flat_hash_set<int128_t> values;
-            int128_t value = 0;
-            StringParser::ParseResult result;
-            for (auto& cond_val : condition.condition_values) {
-                value = StringParser::string_to_int<__int128>(cond_val.c_str(), cond_val.size(),
-                                                              &result);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<int128_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<int128_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_DECIMAL: {
-            phmap::flat_hash_set<decimal12_t> values;
-            for (auto& cond_val : condition.condition_values) {
-                decimal12_t value = {0, 0};
-                value.from_string(cond_val);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<decimal12_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<decimal12_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_CHAR: {
-            phmap::flat_hash_set<StringValue> values;
-            for (auto& cond_val : condition.condition_values) {
-                StringValue value;
-                size_t length = std::max(static_cast<size_t>(column.length()), cond_val.length());
-                char* buffer = reinterpret_cast<char*>(_predicate_mem_pool->allocate(length));
-                memset(buffer, 0, length);
-                memory_copy(buffer, cond_val.c_str(), cond_val.length());
-                value.len = length;
-                value.ptr = buffer;
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<StringValue>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<StringValue>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_VARCHAR:
-        case OLAP_FIELD_TYPE_STRING: {
-            phmap::flat_hash_set<StringValue> values;
-            for (auto& cond_val : condition.condition_values) {
-                StringValue value;
-                int32_t length = cond_val.length();
-                char* buffer = reinterpret_cast<char*>(_predicate_mem_pool->allocate(length));
-                memory_copy(buffer, cond_val.c_str(), length);
-                value.len = length;
-                value.ptr = buffer;
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<StringValue>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<StringValue>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_DATE: {
-            phmap::flat_hash_set<uint24_t> values;
-            for (auto& cond_val : condition.condition_values) {
-                uint24_t value = timestamp_from_date(cond_val);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<uint24_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<uint24_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_DATEV2: {
-            phmap::flat_hash_set<uint32_t> values;
-            for (auto& cond_val : condition.condition_values) {
-                uint32_t value = timestamp_from_date_v2(cond_val);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<uint32_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<uint32_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_DATETIMEV2: {
-            phmap::flat_hash_set<uint64_t> values;
-            for (auto& cond_val : condition.condition_values) {
-                uint64_t value = timestamp_from_datetime_v2(cond_val);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<uint64_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<uint64_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        case OLAP_FIELD_TYPE_DATETIME: {
-            phmap::flat_hash_set<uint64_t> values;
-            for (auto& cond_val : condition.condition_values) {
-                uint64_t value = timestamp_from_datetime(cond_val);
-                values.insert(value);
-            }
-            if (condition.condition_op == "*=") {
-                predicate = new InListPredicate<uint64_t>(index, std::move(values), opposite);
-            } else {
-                predicate = new NotInListPredicate<uint64_t>(index, std::move(values), opposite);
-            }
-            break;
-        }
-        // OLAP_FIELD_TYPE_BOOL is not valid in this case.
-        default:
-            break;
-        }
-    } else if (boost::to_lower_copy(condition.condition_op) == "is") {
-        predicate = new NullPredicate(
-                index, boost::to_lower_copy(condition.condition_values[0]) == "null", opposite);
+        create = create_comparison_predicate<PredicateType::GE>;
     }
-    return predicate;
+    return create(column, index, condition.condition_values[0], opposite,
+                  _predicate_mem_pool.get());
 }
 void TabletReader::_init_load_bf_columns(const ReaderParams& read_params) {
     _init_load_bf_columns(read_params, &_conditions, &_load_bf_columns);
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h
index 4739e190e8..b14d77a8b8 100644
--- a/be/src/olap/reader.h
+++ b/be/src/olap/reader.h
@@ -17,18 +17,17 @@
 
 #pragma once
 
-#include <gen_cpp/PaloInternalService_types.h>
 #include <thrift/protocol/TDebugProtocol.h>
 
 #include "exprs/bloomfilter_predicate.h"
 #include "exprs/function_filter.h"
-#include "olap/column_predicate.h"
 #include "olap/delete_handler.h"
 #include "olap/olap_cond.h"
 #include "olap/row_cursor.h"
 #include "olap/rowset/rowset_reader.h"
 #include "olap/tablet.h"
 #include "olap/tablet_schema.h"
+#include "util/date_func.h"
 #include "util/runtime_profile.h"
 
 namespace doris {
@@ -102,6 +101,9 @@ public:
 
     virtual ~TabletReader();
 
+    TabletReader(const TabletReader&) = delete;
+    void operator=(const TabletReader&) = delete;
+
     // Initialize TabletReader with tablet, data version and fetch range.
     virtual Status init(const ReaderParams& read_params);
 
@@ -150,19 +152,6 @@ protected:
 
     void _init_conditions_param(const ReaderParams& read_params);
 
-    ColumnPredicate* _new_eq_pred(const TabletColumn& column, int index, const std::string& cond,
-                                  bool opposite) const;
-    ColumnPredicate* _new_ne_pred(const TabletColumn& column, int index, const std::string& cond,
-                                  bool opposite) const;
-    ColumnPredicate* _new_lt_pred(const TabletColumn& column, int index, const std::string& cond,
-                                  bool opposite) const;
-    ColumnPredicate* _new_le_pred(const TabletColumn& column, int index, const std::string& cond,
-                                  bool opposite) const;
-    ColumnPredicate* _new_gt_pred(const TabletColumn& column, int index, const std::string& cond,
-                                  bool opposite) const;
-    ColumnPredicate* _new_ge_pred(const TabletColumn& column, int index, const std::string& cond,
-                                  bool opposite) const;
-
     ColumnPredicate* _parse_to_predicate(const TCondition& condition, bool opposite = false) const;
 
     ColumnPredicate* _parse_to_predicate(
@@ -221,8 +210,6 @@ protected:
 
     uint64_t _merged_rows = 0;
     OlapReaderStatistics _stats;
-
-    DISALLOW_COPY_AND_ASSIGN(TabletReader);
 };
 
 } // namespace doris
diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h
index 5b6663c796..e00d5bc0c8 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -237,7 +237,7 @@ public:
     // olap storage layer date data format:
     // 64 bits binary data [year(remaining bits), month(4 bits), day(5 bits)]
     // execute layer date/datetime and olap storage layer datetime data format:
-    // 8 bytes interger data [year(remaining digits), month(2 digits), day(2 digits), hour(2 digits), minute(2 digits) ,second(2 digits)]
+    // 8 bytes integer data [year(remaining digits), month(2 digits), day(2 digits), hour(2 digits), minute(2 digits) ,second(2 digits)]
 
     static VecDateTimeValue create_from_olap_date(uint64_t value) {
         VecDateTimeValue date;
diff --git a/fe/java-udf/src/test/java/org/apache/doris/udf/UdfExecutorTest.java b/fe/java-udf/src/test/java/org/apache/doris/udf/UdfExecutorTest.java
index 814839f2aa..f48df0d0c4 100644
--- a/fe/java-udf/src/test/java/org/apache/doris/udf/UdfExecutorTest.java
+++ b/fe/java-udf/src/test/java/org/apache/doris/udf/UdfExecutorTest.java
@@ -218,8 +218,8 @@ public class UdfExecutorTest {
             assert (UdfUtils.UNSAFE.getByte(outputNull + i) == 0);
             UdfUtils.copyMemory(null, outputBuffer + 16 * i, bytes, UdfUtils.BYTE_ARRAY_OFFSET, bytes.length);
 
-            BigInteger interger = new BigInteger(convertByteOrder(bytes));
-            BigDecimal result = new BigDecimal(interger, 9);
+            BigInteger integer = new BigInteger(convertByteOrder(bytes));
+            BigDecimal result = new BigDecimal(integer, 9);
             assert (result.equals(decimalArray[i]));
         }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org