You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/07/21 16:57:51 UTC

[doris] branch branch-2.0 updated (a1b4338ecb -> a31be851df)

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a change to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


    from a1b4338ecb [fix](compaction) fix time series compaction point policy (#21670)
     new edf236f574 [fix](planner)shouldn't force push down conjuncts for union statement (#22079)
     new 1f1cc3b04d [fix](nereids)PredicatePropagation only support integer types for now (#22096)
     new a31be851df [Bug](node) fix partition sort node forget handle some type of key in hashmap  (#22037)

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/vec/exec/vpartition_sort_node.h             | 130 ++++++++++++++++++++-
 .../rules/rewrite/PredicatePropagation.java        |  22 ++--
 .../apache/doris/planner/SingleNodePlanner.java    |  12 --
 .../test_push_conjuncts_inlineview.groovy          |   2 +-
 .../infer_predicate/infer_predicate.groovy         |   7 ++
 5 files changed, 143 insertions(+), 30 deletions(-)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 03/03: [Bug](node) fix partition sort node forget handle some type of key in hashmap (#22037)

Posted by kx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit a31be851df41b2fddf1c182c7f6ffba01e59dd70
Author: zhangstar333 <87...@users.noreply.github.com>
AuthorDate: Fri Jul 21 23:30:40 2023 +0800

    [Bug](node) fix partition sort node forget handle some type of key in hashmap  (#22037)
    
    * [enhancement](repeat) add filter in repeat node in BE
    
    * update
---
 be/src/vec/exec/vpartition_sort_node.h | 130 +++++++++++++++++++++++++++++++--
 1 file changed, 125 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/exec/vpartition_sort_node.h b/be/src/vec/exec/vpartition_sort_node.h
index 45b44da81e..65384dc1c5 100644
--- a/be/src/vec/exec/vpartition_sort_node.h
+++ b/be/src/vec/exec/vpartition_sort_node.h
@@ -26,6 +26,7 @@
 #include "exec/exec_node.h"
 #include "vec/columns/column.h"
 #include "vec/common/columns_hashing.h"
+#include "vec/common/hash_table/fixed_hash_map.h"
 #include "vec/common/hash_table/hash.h"
 #include "vec/common/hash_table/ph_hash_map.h"
 #include "vec/common/hash_table/string_hash_map.h"
@@ -83,8 +84,13 @@ public:
 using PartitionDataPtr = PartitionBlocks*;
 using PartitionDataWithStringKey = PHHashMap<StringRef, PartitionDataPtr, DefaultHash<StringRef>>;
 using PartitionDataWithShortStringKey = StringHashMap<PartitionDataPtr>;
+using PartitionDataWithUInt8Key =
+        FixedImplicitZeroHashMapWithCalculatedSize<UInt8, PartitionDataPtr>;
+using PartitionDataWithUInt16Key = FixedImplicitZeroHashMap<UInt16, PartitionDataPtr>;
 using PartitionDataWithUInt32Key = PHHashMap<UInt32, PartitionDataPtr, HashCRC32<UInt32>>;
-
+using PartitionDataWithUInt64Key = PHHashMap<UInt64, PartitionDataPtr, HashCRC32<UInt64>>;
+using PartitionDataWithUInt128Key = PHHashMap<UInt128, PartitionDataPtr, HashCRC32<UInt128>>;
+using PartitionDataWithUInt256Key = PHHashMap<UInt256, PartitionDataPtr, HashCRC32<UInt256>>;
 template <typename TData>
 struct PartitionMethodSerialized {
     using Data = TData;
@@ -249,11 +255,48 @@ struct PartitionMethodSingleNullableColumn : public SingleColumnMethod {
     using State = ColumnsHashing::HashMethodSingleLowNullableColumn<BaseState, Mapped, true>;
 };
 
+template <typename TData, bool has_nullable_keys_ = false>
+struct PartitionMethodKeysFixed {
+    using Data = TData;
+    using Key = typename Data::key_type;
+    using Mapped = typename Data::mapped_type;
+    using Iterator = typename Data::iterator;
+    static constexpr bool has_nullable_keys = has_nullable_keys_;
+
+    Data data;
+    Iterator iterator;
+    PartitionMethodKeysFixed() = default;
+
+    template <typename Other>
+    PartitionMethodKeysFixed(const Other& other) : data(other.data) {}
+
+    using State = ColumnsHashing::HashMethodKeysFixed<typename Data::value_type, Key, Mapped,
+                                                      has_nullable_keys, false>;
+};
+
 using PartitionedMethodVariants =
         std::variant<PartitionMethodSerialized<PartitionDataWithStringKey>,
+                     PartitionMethodOneNumber<UInt8, PartitionDataWithUInt8Key>,
+                     PartitionMethodOneNumber<UInt16, PartitionDataWithUInt16Key>,
                      PartitionMethodOneNumber<UInt32, PartitionDataWithUInt32Key>,
+                     PartitionMethodOneNumber<UInt64, PartitionDataWithUInt64Key>,
+                     PartitionMethodOneNumber<UInt128, PartitionDataWithUInt128Key>,
+                     PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
+                             UInt8, PartitionDataWithNullKey<PartitionDataWithUInt8Key>>>,
+                     PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
+                             UInt16, PartitionDataWithNullKey<PartitionDataWithUInt16Key>>>,
                      PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
                              UInt32, PartitionDataWithNullKey<PartitionDataWithUInt32Key>>>,
+                     PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
+                             UInt64, PartitionDataWithNullKey<PartitionDataWithUInt64Key>>>,
+                     PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
+                             UInt128, PartitionDataWithNullKey<PartitionDataWithUInt128Key>>>,
+                     PartitionMethodKeysFixed<PartitionDataWithUInt64Key, false>,
+                     PartitionMethodKeysFixed<PartitionDataWithUInt64Key, true>,
+                     PartitionMethodKeysFixed<PartitionDataWithUInt128Key, false>,
+                     PartitionMethodKeysFixed<PartitionDataWithUInt128Key, true>,
+                     PartitionMethodKeysFixed<PartitionDataWithUInt256Key, false>,
+                     PartitionMethodKeysFixed<PartitionDataWithUInt256Key, true>,
                      PartitionMethodStringNoCache<PartitionDataWithShortStringKey>,
                      PartitionMethodSingleNullableColumn<PartitionMethodStringNoCache<
                              PartitionDataWithNullKey<PartitionDataWithShortStringKey>>>>;
@@ -283,11 +326,34 @@ struct PartitionedHashMapVariants {
     void init(Type type, bool is_nullable = false) {
         _type = type;
         switch (_type) {
-        case Type::serialized:
+        case Type::serialized: {
             _partition_method_variant
                     .emplace<PartitionMethodSerialized<PartitionDataWithStringKey>>();
             break;
-        case Type::int32_key:
+        }
+        case Type::int8_key: {
+            if (is_nullable) {
+                _partition_method_variant
+                        .emplace<PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
+                                UInt8, PartitionDataWithNullKey<PartitionDataWithUInt8Key>>>>();
+            } else {
+                _partition_method_variant
+                        .emplace<PartitionMethodOneNumber<UInt8, PartitionDataWithUInt8Key>>();
+            }
+            break;
+        }
+        case Type::int16_key: {
+            if (is_nullable) {
+                _partition_method_variant
+                        .emplace<PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
+                                UInt16, PartitionDataWithNullKey<PartitionDataWithUInt16Key>>>>();
+            } else {
+                _partition_method_variant
+                        .emplace<PartitionMethodOneNumber<UInt16, PartitionDataWithUInt16Key>>();
+            }
+            break;
+        }
+        case Type::int32_key: {
             if (is_nullable) {
                 _partition_method_variant
                         .emplace<PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
@@ -297,7 +363,60 @@ struct PartitionedHashMapVariants {
                         .emplace<PartitionMethodOneNumber<UInt32, PartitionDataWithUInt32Key>>();
             }
             break;
-        case Type::string_key:
+        }
+        case Type::int64_key: {
+            if (is_nullable) {
+                _partition_method_variant
+                        .emplace<PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
+                                UInt64, PartitionDataWithNullKey<PartitionDataWithUInt64Key>>>>();
+            } else {
+                _partition_method_variant
+                        .emplace<PartitionMethodOneNumber<UInt64, PartitionDataWithUInt64Key>>();
+            }
+            break;
+        }
+        case Type::int128_key: {
+            if (is_nullable) {
+                _partition_method_variant
+                        .emplace<PartitionMethodSingleNullableColumn<PartitionMethodOneNumber<
+                                UInt128, PartitionDataWithNullKey<PartitionDataWithUInt128Key>>>>();
+            } else {
+                _partition_method_variant
+                        .emplace<PartitionMethodOneNumber<UInt128, PartitionDataWithUInt128Key>>();
+            }
+            break;
+        }
+        case Type::int64_keys: {
+            if (is_nullable) {
+                _partition_method_variant
+                        .emplace<PartitionMethodKeysFixed<PartitionDataWithUInt64Key, true>>();
+            } else {
+                _partition_method_variant
+                        .emplace<PartitionMethodKeysFixed<PartitionDataWithUInt64Key, false>>();
+            }
+            break;
+        }
+        case Type::int128_keys: {
+            if (is_nullable) {
+                _partition_method_variant
+                        .emplace<PartitionMethodKeysFixed<PartitionDataWithUInt128Key, true>>();
+            } else {
+                _partition_method_variant
+                        .emplace<PartitionMethodKeysFixed<PartitionDataWithUInt128Key, false>>();
+            }
+            break;
+        }
+        case Type::int256_keys: {
+            if (is_nullable) {
+                _partition_method_variant
+                        .emplace<PartitionMethodKeysFixed<PartitionDataWithUInt256Key, true>>();
+            } else {
+                _partition_method_variant
+                        .emplace<PartitionMethodKeysFixed<PartitionDataWithUInt256Key, false>>();
+            }
+            break;
+        }
+        case Type::string_key: {
             if (is_nullable) {
                 _partition_method_variant
                         .emplace<PartitionMethodSingleNullableColumn<PartitionMethodStringNoCache<
@@ -307,8 +426,9 @@ struct PartitionedHashMapVariants {
                         .emplace<PartitionMethodStringNoCache<PartitionDataWithShortStringKey>>();
             }
             break;
+        }
         default:
-            DCHECK(false) << "Do not have a rigth partition by data type";
+            DCHECK(false) << "Do not have a rigth partition by data type: ";
         }
     }
 };


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 01/03: [fix](planner)shouldn't force push down conjuncts for union statement (#22079)

Posted by kx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit edf236f574920b701de57480c6f04376fa5c3792
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Fri Jul 21 21:12:56 2023 +0800

    [fix](planner)shouldn't force push down conjuncts for union statement (#22079)
    
    * [fix](planner)shouldn't force push down conjuncts for union statement
---
 .../java/org/apache/doris/planner/SingleNodePlanner.java     | 12 ------------
 .../correctness_p0/test_push_conjuncts_inlineview.groovy     |  2 +-
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index 488b09b6cc..e7f130ee36 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -1823,18 +1823,6 @@ public class SingleNodePlanner {
             e.setIsOnClauseConjunct(false);
         }
         inlineViewRef.getAnalyzer().registerConjuncts(viewPredicates, inlineViewRef.getAllTupleIds());
-        QueryStmt queryStmt = inlineViewRef.getQueryStmt();
-        if (queryStmt instanceof SetOperationStmt) {
-            // registerConjuncts for every set operand
-            SetOperationStmt setOperationStmt = (SetOperationStmt) queryStmt;
-            for (SetOperationStmt.SetOperand setOperand : setOperationStmt.getOperands()) {
-                setOperand.getAnalyzer().registerConjuncts(
-                        Expr.substituteList(viewPredicates, setOperand.getSmap(),
-                                setOperand.getAnalyzer(), false),
-                        inlineViewRef.getAllTupleIds());
-            }
-        }
-
         // mark (fully resolve) slots referenced by remaining unassigned conjuncts as
         // materialized
         List<Expr> substUnassigned = Expr.substituteList(unassignedConjuncts,
diff --git a/regression-test/suites/correctness_p0/test_push_conjuncts_inlineview.groovy b/regression-test/suites/correctness_p0/test_push_conjuncts_inlineview.groovy
index 462dfed26d..7cadefc83a 100644
--- a/regression-test/suites/correctness_p0/test_push_conjuncts_inlineview.groovy
+++ b/regression-test/suites/correctness_p0/test_push_conjuncts_inlineview.groovy
@@ -57,7 +57,7 @@ suite("test_push_conjuncts_inlineview") {
                     )a
                 where
                     a.px = 1;""")
-        contains "4:VSELECT"
+        contains "5:VSELECT"
     }
 
 explain {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[doris] 02/03: [fix](nereids)PredicatePropagation only support integer types for now (#22096)

Posted by kx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1f1cc3b04d13c01d92edb53178a53dab4b19057e
Author: starocean999 <40...@users.noreply.github.com>
AuthorDate: Fri Jul 21 23:40:08 2023 +0800

    [fix](nereids)PredicatePropagation only support integer types for now (#22096)
---
 .../rules/rewrite/PredicatePropagation.java        | 22 ++++++++++------------
 .../infer_predicate/infer_predicate.groovy         |  7 +++++++
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
index 9602bb4a56..cc45952817 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
@@ -83,14 +83,14 @@ public class PredicatePropagation {
                 return super.visit(cp, context);
             }
 
-            private boolean isOriginDataTypeBigger(DataType originDataType, Expression expr) {
+            private boolean isDataTypeValid(DataType originDataType, Expression expr) {
                 if ((leftSlotEqualToRightSlot.child(0).getDataType() instanceof IntegralType)
                         && (leftSlotEqualToRightSlot.child(1).getDataType() instanceof IntegralType)
                                 && (originDataType instanceof IntegralType)) {
                     // infer filter can not be lower than original datatype, or dataset would be wrong
-                    if (((IntegralType) originDataType).widerThan(
+                    if (!((IntegralType) originDataType).widerThan(
                             (IntegralType) leftSlotEqualToRightSlot.child(0).getDataType())
-                                    || ((IntegralType) originDataType).widerThan(
+                                    && !((IntegralType) originDataType).widerThan(
                                             (IntegralType) leftSlotEqualToRightSlot.child(1).getDataType())) {
                         return true;
                     }
@@ -100,16 +100,14 @@ public class PredicatePropagation {
 
             private Expression replaceSlot(Expression expr, DataType originDataType) {
                 return expr.rewriteUp(e -> {
-                    if (isOriginDataTypeBigger(originDataType, leftSlotEqualToRightSlot)) {
-                        return e;
-                    }
-                    if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(0))) {
-                        return leftSlotEqualToRightSlot.child(1);
-                    } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(1))) {
-                        return leftSlotEqualToRightSlot.child(0);
-                    } else {
-                        return e;
+                    if (isDataTypeValid(originDataType, leftSlotEqualToRightSlot)) {
+                        if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(0))) {
+                            return leftSlotEqualToRightSlot.child(1);
+                        } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(1))) {
+                            return leftSlotEqualToRightSlot.child(0);
+                        }
                     }
+                    return e;
                 });
             }
         }, null);
diff --git a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
index ac46201185..f93de7273b 100644
--- a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
+++ b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
@@ -26,6 +26,8 @@ suite("test_infer_predicate") {
 
     sql '''create table infer_tb2 (k1 tinyint, k2 smallint, k3 int, k4 bigint, k5 largeint, k6 date, k7 datetime, k8 float, k9 double) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''
 
+    sql '''create table infer_tb3 (k1 varchar(100), k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''
+
     explain {
         sql "select * from infer_tb1 inner join infer_tb2 where infer_tb2.k1 = infer_tb1.k2  and infer_tb2.k1 = 1;"
         contains "PREDICATES: k2[#20] = 1"
@@ -40,4 +42,9 @@ suite("test_infer_predicate") {
         sql "select * from infer_tb1 inner join infer_tb2 where cast(infer_tb2.k4 as int) = infer_tb1.k2  and infer_tb2.k4 = 1;"
         notContains "PREDICATES: k2[#20] = 1"
     }
+
+    explain {
+        sql "select * from infer_tb1 inner join infer_tb3 where infer_tb3.k1 = infer_tb1.k2  and infer_tb3.k1 = '123';"
+        notContains "PREDICATES: k2[#6] = '123'"
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org