You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/07/02 13:07:20 UTC

[GitHub] [doris] morningman commented on a diff in pull request #10555: [feature] support `max_by` and `min_by` on row-based engine

morningman commented on code in PR #10555:
URL: https://github.com/apache/doris/pull/10555#discussion_r912361287


##########
be/src/exprs/aggregate_functions.cpp:
##########
@@ -363,6 +363,320 @@ struct DecimalV2AvgState {
     int64_t count = 0;
 };
 
+template <typename T, typename KT>
+struct MaxMinByState {
+    T val1;
+    KT val2;
+    bool flag = false;
+};
+
+template <typename T, typename KT>
+struct MaxMinByStateWithString {
+    T val1;
+    KT val2;
+    bool flag = false;
+
+    static const int STRING_LENGTH_RECORD_LENGTH = 4;
+    StringVal serialize(FunctionContext* ctx) {
+        // calculate total serialize buffer length
+        int total_serialized_set_length = 1;
+        if constexpr (std::is_same_v<StringVal, T>) {
+            total_serialized_set_length += STRING_LENGTH_RECORD_LENGTH + ((StringVal)val1).len;
+        } else {
+            total_serialized_set_length += STRING_LENGTH_RECORD_LENGTH + sizeof(T);
+        }
+
+        if constexpr (std::is_same_v<StringVal, KT>) {
+            total_serialized_set_length += STRING_LENGTH_RECORD_LENGTH + ((StringVal)val2).len;
+        } else {
+            total_serialized_set_length += STRING_LENGTH_RECORD_LENGTH + sizeof(KT);
+        }
+
+        StringVal result(ctx, total_serialized_set_length);
+        uint8_t* writer = result.ptr;
+        // type
+        *writer = flag;
+        writer++;
+
+        if constexpr (std::is_same_v<StringVal, T>) {
+            *(int*)writer = ((StringVal)val1).len;
+            writer += STRING_LENGTH_RECORD_LENGTH;
+            memcpy(writer, ((StringVal)val1).ptr, ((StringVal)val1).len);
+            writer += ((StringVal)val1).len;
+        } else {
+            *(int*)writer = sizeof(T);
+            writer += STRING_LENGTH_RECORD_LENGTH;
+            *(T*)writer = val1;
+            writer += sizeof(T);
+        }
+
+        if constexpr (std::is_same_v<StringVal, KT>) {
+            *(int*)writer = ((StringVal)val2).len;
+            writer += STRING_LENGTH_RECORD_LENGTH;
+            memcpy(writer, ((StringVal)val2).ptr, ((StringVal)val2).len);
+        } else {
+            *(int*)writer = sizeof(KT);
+            writer += STRING_LENGTH_RECORD_LENGTH;
+            *(KT*)writer = val2;
+        }
+        return result;
+    }
+
+    void deserialize(const StringVal& src) {
+        uint8_t* reader = src.ptr;
+        // skip type ,no used now
+        flag = (bool)*reader;
+        reader++;
+        const uint8_t* end = src.ptr + src.len;
+
+        const int val1_length = *(int*)reader;
+        reader += STRING_LENGTH_RECORD_LENGTH;
+
+        if constexpr (std::is_same_v<StringVal, T>) {
+            StringVal value((uint8_t*)reader, val1_length);
+            val1 = value;
+        } else {
+            val1 = *(T*)reader;
+        }
+        reader += val1_length;
+
+        const int val2_length = *(int*)reader;
+        reader += STRING_LENGTH_RECORD_LENGTH;
+        if constexpr (std::is_same_v<StringVal, KT>) {
+            StringVal value((uint8_t*)reader, val2_length);
+            val2 = value;
+        } else {
+            val2 = *(KT*)reader;
+        }
+        reader += val2_length;
+        DCHECK(reader == end);
+    }
+};
+
+template <typename T, typename KT>
+void AggregateFunctions::maxminby_init(FunctionContext* ctx, StringVal* dst) {
+    dst->is_null = false;
+    int len;
+    if constexpr (std::is_same_v<T, StringVal> || std::is_same_v<KT, StringVal>) {
+        len = sizeof(MaxMinByStateWithString<T, KT>);
+        dst->ptr = (uint8_t*)new MaxMinByStateWithString<T, KT>;
+    } else {
+        len = sizeof(MaxMinByState<T, KT>);
+        dst->ptr = (uint8_t*)new MaxMinByState<T, KT>;
+    }
+    dst->len = len;
+}
+
+template <typename T, bool max_by_fn>
+constexpr bool maxminby_compare(T x, T y) {
+    if constexpr (max_by_fn) {
+        if constexpr (std::is_same_v<T, StringVal>) {
+            return x.to_string() > y.to_string();
+        } else if constexpr (std::is_same_v<T, DateTimeVal>) {
+            return x.packed_time > y.packed_time;
+        } else {
+            return x.val > y.val;
+        }
+    } else {
+        if constexpr (std::is_same_v<T, StringVal>) {
+            return x.to_string() < y.to_string();
+        } else if constexpr (std::is_same_v<T, DateTimeVal>) {
+            return x.packed_time < y.packed_time;
+        } else {
+            return x.val < y.val;
+        }
+    }
+}
+
+template <typename T, typename KT, bool max_by_fn>
+void AggregateFunctions::maxminby_update(FunctionContext* ctx, const T& slot1, const KT& slot2,
+                                         StringVal* dst) {
+    if (slot1.is_null) {
+        return;
+    }
+    DCHECK(dst->ptr != nullptr);
+    if constexpr (std::is_same_v<T, StringVal> || std::is_same_v<KT, StringVal>) {
+        DCHECK_EQ(sizeof(MaxMinByStateWithString<T, KT>), dst->len);
+        auto max_by = reinterpret_cast<MaxMinByStateWithString<T, KT>*>(dst->ptr);
+
+        bool condition = false;
+        if constexpr (std::is_same_v<StringVal, KT>) {
+            condition = !max_by->flag || maxminby_compare<KT, max_by_fn>(slot2, max_by->val2);

Review Comment:
   I can't see any difference between these `if`/`else` branches?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org