You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/05/05 08:37:08 UTC

[GitHub] [incubator-doris] Gabriel39 opened a new pull request, #9376: [BUG][Vectorized] fix `replace_if_not_null` in vectorized compaction

Gabriel39 opened a new pull request, #9376:
URL: https://github.com/apache/incubator-doris/pull/9376

   # Proposed changes
   
   Issue Number: close #9374 
   
   ## Problem Summary:
   
   Describe the overview of changes.
   
   ## Checklist(Required)
   
   1. Does it affect the original behavior: (Yes/No/I Don't know)
   2. Has unit tests been added: (Yes/No/No Need)
   3. Has document been added or modified: (Yes/No/No Need)
   4. Does it need to update dependencies: (Yes/No)
   5. Are there any changes that cannot be rolled back: (Yes/No)
   
   ## Further comments
   
   If this is a relatively large or complex change, kick off the discussion at [dev@doris.apache.org](mailto:dev@doris.apache.org) by explaining why you chose the solution you did and what alternatives you considered, etc...
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [incubator-doris] HappenLee commented on a diff in pull request #9376: [BUG][Vectorized] fix `replace_if_not_null` in vectorized compaction

Posted by GitBox <gi...@apache.org>.
HappenLee commented on code in PR #9376:
URL: https://github.com/apache/incubator-doris/pull/9376#discussion_r866474902


##########
be/src/vec/aggregate_functions/aggregate_function_window.h:
##########
@@ -408,61 +489,78 @@ class WindowFunctionData final
 };
 
 template <template <typename> class AggregateFunctionTemplate, template <typename> class Data,
-          bool is_nullable, bool is_copy = false>
+          bool is_nullable, bool is_copy = false, bool init_with_null = false>
 static IAggregateFunction* create_function_single_value(const String& name,
                                                         const DataTypes& argument_types,
                                                         const Array& parameters) {
     using StoreType = std::conditional_t<is_copy, CopiedValue, Value>;
 
     assert_arity_at_most<3>(name, argument_types);
 
-    auto type = argument_types[0].get();
-    if (type->is_nullable()) {
-        type = assert_cast<const DataTypeNullable*>(type)->get_nested_type().get();
-    }
+    auto type = remove_nullable(argument_types[0]);
     WhichDataType which(*type);
 
-#define DISPATCH(TYPE)                        \
-    if (which.idx == TypeIndex::TYPE)         \
-        return new AggregateFunctionTemplate< \
-                Data<LeadAndLagData<TYPE, is_nullable, false, StoreType>>>(argument_types);
+#define DISPATCH(TYPE)                                                                      \
+    if (which.idx == TypeIndex::TYPE)                                                       \
+        return new AggregateFunctionTemplate<                                               \
+                Data<LeadAndLagData<TYPE, is_nullable, false, StoreType, init_with_null>>>( \
+                argument_types);
     FOR_NUMERIC_TYPES(DISPATCH)
 #undef DISPATCH
 
     if (which.is_decimal()) {
         return new AggregateFunctionTemplate<
-                Data<LeadAndLagData<Int128, is_nullable, false, StoreType>>>(argument_types);
+                Data<LeadAndLagData<Int128, is_nullable, false, StoreType, init_with_null>>>(
+                argument_types);
     }
     if (which.is_date_or_datetime()) {
         return new AggregateFunctionTemplate<
-                Data<LeadAndLagData<Int64, is_nullable, false, StoreType>>>(argument_types);
+                Data<LeadAndLagData<Int64, is_nullable, false, StoreType, init_with_null>>>(
+                argument_types);
     }
     if (which.is_string_or_fixed_string()) {
         return new AggregateFunctionTemplate<
-                Data<LeadAndLagData<StringRef, is_nullable, true, StoreType>>>(argument_types);
+                Data<LeadAndLagData<StringRef, is_nullable, true, StoreType, init_with_null>>>(
+                argument_types);
     }
     DCHECK(false) << "with unknowed type, failed in  create_aggregate_function_leadlag";
     return nullptr;
 }
 
-template <bool is_nullable, bool is_copy>
+template <bool is_nullable, bool is_copy, bool replace_if_not_null = false>
 AggregateFunctionPtr create_aggregate_function_first(const std::string& name,
                                                      const DataTypes& argument_types,
                                                      const Array& parameters,
                                                      bool result_is_nullable) {
-    return AggregateFunctionPtr(
-            create_function_single_value<WindowFunctionData, WindowFunctionFirstData, is_nullable,
-                                         is_copy>(name, argument_types, parameters));
+    if constexpr (replace_if_not_null) {
+        return AggregateFunctionPtr(
+                create_function_single_value<WindowFunctionData, WindowFunctionFirstNonNullData,
+                                             is_nullable, is_copy, true>(name, argument_types,
+                                                                         parameters));
+    } else {
+        return AggregateFunctionPtr(
+                create_function_single_value<WindowFunctionData, WindowFunctionFirstData,
+                                             is_nullable, is_copy>(name, argument_types,
+                                                                   parameters));
+    }
 }
 
-template <bool is_nullable, bool is_copy>
+template <bool is_nullable, bool is_copy, bool replace_if_not_null = false>
 AggregateFunctionPtr create_aggregate_function_last(const std::string& name,

Review Comment:
   Maybe we should split the registration of `last_value` and `last_not_null_value`.



##########
be/src/vec/aggregate_functions/aggregate_function_window.h:
##########
@@ -192,6 +194,9 @@ struct LeadAndLagData {
         _default_value.reset();
         _is_init = false;
         _has_value = false;
+        if constexpr (init_with_null) {

Review Comment:
   This logic is only used in the child class, so it should not be done in the parent.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [incubator-doris] github-actions[bot] commented on pull request #9376: [BUG][Vectorized] fix `replace_if_not_null` in vectorized compaction

Posted by GitBox <gi...@apache.org>.
github-actions[bot] commented on PR #9376:
URL: https://github.com/apache/incubator-doris/pull/9376#issuecomment-1119318618

   PR approved by anyone and no changes requested.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [incubator-doris] github-actions[bot] commented on pull request #9376: [BUG][Vectorized] fix `replace_if_not_null` in vectorized compaction

Posted by GitBox <gi...@apache.org>.
github-actions[bot] commented on PR #9376:
URL: https://github.com/apache/incubator-doris/pull/9376#issuecomment-1119318590

   PR approved by at least one committer and no changes requested.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [incubator-doris] yiguolei merged pull request #9376: [BUG][Vectorized] fix `replace_if_not_null` in vectorized compaction

Posted by GitBox <gi...@apache.org>.
yiguolei merged PR #9376:
URL: https://github.com/apache/incubator-doris/pull/9376


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [incubator-doris] BiteTheDDDDt commented on a diff in pull request #9376: [BUG][Vectorized] fix `replace_if_not_null` in vectorized compaction

Posted by GitBox <gi...@apache.org>.
BiteTheDDDDt commented on code in PR #9376:
URL: https://github.com/apache/incubator-doris/pull/9376#discussion_r865707004


##########
be/src/vec/aggregate_functions/aggregate_function_window.h:
##########
@@ -348,6 +353,44 @@ struct WindowFunctionFirstData : Data {
     static const char* name() { return "first_value"; }
 };
 
+template <typename Data>
+struct WindowFunctionFirstNonNullData : Data {
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, const IColumn** columns) {
+        if (this->has_set_value()) {
+            return;
+        }
+        if (frame_start < frame_end &&
+            frame_end <= partition_start) { //rewrite last_value when under partition
+            this->set_is_null();            //so no need more judge
+            return;
+        }
+        frame_start = std::max<int64_t>(frame_start, partition_start);
+        frame_end = std::min<int64_t>(frame_end, partition_end);
+        for (int i = frame_start; i < frame_end; i++) {
+            if (const auto* nullable_column = check_and_get_column<ColumnNullable>(columns[0])) {

Review Comment:
   Why not move check_and_get_column outside the loop to improve performance?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [incubator-doris] HappenLee commented on a diff in pull request #9376: [BUG][Vectorized] fix `replace_if_not_null` in vectorized compaction

Posted by GitBox <gi...@apache.org>.
HappenLee commented on code in PR #9376:
URL: https://github.com/apache/incubator-doris/pull/9376#discussion_r866507672


##########
be/src/vec/aggregate_functions/aggregate_function_window.h:
##########
@@ -348,6 +355,48 @@ struct WindowFunctionFirstData : Data {
     static const char* name() { return "first_value"; }
 };
 
+template <typename Data>
+struct WindowFunctionFirstNonNullData : Data {
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, const IColumn** columns) {
+        if (this->has_set_value()) {
+            return;
+        }
+        if (frame_start < frame_end &&
+            frame_end <= partition_start) { //rewrite last_value when under partition
+            this->set_is_null();            //so no need more judge
+            return;
+        }
+        this->set_null_if_need();
+        frame_start = std::max<int64_t>(frame_start, partition_start);
+        frame_end = std::min<int64_t>(frame_end, partition_end);
+        if (const auto* nullable_column = check_and_get_column<ColumnNullable>(columns[0])) {

Review Comment:
   Use `if constexpr` to speed up the function.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [incubator-doris] Gabriel39 commented on a diff in pull request #9376: [BUG][Vectorized] fix `replace_if_not_null` in vectorized compaction

Posted by GitBox <gi...@apache.org>.
Gabriel39 commented on code in PR #9376:
URL: https://github.com/apache/incubator-doris/pull/9376#discussion_r865723943


##########
be/src/vec/aggregate_functions/aggregate_function_window.h:
##########
@@ -348,6 +353,44 @@ struct WindowFunctionFirstData : Data {
     static const char* name() { return "first_value"; }
 };
 
+template <typename Data>
+struct WindowFunctionFirstNonNullData : Data {
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, const IColumn** columns) {
+        if (this->has_set_value()) {
+            return;
+        }
+        if (frame_start < frame_end &&
+            frame_end <= partition_start) { //rewrite last_value when under partition
+            this->set_is_null();            //so no need more judge
+            return;
+        }
+        frame_start = std::max<int64_t>(frame_start, partition_start);
+        frame_end = std::min<int64_t>(frame_end, partition_end);
+        for (int i = frame_start; i < frame_end; i++) {
+            if (const auto* nullable_column = check_and_get_column<ColumnNullable>(columns[0])) {

Review Comment:
   Done, thanks for the reminder.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


[GitHub] [incubator-doris] Gabriel39 commented on a diff in pull request #9376: [BUG][Vectorized] fix `replace_if_not_null` in vectorized compaction

Posted by GitBox <gi...@apache.org>.
Gabriel39 commented on code in PR #9376:
URL: https://github.com/apache/incubator-doris/pull/9376#discussion_r866489799


##########
be/src/vec/aggregate_functions/aggregate_function_window.h:
##########
@@ -408,61 +489,78 @@ class WindowFunctionData final
 };
 
 template <template <typename> class AggregateFunctionTemplate, template <typename> class Data,
-          bool is_nullable, bool is_copy = false>
+          bool is_nullable, bool is_copy = false, bool init_with_null = false>
 static IAggregateFunction* create_function_single_value(const String& name,
                                                         const DataTypes& argument_types,
                                                         const Array& parameters) {
     using StoreType = std::conditional_t<is_copy, CopiedValue, Value>;
 
     assert_arity_at_most<3>(name, argument_types);
 
-    auto type = argument_types[0].get();
-    if (type->is_nullable()) {
-        type = assert_cast<const DataTypeNullable*>(type)->get_nested_type().get();
-    }
+    auto type = remove_nullable(argument_types[0]);
     WhichDataType which(*type);
 
-#define DISPATCH(TYPE)                        \
-    if (which.idx == TypeIndex::TYPE)         \
-        return new AggregateFunctionTemplate< \
-                Data<LeadAndLagData<TYPE, is_nullable, false, StoreType>>>(argument_types);
+#define DISPATCH(TYPE)                                                                      \
+    if (which.idx == TypeIndex::TYPE)                                                       \
+        return new AggregateFunctionTemplate<                                               \
+                Data<LeadAndLagData<TYPE, is_nullable, false, StoreType, init_with_null>>>( \
+                argument_types);
     FOR_NUMERIC_TYPES(DISPATCH)
 #undef DISPATCH
 
     if (which.is_decimal()) {
         return new AggregateFunctionTemplate<
-                Data<LeadAndLagData<Int128, is_nullable, false, StoreType>>>(argument_types);
+                Data<LeadAndLagData<Int128, is_nullable, false, StoreType, init_with_null>>>(
+                argument_types);
     }
     if (which.is_date_or_datetime()) {
         return new AggregateFunctionTemplate<
-                Data<LeadAndLagData<Int64, is_nullable, false, StoreType>>>(argument_types);
+                Data<LeadAndLagData<Int64, is_nullable, false, StoreType, init_with_null>>>(
+                argument_types);
     }
     if (which.is_string_or_fixed_string()) {
         return new AggregateFunctionTemplate<
-                Data<LeadAndLagData<StringRef, is_nullable, true, StoreType>>>(argument_types);
+                Data<LeadAndLagData<StringRef, is_nullable, true, StoreType, init_with_null>>>(
+                argument_types);
     }
     DCHECK(false) << "with unknowed type, failed in  create_aggregate_function_leadlag";
     return nullptr;
 }
 
-template <bool is_nullable, bool is_copy>
+template <bool is_nullable, bool is_copy, bool replace_if_not_null = false>
 AggregateFunctionPtr create_aggregate_function_first(const std::string& name,
                                                      const DataTypes& argument_types,
                                                      const Array& parameters,
                                                      bool result_is_nullable) {
-    return AggregateFunctionPtr(
-            create_function_single_value<WindowFunctionData, WindowFunctionFirstData, is_nullable,
-                                         is_copy>(name, argument_types, parameters));
+    if constexpr (replace_if_not_null) {
+        return AggregateFunctionPtr(
+                create_function_single_value<WindowFunctionData, WindowFunctionFirstNonNullData,
+                                             is_nullable, is_copy, true>(name, argument_types,
+                                                                         parameters));
+    } else {
+        return AggregateFunctionPtr(
+                create_function_single_value<WindowFunctionData, WindowFunctionFirstData,
+                                             is_nullable, is_copy>(name, argument_types,
+                                                                   parameters));
+    }
 }
 
-template <bool is_nullable, bool is_copy>
+template <bool is_nullable, bool is_copy, bool replace_if_not_null = false>
 AggregateFunctionPtr create_aggregate_function_last(const std::string& name,

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org