You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/02/24 13:47:25 UTC

[GitHub] [arrow] lidavidm commented on a change in pull request #12080: ARROW-15118: [C++] Avoid bitmap buffer if all inputs are all valid for Scalar Kernels

lidavidm commented on a change in pull request #12080:
URL: https://github.com/apache/arrow/pull/12080#discussion_r813887878



##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -772,14 +769,27 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
     return Status::OK();
   }
 
-  Status SetupPreallocation(int64_t total_length) {
+  Status SetupPreallocation(int64_t total_length, const std::vector<Datum>& args) {
     output_num_buffers_ = static_cast<int>(output_descr_.type->layout().buffers.size());
-
-    // Decide if we need to preallocate memory for this kernel
-    validity_preallocated_ =
-        (kernel_->null_handling != NullHandling::COMPUTED_NO_PREALLOCATE &&
-         kernel_->null_handling != NullHandling::OUTPUT_NOT_NULL &&
-         output_descr_.type->id() != Type::NA);
+    auto out_type_id = output_descr_.type->id();
+    // Default to no validity pre-allocation for following cases:
+    // - Output Array is NullArray
+    // - kernel_->null_handling is COMPUTED_NO_PREALLOCATE or OUTPUT_NOT_NULL
+    validity_preallocated_ = false;
+    if (out_type_id != Type::NA) {
+      if (kernel_->null_handling == NullHandling::COMPUTED_PREALLOCATE) {
+        // Override the flag if kernel asks for pre-allocation
+        validity_preallocated_ = true;
+      } else if (kernel_->null_handling == NullHandling::INTERSECTION) {
+        bool are_all_inputs_valid = true;
+        for (auto& arg : args) {
+          auto null_gen = NullGeneralization::Get(arg) == NullGeneralization::ALL_VALID;
+          are_all_inputs_valid = are_all_inputs_valid && null_gen;
+        }
+        validity_preallocated_ =
+            !(are_all_inputs_valid || kernel_->can_write_into_slices);

Review comment:
       Why does `can_write_into_slices` being true disable validity bitmap preallocation here?

##########
File path: cpp/src/arrow/compute/exec.cc
##########
@@ -792,7 +802,7 @@ class ScalarExecutor : public KernelExecutorImpl<ScalarKernel> {
     preallocate_contiguous_ =
         (exec_context()->preallocate_contiguous() && kernel_->can_write_into_slices &&
          validity_preallocated_ && !is_nested(output_descr_.type->id()) &&
-         !is_dictionary(output_descr_.type->id()) &&
+         !is_dictionary(out_type_id) &&

Review comment:
       nit: if we're going to switch to `out_type_id` here, should we also replace the `output_descr_.type->id()` call in the `is_nested(...)` check on the previous line?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org