You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/06/18 21:14:49 UTC

[GitHub] [arrow] nirandaperera commented on a change in pull request #10487: ARROW-13010: [C++][Compute] Support outputting to slices from kleene kernels

nirandaperera commented on a change in pull request #10487:
URL: https://github.com/apache/arrow/pull/10487#discussion_r654451587



##########
File path: cpp/src/arrow/util/bit_util_test.cc
##########
@@ -2156,5 +2187,169 @@ TEST(Bitmap, VisitWordsAnd) {
   }
 }
 
+void random_bool_vector(std::vector<bool>& vec, int64_t size, double p = 0.5) {
+  vec.reserve(size);
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::bernoulli_distribution d(p);
+
+  for (int n = 0; n < size; ++n) {
+    vec.push_back(d(gen));
+  }
+}
+
+std::string VectorToString(const std::vector<bool>& v) {
+  std::string out(v.size() + +((v.size() - 1) / 8), ' ');
+  for (size_t i = 0; i < v.size(); ++i) {
+    out[i + (i / 8)] = v[i] ? '1' : '0';
+  }
+  return out;
+}
+
+void VerifyBoolVectorAndBitmap(const Bitmap& bitmap, const std::vector<bool>& expected) {
+  arrow::BooleanBuilder boolean_builder;
+  ASSERT_OK(boolean_builder.AppendValues(expected));
+  ASSERT_OK_AND_ASSIGN(auto arr, boolean_builder.Finish());
+
+  ASSERT_TRUE(BitmapEquals(bitmap.buffer()->data(), bitmap.offset(),
+                           arr->data()->buffers[1]->data(), 0, expected.size()))
+      << "exp: " << VectorToString(expected) << "\ngot: " << bitmap.ToString();
+}
+
+class TestBitmapVisitAndWriteOutputNoOffset : public ::testing::TestWithParam<int32_t> {};
+
+TEST_P(TestBitmapVisitAndWriteOutputNoOffset, Test1) {
+  auto part = GetParam();
+  int64_t bits = 4 * part;
+  std::vector<bool> data;
+  random_bool_vector(data, bits);

Review comment:
       I dont know why I didn't do that previously! :joy: 

##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else.cc
##########
@@ -72,18 +72,6 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum&
   Bitmap cond_valid{cond.buffers[0], cond.offset, cond.length};
   Bitmap left_valid = GetBitmap(left_d, 0);
   Bitmap right_valid = GetBitmap(right_d, 0);
-  // sometimes Bitmaps will be ignored, in which case we replace access to them with
-  // duplicated (probably elided) access to cond_data
-  const Bitmap& _ = cond_data;
-
-  // lambda function that will be used inside the visitor
-  uint64_t* out_validity = nullptr;
-  int64_t i = 0;
-  auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid,
-                   uint64_t r_valid) {
-    out_validity[i] = c_valid & ((c_data & l_valid) | (~c_data & r_valid));
-    i++;
-  };
 
   // cond.valid & (cond.data & left.valid | ~cond.data & right.valid)
   // In the following cases, we dont need to allocate out_valid bitmap

Review comment:
       One thing to note here is, `if-else` kernels have the following, 
   ```c++
       kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
       kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
   ```
   So, I allocating buffers are entirely handled by the kernel. So, I believe writing into slices should work without a problem IMO. 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org