You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/01 04:54:00 UTC

[GitHub] [arrow] westonpace commented on a diff in pull request #13179: ARROW-16599 [C++] Implementation of ExecuteScalarExpressionOverhead benchmarks without arrow for comparison

westonpace commented on code in PR #13179:
URL: https://github.com/apache/arrow/pull/13179#discussion_r886316520


##########
cpp/src/arrow/compute/exec/expression_benchmark.cc:
##########
@@ -152,20 +176,96 @@ BENCHMARK_CAPTURE(BindAndEvaluate, nested_array,
 BENCHMARK_CAPTURE(BindAndEvaluate, nested_scalar,
                   field_ref(FieldRef("struct_scalar", "float")));
 
+/// \brief Baseline benchmark for complex_expression implemented without arrow
+struct ComplexExpressionBaseline {
+ public:
+  ComplexExpressionBaseline(size_t input_size) {
+    /* hack - cuts off a few elemets if the input size is not a multiple of 64 for
+     * simplicity. We can't use std::vector<bool> here since it slows down things
+     * massively */
+    less_20.resize(input_size / 64);
+    greater_0.resize(input_size / 64);
+    output.resize(input_size / 64);
+  }
+  void Exec(const std::vector<int64_t>& input) {
+    size_t input_size = input.size();
+
+    for (size_t index = 0; index < input_size / 64; index++) {
+      size_t value = 0;
+      for (size_t bit = 0; bit < 64; bit++) {
+        value |= input[index * 64 + bit] > 0;
+      }

Review Comment:
   ```suggestion
           value |= input[index * 64 + bit] > 0;
           value <<= 1;
         }
   ```
   
   I think you need to shift `value` here; otherwise every comparison result is OR-ed into the same lowest bit instead of being stored in its own bit (and I think the compiler might be simplifying the loop so that it skips 63 of every 64 iterations).



##########
cpp/src/arrow/compute/exec/expression_benchmark.cc:
##########
@@ -152,20 +176,96 @@ BENCHMARK_CAPTURE(BindAndEvaluate, nested_array,
 BENCHMARK_CAPTURE(BindAndEvaluate, nested_scalar,
                   field_ref(FieldRef("struct_scalar", "float")));
 
+/// \brief Baseline benchmark for complex_expression implemented without arrow
+struct ComplexExpressionBaseline {
+ public:
+  ComplexExpressionBaseline(size_t input_size) {
+    /* hack - cuts off a few elemets if the input size is not a multiple of 64 for
+     * simplicity. We can't use std::vector<bool> here since it slows down things
+     * massively */
+    less_20.resize(input_size / 64);
+    greater_0.resize(input_size / 64);
+    output.resize(input_size / 64);
+  }
+  void Exec(const std::vector<int64_t>& input) {
+    size_t input_size = input.size();
+
+    for (size_t index = 0; index < input_size / 64; index++) {
+      size_t value = 0;
+      for (size_t bit = 0; bit < 64; bit++) {
+        value |= input[index * 64 + bit] > 0;
+      }
+      greater_0[index] = value;
+    }
+    for (size_t index = 0; index < input_size / 64; index++) {
+      size_t value = 0;
+      for (size_t bit = 0; bit < 64; bit++) {
+        value |= input[index * 64 + bit] < 20;
+      }

Review Comment:
   ```suggestion
           value |= input[index * 64 + bit] < 20;
           value <<= 1;
         }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org