You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2022/07/19 12:05:26 UTC
[arrow] branch master updated: ARROW-17096: [C++][Compute] Fix mode kernel error on boolean array (#13646)
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3a5284fc29 ARROW-17096: [C++][Compute] Fix mode kernel error on boolean array (#13646)
3a5284fc29 is described below
commit 3a5284fc29a2b6906ecdb0d89a581051645d7549
Author: Yibo Cai <yi...@arm.com>
AuthorDate: Tue Jul 19 20:05:21 2022 +0800
ARROW-17096: [C++][Compute] Fix mode kernel error on boolean array (#13646)
Authored-by: Yibo Cai <yi...@arm.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/src/arrow/compute/kernels/aggregate_mode.cc | 25 +++++++++++++++----------
cpp/src/arrow/compute/kernels/aggregate_test.cc | 19 +++++++++++++++++++
python/pyarrow/tests/test_compute.py | 6 ++++++
3 files changed, 40 insertions(+), 10 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/aggregate_mode.cc b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
index c67ca31422..263eced9e3 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_mode.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_mode.cc
@@ -26,6 +26,7 @@
#include "arrow/result.h"
#include "arrow/stl_allocator.h"
#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
namespace arrow {
namespace compute {
@@ -58,7 +59,8 @@ Result<std::pair<CType*, int64_t*>> PrepareOutput(int64_t n, KernelContext* ctx,
int64_t* count_buffer = nullptr;
if (n > 0) {
- ARROW_ASSIGN_OR_RAISE(mode_data->buffers[1], ctx->Allocate(n * sizeof(CType)));
+ const auto mode_buffer_size = bit_util::BytesForBits(n * mode_type->bit_width());
+ ARROW_ASSIGN_OR_RAISE(mode_data->buffers[1], ctx->Allocate(mode_buffer_size));
ARROW_ASSIGN_OR_RAISE(count_data->buffers[1], ctx->Allocate(n * sizeof(int64_t)));
mode_buffer = mode_data->template GetMutableValues<CType>(1);
count_buffer = count_data->template GetMutableValues<int64_t>(1);
@@ -201,18 +203,21 @@ struct CountModer<BooleanType> {
const int64_t distinct_values = (this->counts[0] != 0) + (this->counts[1] != 0);
const int64_t n = std::min(options.n, distinct_values);
- bool* mode_buffer;
+ uint8_t* mode_buffer;
int64_t* count_buffer;
ARROW_ASSIGN_OR_RAISE(std::tie(mode_buffer, count_buffer),
- PrepareOutput<BooleanType>(n, ctx, type, out));
+ (PrepareOutput<BooleanType, uint8_t>(n, ctx, type, out)));
if (n >= 1) {
- const bool index = counts[1] > counts[0];
- mode_buffer[0] = index;
- count_buffer[0] = counts[index];
+ // at most two bits are useful in mode buffer
+ mode_buffer[0] = 0;
+ const bool first_mode = counts[true] > counts[false];
+ bit_util::SetBitTo(mode_buffer, 0, first_mode);
+ count_buffer[0] = counts[first_mode];
if (n == 2) {
- mode_buffer[1] = !index;
- count_buffer[1] = counts[!index];
+ const bool second_mode = !first_mode;
+ bit_util::SetBitTo(mode_buffer, 1, second_mode);
+ count_buffer[1] = counts[second_mode];
}
}
@@ -224,7 +229,7 @@ struct CountModer<BooleanType> {
const ModeOptions& options = ModeState::Get(ctx);
if ((!options.skip_nulls && values.GetNullCount() > 0) ||
(values.length - values.null_count < options.min_count)) {
- return PrepareOutput<BooleanType>(/*n=*/0, ctx, *out->type(), out).status();
+ return PrepareOutput<BooleanType, uint8_t>(0, ctx, *out->type(), out).status();
}
UpdateCounts(values);
return WrapResult(ctx, options, *out->type(), out);
@@ -236,7 +241,7 @@ struct CountModer<BooleanType> {
ExecResult result;
if ((!options.skip_nulls && values.null_count() > 0) ||
(values.length() - values.null_count() < options.min_count)) {
- RETURN_NOT_OK(PrepareOutput<BooleanType>(/*n=*/0, ctx, *out->type(), &result));
+ RETURN_NOT_OK((PrepareOutput<BooleanType, uint8_t>(0, ctx, *out->type(), &result)));
} else {
UpdateCounts(values);
RETURN_NOT_OK(WrapResult(ctx, options, *out->type(), &result));
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index abd5b5210a..c7909487fb 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -2548,6 +2548,24 @@ void CheckModes(const Datum& array, const ModeOptions options,
}
}
+template <>
+void CheckModes<bool>(const Datum& array, const ModeOptions options,
+ const std::vector<bool>& expected_modes,
+ const std::vector<int64_t>& expected_counts) {
+ ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, options));
+ ValidateOutput(out);
+ const StructArray out_array(out.array());
+ ASSERT_EQ(out_array.length(), expected_modes.size());
+ ASSERT_EQ(out_array.num_fields(), 2);
+
+ const uint8_t* out_modes = out_array.field(0)->data()->GetValues<uint8_t>(1);
+ const int64_t* out_counts = out_array.field(1)->data()->GetValues<int64_t>(1);
+ for (int i = 0; i < out_array.length(); ++i) {
+ ASSERT_TRUE(expected_modes[i] == bit_util::GetBit(out_modes, i));
+ ASSERT_EQ(expected_counts[i], out_counts[i]);
+ }
+}
+
template <typename T>
class TestPrimitiveModeKernel : public ::testing::Test {
public:
@@ -2642,6 +2660,7 @@ TEST_F(TestBooleanModeKernel, Basics) {
this->AssertModeIs({"[true, null]", "[]", "[null, false]"}, false, 1);
this->AssertModesEmpty({"[null, null]", "[]", "[null]"});
+ this->AssertModesAre("[true, false]", 2, {false, true}, {1, 1});
this->AssertModesAre("[false, false, true, true, true, false]", 2, {false, true},
{3, 3});
this->AssertModesAre("[true, null, false, false, null, true, null, null, true]", 100,
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 6664f2f824..2bdec412f1 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -370,6 +370,12 @@ def test_mode_array():
mode = pc.mode(arr, skip_nulls=False, min_count=5)
assert len(mode) == 0
+ arr = pa.array([True, False])
+ mode = pc.mode(arr, n=2)
+ assert len(mode) == 2
+ assert mode[0].as_py() == {"mode": False, "count": 1}
+ assert mode[1].as_py() == {"mode": True, "count": 1}
+
def test_mode_chunked_array():
# ARROW-9917