You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2022/07/27 16:30:07 UTC
[arrow] 02/02: ARROW-17227: [C++] Extend hash-join unit tests to cover both empty and length=0 batches (#13725)
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch maint-9.0.0
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit be145411f0963598eda1b14fb694f8ea93780bf0
Author: Weston Pace <we...@gmail.com>
AuthorDate: Wed Jul 27 06:22:33 2022 -1000
ARROW-17227: [C++] Extend hash-join unit tests to cover both empty and length=0 batches (#13725)
Authored-by: Weston Pace <we...@gmail.com>
Signed-off-by: Krisztián Szűcs <sz...@gmail.com>
---
cpp/src/arrow/compute/exec/hash_join_node_test.cc | 88 ++++++++++++-----------
1 file changed, 47 insertions(+), 41 deletions(-)
diff --git a/cpp/src/arrow/compute/exec/hash_join_node_test.cc b/cpp/src/arrow/compute/exec/hash_join_node_test.cc
index d06b76159d..8cb1f8b92c 100644
--- a/cpp/src/arrow/compute/exec/hash_join_node_test.cc
+++ b/cpp/src/arrow/compute/exec/hash_join_node_test.cc
@@ -1240,7 +1240,9 @@ void TestHashJoinDictionaryHelper(
// side.
int expected_num_r_no_match,
// Whether to swap two inputs to the hash join
- bool swap_sides) {
+ bool swap_sides,
+ // If true, send length=0 batches; if false, skip these batches
+ bool send_empty_batches = true) {
int64_t l_length = l_key.is_array() ? l_key.array()->length
: l_payload.is_array() ? l_payload.array()->length
: -1;
@@ -1298,12 +1300,12 @@ void TestHashJoinDictionaryHelper(
}
}
- // Instead of sending 2 batches of size 0 we should not send any batches
- // at all to more accurately simulate real world use cases
- if (l_length == 0) {
+ // When the input is empty we can either send length=0 batches
+ // or bypass the batches entirely
+ if (l_length == 0 && !send_empty_batches) {
l_batches.batches.resize(0);
}
- if (r_length == 0) {
+ if (r_length == 0 && !send_empty_batches) {
r_batches.batches.resize(0);
}
@@ -1509,23 +1511,25 @@ TEST(HashJoin, Dictionary) {
for (auto parallel : {false, true}) {
for (auto swap_sides : {false, true}) {
for (auto cmp : {JoinKeyCmp::IS, JoinKeyCmp::EQ}) {
- TestHashJoinDictionaryHelper(
- JoinType::FULL_OUTER, cmp, parallel,
- // Input
- DictArrayFromJSON(l_key_dict_type, R"([2, 0, 1])", R"(["b", null, "a"])"),
- DictArrayFromJSON(l_payload_dict_type, R"([2, 2, 0])",
- R"(["x", "y", "z"])"),
- DictArrayFromJSON(r_key_dict_type, R"([])", R"([null, "b", "c"])"),
- DictArrayFromJSON(r_payload_dict_type, R"([])", R"(["p", "r", "s"])"),
- // Expected
- DictArrayFromJSON(l_key_dict_type, R"([2, 0, 1])", R"(["b", null, "a"])"),
- DictArrayFromJSON(l_payload_dict_type, R"([2, 2, 0])",
- R"(["x", "y", "z"])"),
- DictArrayFromJSON(r_key_dict_type, R"([null, null, null])",
- R"(["b", "c"])"),
- DictArrayFromJSON(r_payload_dict_type, R"([null, null, null])",
- R"(["p", "r", "s"])"),
- 0, swap_sides);
+ for (auto send_empty_batches : {false, true}) {
+ TestHashJoinDictionaryHelper(
+ JoinType::FULL_OUTER, cmp, parallel,
+ // Input
+ DictArrayFromJSON(l_key_dict_type, R"([2, 0, 1])", R"(["b", null, "a"])"),
+ DictArrayFromJSON(l_payload_dict_type, R"([2, 2, 0])",
+ R"(["x", "y", "z"])"),
+ DictArrayFromJSON(r_key_dict_type, R"([])", R"([null, "b", "c"])"),
+ DictArrayFromJSON(r_payload_dict_type, R"([])", R"(["p", "r", "s"])"),
+ // Expected
+ DictArrayFromJSON(l_key_dict_type, R"([2, 0, 1])", R"(["b", null, "a"])"),
+ DictArrayFromJSON(l_payload_dict_type, R"([2, 2, 0])",
+ R"(["x", "y", "z"])"),
+ DictArrayFromJSON(r_key_dict_type, R"([null, null, null])",
+ R"(["b", "c"])"),
+ DictArrayFromJSON(r_payload_dict_type, R"([null, null, null])",
+ R"(["p", "r", "s"])"),
+ 0, swap_sides, send_empty_batches);
+ }
}
}
}
@@ -1541,25 +1545,27 @@ TEST(HashJoin, Dictionary) {
for (auto parallel : {false, true}) {
for (auto swap_sides : {false, true}) {
for (auto cmp : {JoinKeyCmp::IS, JoinKeyCmp::EQ}) {
- TestHashJoinDictionaryHelper(
- JoinType::FULL_OUTER, cmp, parallel,
- // Input
- DictArrayFromJSON(l_key_dict_type, R"([])", R"(["b", null, "a"])"),
- DictArrayFromJSON(l_payload_dict_type, R"([])", R"(["x", "y", "z"])"),
- DictArrayFromJSON(r_key_dict_type, R"([2, 0, 1, null])",
- R"([null, "b", "c"])"),
- DictArrayFromJSON(r_payload_dict_type, R"([1, 1, null, 0])",
- R"(["p", "r", "s"])"),
- // Expected
- DictArrayFromJSON(l_key_dict_type, R"([null, null, null, null])",
- R"(["b", null, "a"])"),
- DictArrayFromJSON(l_payload_dict_type, R"([null, null, null, null])",
- R"(["x", "y", "z"])"),
- DictArrayFromJSON(r_key_dict_type, R"([1, null, 0, null])",
- R"(["b", "c"])"),
- DictArrayFromJSON(r_payload_dict_type, R"([1, 1, null, 0])",
- R"(["p", "r", "s"])"),
- 4, swap_sides);
+ for (auto send_empty_batches : {false, true}) {
+ TestHashJoinDictionaryHelper(
+ JoinType::FULL_OUTER, cmp, parallel,
+ // Input
+ DictArrayFromJSON(l_key_dict_type, R"([])", R"(["b", null, "a"])"),
+ DictArrayFromJSON(l_payload_dict_type, R"([])", R"(["x", "y", "z"])"),
+ DictArrayFromJSON(r_key_dict_type, R"([2, 0, 1, null])",
+ R"([null, "b", "c"])"),
+ DictArrayFromJSON(r_payload_dict_type, R"([1, 1, null, 0])",
+ R"(["p", "r", "s"])"),
+ // Expected
+ DictArrayFromJSON(l_key_dict_type, R"([null, null, null, null])",
+ R"(["b", null, "a"])"),
+ DictArrayFromJSON(l_payload_dict_type, R"([null, null, null, null])",
+ R"(["x", "y", "z"])"),
+ DictArrayFromJSON(r_key_dict_type, R"([1, null, 0, null])",
+ R"(["b", "c"])"),
+ DictArrayFromJSON(r_payload_dict_type, R"([1, 1, null, 0])",
+ R"(["p", "r", "s"])"),
+ 4, swap_sides, send_empty_batches);
+ }
}
}
}