You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "js8544 (via GitHub)" <gi...@apache.org> on 2023/06/28 06:50:06 UTC

[GitHub] [arrow] js8544 commented on a diff in pull request #36204: GH-36203: [C++] Support casting in both ways for is_in and index_in

js8544 commented on code in PR #36204:
URL: https://github.com/apache/arrow/pull/36204#discussion_r1244763406


##########
cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc:
##########
@@ -253,11 +253,11 @@ TEST_F(TestIsInKernel, TimeDuration) {
               "[true, false, false, true, true]", /*skip_nulls=*/true);
   }
 
-  // Different units, invalid cast
-  ASSERT_RAISES(Invalid, IsIn(ArrayFromJSON(duration(TimeUnit::SECOND), "[0, 1, 2]"),
-                              ArrayFromJSON(duration(TimeUnit::MILLI), "[0, 2]")));
+  // Different units, cast value_set to values
+  CheckIsIn(ArrayFromJSON(duration(TimeUnit::SECOND), "[0, 2]"),
+            ArrayFromJSON(duration(TimeUnit::MILLI), "[0, 1, 2000]"), "[true, true]");

Review Comment:
   Updated the test case. Since only safe casts are allowed, it will first try casting [1ms, 2ms, 2000ms] to [0s, 2s] and fail, then cast the other way around and succeed. The result is [null, true] as expected.



##########
cpp/src/arrow/compute/kernels/scalar_set_lookup.cc:
##########
@@ -215,16 +221,24 @@ struct InitStateVisitor {
       return Status::Invalid("Array type didn't match type of values set: ", *arg_type,
                              " vs ", *options.value_set.type());
     }
+
     if (!options.value_set.is_arraylike()) {
       return Status::Invalid("Set lookup value set must be Array or ChunkedArray");
     } else if (!options.value_set.type()->Equals(*arg_type)) {
-      ARROW_ASSIGN_OR_RAISE(
-          options.value_set,
+      auto cast_result =
           Cast(options.value_set, CastOptions::Safe(arg_type.GetSharedPtr()),
-               ctx->exec_context()));
+               ctx->exec_context());
+      if (cast_result.ok()) {
+        options.value_set = *cast_result;
+      } else if (CanCast(*arg_type.type, *options.value_set.type())) {
+        // Will try to cast input array to value set type during kernel exec
+      } else {
+        return Status::Invalid("Input type doesn't match type of values set: ", *arg_type,

Review Comment:
   Created #36345 



##########
cpp/src/arrow/compute/kernels/scalar_set_lookup.cc:
##########
@@ -359,13 +386,11 @@ struct IsInVisitor {
   }
 
   template <typename Type>
-  Status ProcessIsIn() {
+  Status ProcessIsIn(const SetLookupState<Type>& state, const ArraySpan& data) {

Review Comment:
   Changed `data` to `input`



##########
cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc:
##########
@@ -822,6 +822,37 @@ TEST_F(TestIndexInKernel, Boolean) {
   CheckIndexIn(boolean(), "[null, null, null, null]", "[null]", "[0, 0, 0, 0]");
 }
 
+TEST_F(TestIndexInKernel, ImplicitlyCastValueSet) {
+  auto input = ArrayFromJSON(int8(), "[0, 1, 2, 3, 4, 5, 6, 7, 8]");
+
+  SetLookupOptions opts{ArrayFromJSON(int32(), "[2, 3, 5, 7]")};
+  ASSERT_OK_AND_ASSIGN(Datum out, CallFunction("index_in", {input}, &opts));
+
+  auto expected = ArrayFromJSON(int32(), ("[null, null, 0, 1, null,"
+                                          "2, null, 3, null]"));
+  AssertArraysEqual(*expected, *out.make_array());
+
+  // Although value_set cannot be cast to int8, but int8 is castable to float
+  CheckIndexIn(input, ArrayFromJSON(float32(), "[1.0, 2.5, 3.1, 5.0]"),
+               "[null, 0, null, null, null, 3, null, null, null]");
+
+  // Allow implicit casts between binary types...
+  CheckIndexIn(ArrayFromJSON(binary(), R"(["aaa", "bbb", "ccc", null, "bbb"])"),
+               ArrayFromJSON(fixed_size_binary(3), R"(["aaa", "bbb"])"),
+               "[0, 1, null, null, 1]");
+  CheckIndexIn(ArrayFromJSON(utf8(), R"(["aaa", "bbb", "ccc", null, "bbb"])"),
+               ArrayFromJSON(large_utf8(), R"(["aaa", "bbb"])"), "[0, 1, null, null, 1]");

Review Comment:
   Done. I also explicitly forbid casting non-binary to binary, same as the tests down below.



##########
cpp/src/arrow/compute/kernels/scalar_set_lookup.cc:
##########
@@ -263,11 +277,8 @@ struct IndexInVisitor {
   }
 
   template <typename Type>
-  Status ProcessIndexIn() {
+  Status ProcessIndexIn(const SetLookupState<Type>& state, const ArraySpan& data) {

Review Comment:
   Changed `data` to `input`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org