You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "pitrou (via GitHub)" <gi...@apache.org> on 2023/06/28 16:24:45 UTC

[GitHub] [arrow] pitrou commented on a diff in pull request #36358: GH-36345: [C++] Prefer TypeError over Invalid in IsIn and IndexIn kernels

pitrou commented on code in PR #36358:
URL: https://github.com/apache/arrow/pull/36358#discussion_r1245466199


##########
cpp/src/arrow/compute/kernels/scalar_set_lookup.cc:
##########
@@ -326,9 +326,13 @@ struct IndexInVisitor {
     const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
     if (!data.type->Equals(state.value_set_type)) {
       auto materialized_input = data.ToArrayData();
-      ARROW_ASSIGN_OR_RAISE(auto casted_input,
-                            Cast(*materialized_input, state.value_set_type,
-                                 CastOptions::Safe(), ctx->exec_context()));
+      auto cast_result = Cast(*materialized_input, state.value_set_type,
+                              CastOptions::Safe(), ctx->exec_context());
+      if (ARROW_PREDICT_FALSE(!cast_result.ok())) {

Review Comment:
   Well, we want to keep the original error. This is precisely to distinguish the different kinds of errors:
   * `TypeError`: casting from type X to type Y is not supported at all
   * `Invalid`: casting from type X to type Y _is_ supported, but the actual values fail casting (e.g. because of truncation or overflow)
   



##########
cpp/src/arrow/compute/kernels/scalar_set_lookup.cc:
##########
@@ -326,9 +326,13 @@ struct IndexInVisitor {
     const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
     if (!data.type->Equals(state.value_set_type)) {
       auto materialized_input = data.ToArrayData();
-      ARROW_ASSIGN_OR_RAISE(auto casted_input,
-                            Cast(*materialized_input, state.value_set_type,
-                                 CastOptions::Safe(), ctx->exec_context()));
+      auto cast_result = Cast(*materialized_input, state.value_set_type,
+                              CastOptions::Safe(), ctx->exec_context());
+      if (ARROW_PREDICT_FALSE(!cast_result.ok())) {

Review Comment:
   So you could perhaps refine this as:
   ```c++
         if (!cast_result.ok()) {
           if (cast_result.IsNotImplemented()) {
             return Status::TypeError("Array type didn't match type of values set: ",
                                      *data.type, " vs ", *state.value_set_type);
           }
           return cast_result;
         }
   ```



##########
cpp/src/arrow/compute/kernels/scalar_set_lookup.cc:
##########
@@ -423,11 +427,15 @@ struct IsInVisitor {
     const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
 
     if (!data.type->Equals(state.value_set_type)) {
-      auto materialized_data = data.ToArrayData();
-      ARROW_ASSIGN_OR_RAISE(auto casted_data,
-                            Cast(*materialized_data, state.value_set_type,
-                                 CastOptions::Safe(), ctx->exec_context()));
-      return ProcessIsIn(state, *casted_data.array());
+      auto materialized_input = data.ToArrayData();
+      auto cast_result = Cast(*materialized_input, state.value_set_type,
+                              CastOptions::Safe(), ctx->exec_context());
+      if (ARROW_PREDICT_FALSE(!cast_result.ok())) {

Review Comment:
   Same here of course.



##########
cpp/src/arrow/compute/kernels/scalar_set_lookup.cc:
##########
@@ -326,9 +326,13 @@ struct IndexInVisitor {
     const auto& state = checked_cast<const SetLookupState<Type>&>(*ctx->state());
     if (!data.type->Equals(state.value_set_type)) {
       auto materialized_input = data.ToArrayData();
-      ARROW_ASSIGN_OR_RAISE(auto casted_input,
-                            Cast(*materialized_input, state.value_set_type,
-                                 CastOptions::Safe(), ctx->exec_context()));
+      auto cast_result = Cast(*materialized_input, state.value_set_type,
+                              CastOptions::Safe(), ctx->exec_context());
+      if (ARROW_PREDICT_FALSE(!cast_result.ok())) {

Review Comment:
   Note that `Cast` also does the distinction, with the caveat that it returns `NotImplemented` instead of `TypeError` (which IMHO is not great):
   ```python
   >>> a = pa.array([[1000,2000], [3000]])
   >>> a.type
   ListType(list<item: int64>)
   >>> pc.cast(a, pa.list_(pa.int32()))
   <pyarrow.lib.ListArray object at 0x7f1ccf65ab60>
   [
     [
       1000,
       2000
     ],
     [
       3000
     ]
   ]
   >>> pc.cast(a, pa.list_(pa.int8()))
   [...]
   ArrowInvalid: Integer value 1000 not in range: -128 to 127
   >>> pc.cast(a, pa.utf8())
   [...]
   ArrowNotImplementedError: Unsupported cast from list<item: int64> to utf8 using function cast_string
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org