You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/02/01 14:00:08 UTC

[GitHub] [arrow] lidavidm commented on a change in pull request #12162: ARROW-15089: [C++][Compute] Implement kernel to lookup a MapArray item for a given key

lidavidm commented on a change in pull request #12162:
URL: https://github.com/apache/arrow/pull/12162#discussion_r796613939



##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -1350,5 +1375,20 @@ ARROW_EXPORT Result<Datum> AssumeTimezone(const Datum& values,
                                           AssumeTimezoneOptions options,
                                           ExecContext* ctx = NULLPTR);
 
+/// \brief Finds either the FIRST, LAST, or ALL items with a key that matches the given
+/// query key in a map array.
+///
+/// Returns an array of items for FIRST and LAST, and an array of list of items for ALL.
+///
+/// \param[in] map_array to look in
+/// \param[in] options to pass a query key and choose which matching keys to return
+/// (FIRST, LAST or ALL)
+/// \param[in] ctx the function execution context, optional
+///
+/// \return the resulting datum

Review comment:
       ```suggestion
   /// \return the resulting datum
   /// \since 8.0.0
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -428,6 +429,276 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a StructArray",
                                    "specified through MakeStructOptions."),
                                   {"*args"},
                                   "MakeStructOptions"};
+template <typename KeyType>
+struct MapArrayLookupFunctor {
+  static Result<int64_t> GetOneMatchingIndex(const Array& keys,
+                                             const Scalar& query_key_scalar,
+                                             const bool* from_back) {
+    int64_t match_index = -1;
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> Status {
+          match_index = index;
+          if (*from_back) {
+            return Status::OK();
+          } else {
+            return Status::Cancelled("Found key match for FIRST");
+          }
+        }));
+
+    return match_index;
+  }
+
+  static Status BuildItemsArray(const Array& keys, const Array& items,
+                                const Scalar& query_key_scalar,
+                                bool* found_at_least_one_key, ArrayBuilder* builder) {
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> Status {
+          *found_at_least_one_key = true;
+          RETURN_NOT_OK(builder->AppendArraySlice(*items.data(), index, 1));
+          return Status::OK();
+        }));
+    return Status::OK();
+  }
+
+  template <typename FoundItem>
+  static Status FindMatchingIndices(const Array& keys, const Scalar& query_key_scalar,
+                                    FoundItem callback) {
+    const auto query_key = UnboxScalar<KeyType>::Unbox(query_key_scalar);
+    int64_t index = 0;
+    Status status = VisitArrayValuesInline<KeyType>(
+        *keys.data(),
+        [&](decltype(query_key) key) -> Status {
+          if (key == query_key) {
+            return callback(index++);
+          }
+          ++index;
+          return Status::OK();
+        },
+        [&]() -> Status {
+          ++index;
+          return Status::OK();
+        });
+    if (!status.ok() && !status.IsCancelled()) {
+      return status;
+    }
+    return Status::OK();
+  }
+
+  static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+    const auto& query_key = options.query_key;
+    const auto& occurrence = options.occurrence;
+    const MapArray map_array(batch[0].array());
+
+    std::unique_ptr<ArrayBuilder> builder;
+    if (occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(),
+                                list(map_array.map_type()->item_type()), &builder));
+      auto list_builder = checked_cast<ListBuilder*>(builder.get());
+      auto value_builder = list_builder->value_builder();
+
+      for (int64_t map_array_idx = 0; map_array_idx < map_array.length();
+           ++map_array_idx) {
+        if (!map_array.IsValid(map_array_idx)) {
+          RETURN_NOT_OK(list_builder->AppendNull());
+          continue;
+        }
+
+        auto map = map_array.value_slice(map_array_idx);
+        auto keys = checked_cast<const StructArray&>(*map).field(0);
+        auto items = checked_cast<const StructArray&>(*map).field(1);
+        bool found_at_least_one_key = false;
+        RETURN_NOT_OK(
+            FindMatchingIndices(*keys, *query_key, [&](int64_t index) -> Status {
+              if (!found_at_least_one_key) RETURN_NOT_OK(list_builder->Append(true));
+              found_at_least_one_key = true;
+              RETURN_NOT_OK(value_builder->AppendArraySlice(*items->data(), index, 1));
+              return Status::OK();
+            }));
+        if (!found_at_least_one_key) {
+          RETURN_NOT_OK(list_builder->AppendNull());
+        }
+      }
+      ARROW_ASSIGN_OR_RAISE(auto result, list_builder->Finish());
+      out->value = result->data();
+    } else { /* occurrence == FIRST || LAST */
+      RETURN_NOT_OK(
+          MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(), &builder));
+      RETURN_NOT_OK(builder->Reserve(batch.length));
+      for (int64_t map_array_idx = 0; map_array_idx < map_array.length();
+           ++map_array_idx) {
+        if (!map_array.IsValid(map_array_idx)) {
+          RETURN_NOT_OK(builder->AppendNull());
+          continue;
+        }
+
+        auto map = map_array.value_slice(map_array_idx);
+        auto keys = checked_cast<const StructArray&>(*map).field(0);
+        auto items = checked_cast<const StructArray&>(*map).field(1);
+        bool from_back = (occurrence == MapArrayLookupOptions::LAST);
+        ARROW_ASSIGN_OR_RAISE(int64_t key_match_idx,
+                              GetOneMatchingIndex(*keys, *query_key, &from_back));
+
+        if (key_match_idx != -1) {
+          RETURN_NOT_OK(builder->AppendArraySlice(*items->data(), key_match_idx, 1));
+        } else {
+          RETURN_NOT_OK(builder->AppendNull());
+        }
+      }
+      ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+      out->value = result->data();
+    }
+
+    return Status::OK();
+  }
+
+  static Status ExecMapScalar(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+    const auto& query_key = options.query_key;
+    const auto& occurrence = options.occurrence;
+
+    std::shared_ptr<DataType> item_type =
+        checked_cast<const MapType&>(*batch[0].type()).item_type();
+    const auto& map_scalar = batch[0].scalar_as<MapScalar>();
+
+    if (ARROW_PREDICT_FALSE(!map_scalar.is_valid)) {
+      if (options.occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+        out->value = MakeNullScalar(list(item_type));
+      } else {
+        out->value = MakeNullScalar(item_type);
+      }
+      return Status::OK();
+    }
+
+    const auto& struct_array = checked_cast<const StructArray&>(*map_scalar.value);
+    const std::shared_ptr<Array> keys = struct_array.field(0);
+    const std::shared_ptr<Array> items = struct_array.field(1);
+
+    if (occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+      bool found_at_least_one_key = false;
+      std::unique_ptr<ArrayBuilder> builder;
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), items->type(), &builder));
+      RETURN_NOT_OK(BuildItemsArray(*keys, *items, *query_key, &found_at_least_one_key,
+                                    builder.get()));
+
+      if (!found_at_least_one_key) {
+        out->value = MakeNullScalar(list(items->type()));
+      } else {
+        ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+        ARROW_ASSIGN_OR_RAISE(out->value, MakeScalar(list(items->type()), result));
+      }
+    } else { /* occurrence == FIRST || LAST */
+      bool from_back = (occurrence == MapArrayLookupOptions::LAST);
+
+      ARROW_ASSIGN_OR_RAISE(int64_t key_match_idx,
+                            GetOneMatchingIndex(*keys, *query_key, &from_back));
+      if (key_match_idx != -1) {
+        ARROW_ASSIGN_OR_RAISE(out->value, items->GetScalar(key_match_idx));
+      } else {
+        out->value = MakeNullScalar(items->type());
+      }
+    }
+    return Status::OK();
+  }
+};
+
+Result<ValueDescr> ResolveMapArrayLookupType(KernelContext* ctx,
+                                             const std::vector<ValueDescr>& descrs) {
+  const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+  std::shared_ptr<DataType> type = descrs.front().type;
+  std::shared_ptr<DataType> item_type = checked_cast<const MapType&>(*type).item_type();
+  std::shared_ptr<DataType> key_type = checked_cast<const MapType&>(*type).key_type();
+
+  if (!options.query_key) {
+    return Status::TypeError("map_array_lookup: query_key can't be empty.");
+  } else if (!options.query_key->is_valid) {
+    return Status::TypeError("map_array_lookup: query_key can't be null.");

Review comment:
       nit: these two probably make more sense as `Status::Invalid` instead of `Status::TypeError`.

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -428,6 +429,276 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a StructArray",
                                    "specified through MakeStructOptions."),
                                   {"*args"},
                                   "MakeStructOptions"};
+template <typename KeyType>
+struct MapArrayLookupFunctor {
+  static Result<int64_t> GetOneMatchingIndex(const Array& keys,
+                                             const Scalar& query_key_scalar,
+                                             const bool* from_back) {
+    int64_t match_index = -1;
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> Status {
+          match_index = index;
+          if (*from_back) {
+            return Status::OK();
+          } else {
+            return Status::Cancelled("Found key match for FIRST");
+          }
+        }));
+
+    return match_index;
+  }
+
+  static Status BuildItemsArray(const Array& keys, const Array& items,
+                                const Scalar& query_key_scalar,
+                                bool* found_at_least_one_key, ArrayBuilder* builder) {
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> Status {
+          *found_at_least_one_key = true;
+          RETURN_NOT_OK(builder->AppendArraySlice(*items.data(), index, 1));
+          return Status::OK();
+        }));
+    return Status::OK();
+  }
+
+  template <typename FoundItem>
+  static Status FindMatchingIndices(const Array& keys, const Scalar& query_key_scalar,
+                                    FoundItem callback) {
+    const auto query_key = UnboxScalar<KeyType>::Unbox(query_key_scalar);
+    int64_t index = 0;
+    Status status = VisitArrayValuesInline<KeyType>(
+        *keys.data(),
+        [&](decltype(query_key) key) -> Status {
+          if (key == query_key) {
+            return callback(index++);
+          }
+          ++index;
+          return Status::OK();
+        },
+        [&]() -> Status {
+          ++index;
+          return Status::OK();
+        });
+    if (!status.ok() && !status.IsCancelled()) {
+      return status;
+    }
+    return Status::OK();
+  }
+
+  static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+    const auto& query_key = options.query_key;
+    const auto& occurrence = options.occurrence;
+    const MapArray map_array(batch[0].array());
+
+    std::unique_ptr<ArrayBuilder> builder;
+    if (occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(),
+                                list(map_array.map_type()->item_type()), &builder));
+      auto list_builder = checked_cast<ListBuilder*>(builder.get());
+      auto value_builder = list_builder->value_builder();
+
+      for (int64_t map_array_idx = 0; map_array_idx < map_array.length();
+           ++map_array_idx) {
+        if (!map_array.IsValid(map_array_idx)) {
+          RETURN_NOT_OK(list_builder->AppendNull());
+          continue;
+        }
+
+        auto map = map_array.value_slice(map_array_idx);
+        auto keys = checked_cast<const StructArray&>(*map).field(0);
+        auto items = checked_cast<const StructArray&>(*map).field(1);
+        bool found_at_least_one_key = false;
+        RETURN_NOT_OK(
+            FindMatchingIndices(*keys, *query_key, [&](int64_t index) -> Status {
+              if (!found_at_least_one_key) RETURN_NOT_OK(list_builder->Append(true));
+              found_at_least_one_key = true;
+              RETURN_NOT_OK(value_builder->AppendArraySlice(*items->data(), index, 1));
+              return Status::OK();
+            }));

Review comment:
       Yeah, we can inline it then.

##########
File path: docs/source/cpp/compute.rst
##########
@@ -1639,17 +1639,19 @@ in the respective option classes.
 Structural transforms
 ~~~~~~~~~~~~~~~~~~~~~
 
-+---------------------+------------+-------------------------------------+------------------+------------------------------+--------+
-| Function name       | Arity      | Input types                         | Output type      | Options class                | Notes  |
-+=====================+============+=====================================+==================+==============================+========+
-| list_element        | Binary     | List-like (Arg 0), Integral (Arg 1) | List value type  |                              | \(1)   |
-+---------------------+------------+-------------------------------------+------------------+------------------------------+--------+
-| list_flatten        | Unary      | List-like                           | List value type  |                              | \(2)   |
-+---------------------+------------+-------------------------------------+------------------+------------------------------+--------+
-| list_parent_indices | Unary      | List-like                           | Int64            |                              | \(3)   |
-+---------------------+------------+-------------------------------------+------------------+------------------------------+--------+
-| struct_field        | Unary      | Struct or Union                     | Computed         | :struct:`StructFieldOptions` | \(4)   |
-+---------------------+------------+-------------------------------------+------------------+------------------------------+--------+
++---------------------+------------+-------------------------------------+------------------+---------------------------------+--------+
+| Function name       | Arity      | Input types                         | Output type      | Options class                   | Notes  |
++=====================+============+=====================================+==================+=================================+========+
+| list_element        | Binary     | List-like (Arg 0), Integral (Arg 1) | List value type  |                                 | \(1)   |
++---------------------+------------+-------------------------------------+------------------+---------------------------------+--------+
+| list_flatten        | Unary      | List-like                           | List value type  |                                 | \(2)   |
++---------------------+------------+-------------------------------------+------------------+---------------------------------+--------+
+| list_parent_indices | Unary      | List-like                           | Int64            |                                 | \(3)   |
++---------------------+------------+-------------------------------------+------------------+---------------------------------+--------+
+| map_array_lookup    | Unary      | Map                                 | Computed         | :struct:`MapArrayLookupOptions` | \(4)   |

Review comment:
       I'm really sorry to bikeshed here, but now that I look at it… would "map_lookup" be a better kernel name? I think we only use "array" in a kernel name when the kernel only works on arrays, but this works on arrays and scalars.

##########
File path: cpp/src/arrow/compute/kernels/scalar_nested.cc
##########
@@ -428,6 +429,276 @@ const FunctionDoc make_struct_doc{"Wrap Arrays into a StructArray",
                                    "specified through MakeStructOptions."),
                                   {"*args"},
                                   "MakeStructOptions"};
+template <typename KeyType>
+struct MapArrayLookupFunctor {
+  static Result<int64_t> GetOneMatchingIndex(const Array& keys,
+                                             const Scalar& query_key_scalar,
+                                             const bool* from_back) {
+    int64_t match_index = -1;
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> Status {
+          match_index = index;
+          if (*from_back) {
+            return Status::OK();
+          } else {
+            return Status::Cancelled("Found key match for FIRST");
+          }
+        }));
+
+    return match_index;
+  }
+
+  static Status BuildItemsArray(const Array& keys, const Array& items,
+                                const Scalar& query_key_scalar,
+                                bool* found_at_least_one_key, ArrayBuilder* builder) {
+    RETURN_NOT_OK(
+        FindMatchingIndices(keys, query_key_scalar, [&](int64_t index) -> Status {
+          *found_at_least_one_key = true;
+          RETURN_NOT_OK(builder->AppendArraySlice(*items.data(), index, 1));
+          return Status::OK();
+        }));
+    return Status::OK();
+  }
+
+  template <typename FoundItem>
+  static Status FindMatchingIndices(const Array& keys, const Scalar& query_key_scalar,
+                                    FoundItem callback) {
+    const auto query_key = UnboxScalar<KeyType>::Unbox(query_key_scalar);
+    int64_t index = 0;
+    Status status = VisitArrayValuesInline<KeyType>(
+        *keys.data(),
+        [&](decltype(query_key) key) -> Status {
+          if (key == query_key) {
+            return callback(index++);
+          }
+          ++index;
+          return Status::OK();
+        },
+        [&]() -> Status {
+          ++index;
+          return Status::OK();
+        });
+    if (!status.ok() && !status.IsCancelled()) {
+      return status;
+    }
+    return Status::OK();
+  }
+
+  static Status ExecMapArray(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+    const auto& query_key = options.query_key;
+    const auto& occurrence = options.occurrence;
+    const MapArray map_array(batch[0].array());
+
+    std::unique_ptr<ArrayBuilder> builder;
+    if (occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(),
+                                list(map_array.map_type()->item_type()), &builder));
+      auto list_builder = checked_cast<ListBuilder*>(builder.get());
+      auto value_builder = list_builder->value_builder();
+
+      for (int64_t map_array_idx = 0; map_array_idx < map_array.length();
+           ++map_array_idx) {
+        if (!map_array.IsValid(map_array_idx)) {
+          RETURN_NOT_OK(list_builder->AppendNull());
+          continue;
+        }
+
+        auto map = map_array.value_slice(map_array_idx);
+        auto keys = checked_cast<const StructArray&>(*map).field(0);
+        auto items = checked_cast<const StructArray&>(*map).field(1);
+        bool found_at_least_one_key = false;
+        RETURN_NOT_OK(
+            FindMatchingIndices(*keys, *query_key, [&](int64_t index) -> Status {
+              if (!found_at_least_one_key) RETURN_NOT_OK(list_builder->Append(true));
+              found_at_least_one_key = true;
+              RETURN_NOT_OK(value_builder->AppendArraySlice(*items->data(), index, 1));
+              return Status::OK();
+            }));
+        if (!found_at_least_one_key) {
+          RETURN_NOT_OK(list_builder->AppendNull());
+        }
+      }
+      ARROW_ASSIGN_OR_RAISE(auto result, list_builder->Finish());
+      out->value = result->data();
+    } else { /* occurrence == FIRST || LAST */
+      RETURN_NOT_OK(
+          MakeBuilder(ctx->memory_pool(), map_array.map_type()->item_type(), &builder));
+      RETURN_NOT_OK(builder->Reserve(batch.length));
+      for (int64_t map_array_idx = 0; map_array_idx < map_array.length();
+           ++map_array_idx) {
+        if (!map_array.IsValid(map_array_idx)) {
+          RETURN_NOT_OK(builder->AppendNull());
+          continue;
+        }
+
+        auto map = map_array.value_slice(map_array_idx);
+        auto keys = checked_cast<const StructArray&>(*map).field(0);
+        auto items = checked_cast<const StructArray&>(*map).field(1);
+        bool from_back = (occurrence == MapArrayLookupOptions::LAST);
+        ARROW_ASSIGN_OR_RAISE(int64_t key_match_idx,
+                              GetOneMatchingIndex(*keys, *query_key, &from_back));
+
+        if (key_match_idx != -1) {
+          RETURN_NOT_OK(builder->AppendArraySlice(*items->data(), key_match_idx, 1));
+        } else {
+          RETURN_NOT_OK(builder->AppendNull());
+        }
+      }
+      ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+      out->value = result->data();
+    }
+
+    return Status::OK();
+  }
+
+  static Status ExecMapScalar(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+    const auto& query_key = options.query_key;
+    const auto& occurrence = options.occurrence;
+
+    std::shared_ptr<DataType> item_type =
+        checked_cast<const MapType&>(*batch[0].type()).item_type();
+    const auto& map_scalar = batch[0].scalar_as<MapScalar>();
+
+    if (ARROW_PREDICT_FALSE(!map_scalar.is_valid)) {
+      if (options.occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+        out->value = MakeNullScalar(list(item_type));
+      } else {
+        out->value = MakeNullScalar(item_type);
+      }
+      return Status::OK();
+    }
+
+    const auto& struct_array = checked_cast<const StructArray&>(*map_scalar.value);
+    const std::shared_ptr<Array> keys = struct_array.field(0);
+    const std::shared_ptr<Array> items = struct_array.field(1);
+
+    if (occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+      bool found_at_least_one_key = false;
+      std::unique_ptr<ArrayBuilder> builder;
+      RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), items->type(), &builder));
+      RETURN_NOT_OK(BuildItemsArray(*keys, *items, *query_key, &found_at_least_one_key,
+                                    builder.get()));
+
+      if (!found_at_least_one_key) {
+        out->value = MakeNullScalar(list(items->type()));
+      } else {
+        ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+        ARROW_ASSIGN_OR_RAISE(out->value, MakeScalar(list(items->type()), result));
+      }
+    } else { /* occurrence == FIRST || LAST */
+      bool from_back = (occurrence == MapArrayLookupOptions::LAST);
+
+      ARROW_ASSIGN_OR_RAISE(int64_t key_match_idx,
+                            GetOneMatchingIndex(*keys, *query_key, &from_back));
+      if (key_match_idx != -1) {
+        ARROW_ASSIGN_OR_RAISE(out->value, items->GetScalar(key_match_idx));
+      } else {
+        out->value = MakeNullScalar(items->type());
+      }
+    }
+    return Status::OK();
+  }
+};
+
+Result<ValueDescr> ResolveMapArrayLookupType(KernelContext* ctx,
+                                             const std::vector<ValueDescr>& descrs) {
+  const auto& options = OptionsWrapper<MapArrayLookupOptions>::Get(ctx);
+  std::shared_ptr<DataType> type = descrs.front().type;
+  std::shared_ptr<DataType> item_type = checked_cast<const MapType&>(*type).item_type();
+  std::shared_ptr<DataType> key_type = checked_cast<const MapType&>(*type).key_type();
+
+  if (!options.query_key) {
+    return Status::TypeError("map_array_lookup: query_key can't be empty.");
+  } else if (!options.query_key->is_valid) {
+    return Status::TypeError("map_array_lookup: query_key can't be null.");
+  } else if (!options.query_key->type || !options.query_key->type->Equals(key_type)) {
+    return Status::TypeError(
+        "map_array_lookup: query_key type and MapArray key_type don't match. Expected "
+        "type: ",
+        *key_type, ", but got type: ", *options.query_key->type);
+  }
+
+  if (options.occurrence == MapArrayLookupOptions::Occurrence::ALL) {
+    return ValueDescr(list(item_type), descrs.front().shape);
+  } else { /* occurrence == FIRST || LAST */
+    return ValueDescr(item_type, descrs.front().shape);
+  }
+}
+
+struct ResolveMapArrayLookup {
+  KernelContext* ctx;
+  const ExecBatch& batch;
+  Datum* out;
+
+  template <typename KeyType>
+  Status Execute() {
+    if (batch[0].kind() == Datum::SCALAR) {
+      return MapArrayLookupFunctor<KeyType>::ExecMapScalar(ctx, batch, out);
+    }
+    return MapArrayLookupFunctor<KeyType>::ExecMapArray(ctx, batch, out);
+  }
+
+  template <typename KeyType>
+  enable_if_physical_integer<KeyType, Status> Visit(const KeyType& type) {
+    return Execute<KeyType>();
+  }
+
+  template <typename KeyType>
+  enable_if_decimal<KeyType, Status> Visit(const KeyType& type) {
+    return Execute<KeyType>();
+  }
+
+  template <typename KeyType>
+  enable_if_base_binary<KeyType, Status> Visit(const KeyType& type) {
+    return Execute<KeyType>();
+  }
+
+  template <typename KeyType>
+  enable_if_boolean<KeyType, Status> Visit(const KeyType& type) {
+    return Execute<KeyType>();
+  }
+
+  template <typename KeyType>
+  enable_if_same<KeyType, FixedSizeBinaryType, Status> Visit(const KeyType& key) {
+    return Execute<KeyType>();
+  }
+
+  Status Visit(const MonthDayNanoIntervalType& key) {
+    return Execute<MonthDayNanoIntervalType>();
+  }
+
+  Status Visit(const DataType& type) {
+    return Status::TypeError("Got unsupported type: ", type.ToString());
+  }
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    ResolveMapArrayLookup visitor{ctx, batch, out};
+    return VisitTypeInline(*checked_cast<const MapType&>(*batch[0].type()).key_type(),
+                           &visitor);
+  }
+};
+
+void AddMapArrayLookupKernels(ScalarFunction* func) {
+  for (const auto shape : {ValueDescr::ARRAY, ValueDescr::SCALAR}) {
+    ScalarKernel kernel(
+        {InputType(Type::MAP, shape)}, OutputType(ResolveMapArrayLookupType),
+        ResolveMapArrayLookup::Exec, OptionsWrapper<MapArrayLookupOptions>::Init);

Review comment:
       Since the Exec is the same regardless of the shape, we can remove the loop and just register one kernel for `InputType(Type::MAP)`.

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -1333,6 +1333,43 @@ class IndexOptions(_IndexOptions):
         self._set_options(value)
 
 
+cdef class _MapArrayLookupOptions(FunctionOptions):
+    _occurrence_map = {
+        "ALL": CMapArrayLookupOccurrence_ALL,
+        "FIRST": CMapArrayLookupOccurrence_FIRST,
+        "LAST": CMapArrayLookupOccurrence_LAST,
+    }
+
+    def _set_options(self, scalar, occurrence):
+        try:
+            self.wrapped.reset(
+                new CMapArrayLookupOptions(
+                    pyarrow_unwrap_scalar(scalar),
+                    self._occurrence_map[occurrence]
+                )
+            )
+        except KeyError:
+            _raise_invalid_function_option(occurrence,
+                                           "Should either be FIRST, LAST or ALL")
+
+
+class MapArrayLookupOptions(_MapArrayLookupOptions):
+    """
+    Options for the `map_array_lookup` function.
+
+    Parameters
+    ----------
+    query_key : Scalar
+        The key to search for.
+    occurrence : str
+        The occurrence(s) to return from the MapArray
+        Accepted values are "FIRST", "LAST", "ALL".

Review comment:
       nit: we usually take lowercase option strings; e.g., see the round modes: https://github.com/apache/arrow/blob/10d108d6f0fae2a5265116ff23fe7a0623c2dc2f/python/pyarrow/_compute.pyx#L793-L814




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org