You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/10/19 15:23:07 UTC

[GitHub] [arrow] bkietz commented on a diff in pull request #14415: ARROW-17966: [C++] Adjust to new format for Substrait optional arguments

bkietz commented on code in PR #14415:
URL: https://github.com/apache/arrow/pull/14415#discussion_r999574162


##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -678,17 +693,48 @@ static EnumParser<OverflowBehavior> kOverflowParser =
     GetEnumParser<OverflowBehavior>(kOverflowOptions);
 
 template <typename Enum>
-Result<Enum> ParseEnumArg(const SubstraitCall& call, uint32_t arg_index,
+Result<std::optional<Enum>> ParseOption(const SubstraitCall& call,
+                                        std::string_view option_name,
+                                        const EnumParser<Enum>& parser,
+                                        const std::vector<Enum>& implemented_options) {
+  std::optional<std::vector<std::string> const*> enum_arg = call.GetOption(option_name);
+  if (!enum_arg.has_value()) {
+    return std::nullopt;
+  }
+  std::vector<std::string> const* prefs = *enum_arg;
+  for (const std::string& pref : *prefs) {
+    ARROW_ASSIGN_OR_RAISE(Enum parsed, parser(pref));
+    for (Enum implemented_opt : implemented_options) {
+      if (implemented_opt == parsed) {
+        return parsed;
+      }
+    }
+  }
+  // Prepare error message
+  std::stringstream joined_prefs;
+  for (std::size_t i = 0; i < prefs->size(); i++) {
+    joined_prefs << (*prefs)[i];
+    if (i < prefs->size() - 1) {
+      joined_prefs << ", ";
+    }
+  }
+  return Status::NotImplemented("During a call to a function with id ", call.id().uri,
+                                "#", call.id().name, " the plan requested the option ",
+                                option_name, " to be one of [", joined_prefs.str(),
+                                "] but none of those option values are supported");

Review Comment:
   ```suggestion
                                   "] but the only supported options are [",
                                   implemented_options_as_strings_somehow, "]");
   ```
   ?



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -156,15 +156,33 @@ Result<compute::Expression> SubstraitCall::GetValueArg(uint32_t index) const {
   return value_arg_it->second;
 }
 
-bool SubstraitCall::HasValueArg(uint32_t index) const {
+bool SubstraitCall::HasValueArg(int index) const {
   return value_args_.find(index) != value_args_.end();
 }
 
-void SubstraitCall::SetValueArg(uint32_t index, compute::Expression value_arg) {
+void SubstraitCall::SetValueArg(int index, compute::Expression value_arg) {
   size_ = std::max(size_, index + 1);
   value_args_[index] = std::move(value_arg);
 }
 
+std::optional<std::vector<std::string> const*> SubstraitCall::GetOption(
+    std::string_view option_name) const {
+  auto opt = options_.find(std::string(option_name));
+  if (opt == options_.end()) {
+    return std::nullopt;
+  }
+  return &opt->second;
+}
+
+void SubstraitCall::SetOption(std::string_view option_name,
+                              const std::vector<std::string_view>& option_preferences) {
+  std::vector<std::string> prefs_copy;
+  std::transform(option_preferences.begin(), option_preferences.end(),
+                 std::back_inserter(prefs_copy),
+                 [](std::string_view pref) { return std::string(pref); });
+  options_[std::string(option_name)] = prefs_copy;

Review Comment:
   Nit: copying a temporary vector and using std algorithms
   ```suggestion
     auto& prefs = options_[std::string(option_name)];
     for (std::string_view pref : option_preferences) {
       prefs.emplace_back(pref);
     }
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -645,22 +663,19 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry {
 };
 
 template <typename Enum>
-using EnumParser = std::function<Result<Enum>(std::optional<std::string_view>)>;
+using EnumParser = std::function<Result<Enum>(std::string_view)>;
 
 template <typename Enum>
 EnumParser<Enum> GetEnumParser(const std::vector<std::string>& options) {
   std::unordered_map<std::string, Enum> parse_map;
   for (std::size_t i = 0; i < options.size(); i++) {
     parse_map[options[i]] = static_cast<Enum>(i + 1);
   }
-  return [parse_map](std::optional<std::string_view> enum_val) -> Result<Enum> {
-    if (!enum_val) {
-      // Assumes 0 is always kUnspecified in Enum
-      return static_cast<Enum>(0);
-    }
-    auto maybe_parsed = parse_map.find(std::string(*enum_val));
+  return [parse_map](std::string_view enum_val) -> Result<Enum> {

Review Comment:
   ```suggestion
     return [parse_map = std::move(parse_map)](std::string_view enum_val) -> Result<Enum> {
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -645,22 +663,19 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry {
 };
 
 template <typename Enum>
-using EnumParser = std::function<Result<Enum>(std::optional<std::string_view>)>;
+using EnumParser = std::function<Result<Enum>(std::string_view)>;
 
 template <typename Enum>
 EnumParser<Enum> GetEnumParser(const std::vector<std::string>& options) {
   std::unordered_map<std::string, Enum> parse_map;
   for (std::size_t i = 0; i < options.size(); i++) {
     parse_map[options[i]] = static_cast<Enum>(i + 1);
   }
-  return [parse_map](std::optional<std::string_view> enum_val) -> Result<Enum> {
-    if (!enum_val) {
-      // Assumes 0 is always kUnspecified in Enum
-      return static_cast<Enum>(0);
-    }
-    auto maybe_parsed = parse_map.find(std::string(*enum_val));
+  return [parse_map](std::string_view enum_val) -> Result<Enum> {
+    auto maybe_parsed = parse_map.find(std::string(enum_val));

Review Comment:
   Nit: it's a minor convention that `/mabe_\w+/` indicates a `Result<>` or sometimes an `optional<>`
   ```suggestion
       auto it = parse_map.find(std::string(enum_val));
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -678,17 +693,48 @@ static EnumParser<OverflowBehavior> kOverflowParser =
     GetEnumParser<OverflowBehavior>(kOverflowOptions);
 
 template <typename Enum>
-Result<Enum> ParseEnumArg(const SubstraitCall& call, uint32_t arg_index,
+Result<std::optional<Enum>> ParseOption(const SubstraitCall& call,
+                                        std::string_view option_name,
+                                        const EnumParser<Enum>& parser,
+                                        const std::vector<Enum>& implemented_options) {
+  std::optional<std::vector<std::string> const*> enum_arg = call.GetOption(option_name);
+  if (!enum_arg.has_value()) {
+    return std::nullopt;
+  }
+  std::vector<std::string> const* prefs = *enum_arg;
+  for (const std::string& pref : *prefs) {
+    ARROW_ASSIGN_OR_RAISE(Enum parsed, parser(pref));
+    for (Enum implemented_opt : implemented_options) {
+      if (implemented_opt == parsed) {
+        return parsed;
+      }
+    }
+  }
+  // Prepare error message
+  std::stringstream joined_prefs;
+  for (std::size_t i = 0; i < prefs->size(); i++) {
+    joined_prefs << (*prefs)[i];
+    if (i < prefs->size() - 1) {
+      joined_prefs << ", ";
+    }
+  }
+  return Status::NotImplemented("During a call to a function with id ", call.id().uri,
+                                "#", call.id().name, " the plan requested the option ",
+                                option_name, " to be one of [", joined_prefs.str(),

Review Comment:
   Nit: use JoinStrings
   ```suggestion
     return Status::NotImplemented("During a call to a function with id ", call.id().uri,
                                   "#", call.id().name, " the plan requested the option ",
                                   option_name, " to be one of [",
                                   arrow::internal::JoinStrings(*prefs),
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -698,12 +744,15 @@ Result<std::vector<compute::Expression>> GetValueArgs(const SubstraitCall& call,
 ExtensionIdRegistry::SubstraitCallToArrow DecodeOptionlessOverflowableArithmetic(
     const std::string& function_name) {
   return [function_name](const SubstraitCall& call) -> Result<compute::Expression> {
-    ARROW_ASSIGN_OR_RAISE(OverflowBehavior overflow_behavior,
-                          ParseEnumArg(call, 0, kOverflowParser));
+    ARROW_ASSIGN_OR_RAISE(
+        std::optional<OverflowBehavior> maybe_overflow_behavior,
+        ParseOption(call, "overflow", kOverflowParser,
+                    {OverflowBehavior::kSilent, OverflowBehavior::kError}));

Review Comment:
   Not sure how you feel about this, but: we could save some boilerplate by having ParseOption just return `implemented_options[0]` as the default in case the option is unspecified.
   ```suggestion
           OverflowBehavior overflow_behavior,
           ParseOption(call, "overflow", kOverflowParser,
                       {OverflowBehavior::kSilent, OverflowBehavior::kError}));
   ```



##########
cpp/src/arrow/engine/substrait/expression_internal.cc:
##########
@@ -90,6 +95,9 @@ Result<SubstraitCall> DecodeScalarFunction(
     ARROW_RETURN_NOT_OK(DecodeArg(scalar_fn.arguments(i), static_cast<uint32_t>(i), &call,
                                   ext_set, conversion_options));
   }
+  for (int i = 0; i < scalar_fn.options_size(); i++) {
+    DecodeOption(scalar_fn.options(i), &call);

Review Comment:
   ```suggestion
     for (const auto& option : scalar_fn.options()) {
       DecodeOption(option, &call);
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -156,15 +156,33 @@ Result<compute::Expression> SubstraitCall::GetValueArg(uint32_t index) const {
   return value_arg_it->second;
 }
 
-bool SubstraitCall::HasValueArg(uint32_t index) const {
+bool SubstraitCall::HasValueArg(int index) const {
   return value_args_.find(index) != value_args_.end();
 }
 
-void SubstraitCall::SetValueArg(uint32_t index, compute::Expression value_arg) {
+void SubstraitCall::SetValueArg(int index, compute::Expression value_arg) {
   size_ = std::max(size_, index + 1);
   value_args_[index] = std::move(value_arg);
 }
 
+std::optional<std::vector<std::string> const*> SubstraitCall::GetOption(
+    std::string_view option_name) const {
+  auto opt = options_.find(std::string(option_name));
+  if (opt == options_.end()) {
+    return std::nullopt;

Review Comment:
   Nit: this could be simpler
   ```suggestion
   const std::vector<std::string>* SubstraitCall::GetOption(
       std::string_view option_name) const {
     auto opt = options_.find(std::string(option_name));
     if (opt == options_.end()) {
       return nullptr;
   ```



##########
cpp/src/arrow/engine/substrait/plan_internal.cc:
##########
@@ -132,10 +133,29 @@ Result<ExtensionSet> GetExtensionSetFromPlan(const substrait::Plan& plan,
                             conversion_options, registry);
 }
 
+namespace {
+
+// FIXME Is there some way to get these from the cmake files?

Review Comment:
   There does not appear to be a way to inspect a substrait clone and get the version numbers apart from parsing CHANGELOG.md. You could add `#cmakedefine ARROW_SUBSTRAIT_BUILD_VERSION` to config.h.cmake then parse that here, or parse it in cmake and add `#cmakedefine ARROW_SUBSTRAIT_BUILD_VERSION_MAJOR` etc to config.h.cmake. This is for example how ThirdpartyToolchain.cmake provides a `#define` to indicate that jemalloc is vendored https://github.com/drin/arrow/blob/dea465396f981f9bd9862e501dc6750ca4fee6d9/cpp/cmake_modules/ThirdpartyToolchain.cmake#L1864-L1865 -> https://github.com/drin/arrow/blob/dea465396f981f9bd9862e501dc6750ca4fee6d9/cpp/src/arrow/util/config.h.cmake#L48



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org