You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "westonpace (via GitHub)" <gi...@apache.org> on 2023/04/14 21:55:02 UTC

[GitHub] [arrow] westonpace commented on a diff in pull request #34651: GH-32763: [C++] Add FromProto for fetch & sort

westonpace commented on code in PR #34651:
URL: https://github.com/apache/arrow/pull/34651#discussion_r1167299286


##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -757,6 +774,95 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel, const ExtensionSet&
       return ProcessEmit(std::move(join), std::move(join_declaration),
                          std::move(join_schema));
     }
+    case substrait::Rel::RelTypeCase::kFetch: {
+      const auto& fetch = rel.fetch();
+      RETURN_NOT_OK(CheckRelCommon(fetch, conversion_options));
+
+      if (!fetch.has_input()) {
+        return Status::Invalid("substrait::FetchRel with no input relation");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto input,
+                            FromProto(fetch.input(), ext_set, conversion_options));
+
+      int64_t offset = fetch.offset();
+      int64_t count = fetch.count();
+
+      acero::Declaration fetch_dec{
+          "fetch", {input.declaration}, acero::FetchNodeOptions(offset, count)};
+
+      DeclarationInfo fetch_declaration{std::move(fetch_dec), input.output_schema};
+      return ProcessEmit(fetch, std::move(fetch_declaration),
+                         fetch_declaration.output_schema);
+    }
+    case substrait::Rel::RelTypeCase::kSort: {
+      const auto& sort = rel.sort();
+      RETURN_NOT_OK(CheckRelCommon(sort, conversion_options));
+
+      if (!sort.has_input()) {
+        return Status::Invalid("substrait::SortRel with no input relation");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto input,
+                            FromProto(sort.input(), ext_set, conversion_options));
+
+      if (sort.sorts_size() == 0) {
+        return Status::Invalid("substrait::SortRel with no sorts");
+      }
+
+      std::vector<compute::SortKey> sort_keys;
+      compute::NullPlacement null_placement;
+      bool first = true;
+      for (const auto& sort : sort.sorts()) {
+        if (sort.direction() == substrait::SortField::SortDirection::
+                                    SortField_SortDirection_SORT_DIRECTION_UNSPECIFIED) {
+          return Status::Invalid(
+              "substrait::SortRel with sort that had unspecified direction");
+        }
+        if (sort.direction() == substrait::SortField::SortDirection::
+                                    SortField_SortDirection_SORT_DIRECTION_CLUSTERED) {
+          return Status::NotImplemented(
+              "substrait::SortRel with sort with clustered sort direction");
+        }
+        // Substrait allows null placement to differ for each field.  Acero expects it to
+        // be consistent across all fields.  So we grab the null placement from the first
+        // key and verify all other keys have the same null placement
+        if (first) {

Review Comment:
   Good catch.  This was a bug.  I've added a test case to make sure we correctly detect and reject sort fields with mixed null placement.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org