You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2023/04/05 06:29:36 UTC
[arrow] branch main updated: GH-33616: [C++] Reorder group_by so that keys/segment keys come before aggregates (#34551)

This is an automated email from the ASF dual-hosted git repository.

westonpace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 379c1fb03a GH-33616: [C++] Reorder group_by so that keys/segment keys come before aggregates (#34551)
379c1fb03a is described below

commit 379c1fb03a78c108846516987b3b2583ef650cb8
Author: Weston Pace <we...@gmail.com>
AuthorDate: Tue Apr 4 23:29:29 2023 -0700

    GH-33616: [C++] Reorder group_by so that keys/segment keys come before aggregates (#34551)
    
    
    * Closes: #33616
    
    Lead-authored-by: Weston Pace <we...@gmail.com>
    Co-authored-by: Neal Richardson <ne...@gmail.com>
    Co-authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Weston Pace <we...@gmail.com>
---
 c_glib/test/test-execute-plan.rb                |   6 +-
 cpp/src/arrow/acero/aggregate_node.cc           |  59 ++-
 cpp/src/arrow/acero/groupby_test.cc             |  28 +-
 cpp/src/arrow/acero/hash_aggregate_test.cc      | 574 ++++++++++++------------
 cpp/src/arrow/acero/plan_test.cc                |  47 +-
 cpp/src/arrow/dataset/scanner_test.cc           |  10 +-
 cpp/src/arrow/engine/substrait/function_test.cc |   8 +-
 cpp/src/arrow/engine/substrait/serde_test.cc    |  18 +-
 python/pyarrow/table.pxi                        |  24 +-
 python/pyarrow/tests/test_acero.py              |   4 +-
 r/R/dplyr-collect.R                             |   4 +-
 r/R/query-engine.R                              |  19 +-
 r/tests/testthat/test-dataset-dplyr.R           |   1 -
 r/tests/testthat/test-dplyr-query.R             |   1 -
 r/tests/testthat/test-dplyr-summarize.R         |   6 +-
 ruby/red-arrow/test/test-group.rb               | 116 ++---
 16 files changed, 462 insertions(+), 463 deletions(-)

diff --git a/c_glib/test/test-execute-plan.rb b/c_glib/test/test-execute-plan.rb
index aeb9f90587..da1433db55 100644
--- a/c_glib/test/test-execute-plan.rb
+++ b/c_glib/test/test-execute-plan.rb
@@ -57,9 +57,9 @@ class TestExecutePlan < Test::Unit::TestCase
         Arrow::AggregateNodeOptions.new(aggregations, ["string"])
       end
       execute(plan) do
-        assert_equal(build_table("sum(number)" => build_int64_array([9, 6]),
-                                 "count(number)" => build_int64_array([3, 2]),
-                                 "string" => build_string_array(["a", "b"])),
+        assert_equal(build_table("string" => build_string_array(["a", "b"]),
+                                 "sum(number)" => build_int64_array([9, 6]),
+                                 "count(number)" => build_int64_array([3, 2])),
                      reader.read_all)
       end
     end
diff --git a/cpp/src/arrow/acero/aggregate_node.cc b/cpp/src/arrow/acero/aggregate_node.cc
index 6669d30bcc..bd97235df6 100644
--- a/cpp/src/arrow/acero/aggregate_node.cc
+++ b/cpp/src/arrow/acero/aggregate_node.cc
@@ -314,8 +314,15 @@ class ScalarAggregateNode : public ExecNode, public TracedNode {
     std::vector<const ScalarAggregateKernel*> kernels(aggregates.size());
     std::vector<std::vector<std::unique_ptr<KernelState>>> states(kernels.size());
     FieldVector fields(kernels.size() + segment_keys.size());
-    std::vector<std::vector<int>> target_fieldsets(kernels.size());
 
+    // Output the segment keys first, followed by the aggregates
+    for (size_t i = 0; i < segment_keys.size(); ++i) {
+      ARROW_ASSIGN_OR_RAISE(fields[i],
+                            segment_keys[i].GetOne(*inputs[0]->output_schema()));
+    }
+
+    std::vector<std::vector<int>> target_fieldsets(kernels.size());
+    std::size_t base = segment_keys.size();
     for (size_t i = 0; i < kernels.size(); ++i) {
       const auto& target_fieldset = aggregate_options.aggregates[i].target;
       for (const auto& target : target_fieldset) {
@@ -366,11 +373,8 @@ class ScalarAggregateNode : public ExecNode, public TracedNode {
       ARROW_ASSIGN_OR_RAISE(auto out_type, kernels[i]->signature->out_type().Resolve(
                                                &kernel_ctx, kernel_intypes[i]));
 
-      fields[i] = field(aggregate_options.aggregates[i].name, out_type.GetSharedPtr());
-    }
-    for (size_t i = 0; i < segment_keys.size(); ++i) {
-      ARROW_ASSIGN_OR_RAISE(fields[kernels.size() + i],
-                            segment_keys[i].GetOne(*inputs[0]->output_schema()));
+      fields[base + i] =
+          field(aggregate_options.aggregates[i].name, out_type.GetSharedPtr());
     }
 
     return plan->EmplaceNode<ScalarAggregateNode>(
@@ -485,6 +489,11 @@ class ScalarAggregateNode : public ExecNode, public TracedNode {
     ExecBatch batch{{}, 1};
     batch.values.resize(kernels_.size() + segment_field_ids_.size());
 
+    // First, insert segment keys
+    PlaceFields(batch, /*base=*/0, segmenter_values_);
+
+    // Followed by aggregate values
+    std::size_t base = segment_field_ids_.size();
     for (size_t i = 0; i < kernels_.size(); ++i) {
       arrow::util::tracing::Span span;
       START_COMPUTE_SPAN(span, aggs_[i].function,
@@ -495,9 +504,8 @@ class ScalarAggregateNode : public ExecNode, public TracedNode {
       KernelContext ctx{plan()->query_context()->exec_context()};
       ARROW_ASSIGN_OR_RAISE(auto merged, ScalarAggregateKernel::MergeAll(
                                              kernels_[i], &ctx, std::move(states_[i])));
-      RETURN_NOT_OK(kernels_[i]->finalize(&ctx, &batch.values[i]));
+      RETURN_NOT_OK(kernels_[i]->finalize(&ctx, &batch.values[base + i]));
     }
-    PlaceFields(batch, kernels_.size(), segmenter_values_);
 
     ARROW_RETURN_NOT_OK(output_->InputReceived(this, std::move(batch)));
     total_output_batches_++;
@@ -643,21 +651,23 @@ class GroupByNode : public ExecNode, public TracedNode {
     // Build field vector for output schema
     FieldVector output_fields{keys.size() + segment_keys.size() + aggs.size()};
 
-    // Aggregate fields come before key fields to match the behavior of GroupBy function
-    for (size_t i = 0; i < aggs.size(); ++i) {
-      output_fields[i] =
-          agg_result_fields[i]->WithName(aggregate_options.aggregates[i].name);
-    }
-    size_t base = aggs.size();
+    // First output is keys, followed by segment_keys, followed by aggregates themselves
+    // This matches the behavior described by Substrait and also tends to be the behavior
+    // in SQL engines
     for (size_t i = 0; i < keys.size(); ++i) {
       int key_field_id = key_field_ids[i];
-      output_fields[base + i] = input_schema->field(key_field_id);
+      output_fields[i] = input_schema->field(key_field_id);
     }
-    base += keys.size();
+    size_t base = keys.size();
     for (size_t i = 0; i < segment_keys.size(); ++i) {
       int segment_key_field_id = segment_key_field_ids[i];
       output_fields[base + i] = input_schema->field(segment_key_field_id);
     }
+    base += segment_keys.size();
+    for (size_t i = 0; i < aggs.size(); ++i) {
+      output_fields[base + i] =
+          agg_result_fields[i]->WithName(aggregate_options.aggregates[i].name);
+    }
 
     return input->plan()->EmplaceNode<GroupByNode>(
         input, schema(std::move(output_fields)), std::move(key_field_ids),
@@ -766,11 +776,18 @@ class GroupByNode : public ExecNode, public TracedNode {
     // If we never got any batches, then state won't have been initialized
     RETURN_NOT_OK(InitLocalStateIfNeeded(state));
 
+    // Allocate a batch for output
     ExecBatch out_data{{}, state->grouper->num_groups()};
     out_data.values.resize(agg_kernels_.size() + key_field_ids_.size() +
                            segment_key_field_ids_.size());
 
-    // Aggregate fields come before key fields to match the behavior of GroupBy function
+    // Keys come first
+    ARROW_ASSIGN_OR_RAISE(ExecBatch out_keys, state->grouper->GetUniques());
+    std::move(out_keys.values.begin(), out_keys.values.end(), out_data.values.begin());
+    // Followed by segment keys
+    PlaceFields(out_data, key_field_ids_.size(), segmenter_values_);
+    // And finally, the aggregates themselves
+    std::size_t base = segment_key_field_ids_.size() + key_field_ids_.size();
     for (size_t i = 0; i < agg_kernels_.size(); ++i) {
       arrow::util::tracing::Span span;
       START_COMPUTE_SPAN(span, aggs_[i].function,
@@ -780,15 +797,11 @@ class GroupByNode : public ExecNode, public TracedNode {
                           {"function.kind", std::string(kind_name()) + "::Finalize"}});
       KernelContext batch_ctx{plan_->query_context()->exec_context()};
       batch_ctx.SetState(state->agg_states[i].get());
-      RETURN_NOT_OK(agg_kernels_[i]->finalize(&batch_ctx, &out_data.values[i]));
+      RETURN_NOT_OK(agg_kernels_[i]->finalize(&batch_ctx, &out_data.values[i + base]));
       state->agg_states[i].reset();
     }
-
-    ARROW_ASSIGN_OR_RAISE(ExecBatch out_keys, state->grouper->GetUniques());
-    std::move(out_keys.values.begin(), out_keys.values.end(),
-              out_data.values.begin() + agg_kernels_.size());
-    PlaceFields(out_data, agg_kernels_.size() + key_field_ids_.size(), segmenter_values_);
     state->grouper.reset();
+
     return out_data;
   }
 
diff --git a/cpp/src/arrow/acero/groupby_test.cc b/cpp/src/arrow/acero/groupby_test.cc
index 1284dbae2b..5710ad2598 100644
--- a/cpp/src/arrow/acero/groupby_test.cc
+++ b/cpp/src/arrow/acero/groupby_test.cc
@@ -39,13 +39,15 @@ TEST(GroupByConvenienceFunc, Basic) {
   ])"});
 
   // One key, two aggregates, same values array
-  std::shared_ptr<Table> expected =
-      TableFromJSON(schema({field("value_sum", int64()), field("value_count", int64()),
-                            field("key1", utf8())}),
-                    {R"([
-        [1, 1, "x"],
-        [5, 2, "y"],
-        [9, 2, "z"]
+  std::shared_ptr<Table> expected = TableFromJSON(schema({
+                                                      field("key1", utf8()),
+                                                      field("value_sum", int64()),
+                                                      field("value_count", int64()),
+                                                  }),
+                                                  {R"([
+        ["x", 1, 1],
+        ["y", 5, 2],
+        ["z", 9, 2]
     ])"});
   ASSERT_OK_AND_ASSIGN(std::shared_ptr<Table> actual,
                        TableGroupBy(in_table,
@@ -55,14 +57,14 @@ TEST(GroupByConvenienceFunc, Basic) {
   AssertTablesEqual(*expected, *actual);
 
   // Two keys, one aggregate
-  expected = TableFromJSON(schema({field("value_sum", int64()), field("key1", utf8()),
-                                   field("key2", int32())}),
+  expected = TableFromJSON(schema({field("key1", utf8()), field("key2", int32()),
+                                   field("value_sum", int64())}),
                            {
                                R"([
-        [1, "x", 1],
-        [2, "y", 1],
-        [3, "y", 2],
-        [9, "z", 2]
+        ["x", 1, 1],
+        ["y", 1, 2],
+        ["y", 2, 3],
+        ["z", 2, 9]
       ])"});
 
   ASSERT_OK_AND_ASSIGN(actual,
diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc
index 68059dacbf..0ae06d0572 100644
--- a/cpp/src/arrow/acero/hash_aggregate_test.cc
+++ b/cpp/src/arrow/acero/hash_aggregate_test.cc
@@ -107,6 +107,17 @@ Result<Datum> NaiveGroupBy(std::vector<Datum> arguments, std::vector<Datum> keys
   ArrayVector out_columns;
   std::vector<std::string> out_names;
 
+  int key_idx = 0;
+  ARROW_ASSIGN_OR_RAISE(auto uniques, grouper->GetUniques());
+  std::vector<SortKey> sort_keys;
+  std::vector<std::shared_ptr<Field>> sort_table_fields;
+  for (const Datum& key : uniques.values) {
+    out_columns.push_back(key.make_array());
+    sort_keys.emplace_back(FieldRef(key_idx));
+    sort_table_fields.push_back(field("key_" + ToChars(key_idx), key.type()));
+    out_names.push_back("key_" + ToChars(key_idx++));
+  }
+
   for (size_t i = 0; i < arguments.size(); ++i) {
     out_names.push_back(aggregates[i].function);
 
@@ -132,17 +143,6 @@ Result<Datum> NaiveGroupBy(std::vector<Datum> arguments, std::vector<Datum> keys
     out_columns.push_back(aggregated_column.make_array());
   }
 
-  int i = 0;
-  ARROW_ASSIGN_OR_RAISE(auto uniques, grouper->GetUniques());
-  std::vector<SortKey> sort_keys;
-  std::vector<std::shared_ptr<Field>> sort_table_fields;
-  for (const Datum& key : uniques.values) {
-    out_columns.push_back(key.make_array());
-    sort_keys.emplace_back(FieldRef(i));
-    sort_table_fields.push_back(field("key_" + ToChars(i), key.type()));
-    out_names.push_back("key_" + ToChars(i++));
-  }
-
   // Return a struct array sorted by the keys
   SortOptions sort_options(std::move(sort_keys));
   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<RecordBatch> sort_batch,
@@ -179,7 +179,7 @@ Result<Datum> MakeGroupByOutput(const std::vector<ExecBatch>& output_batches,
       StructArray::Make(std::move(out_arrays), output_schema->fields()));
 
   bool need_sort = !naive;
-  for (size_t i = num_aggregates; need_sort && i < out_arrays.size(); i++) {
+  for (size_t i = 0; need_sort && i < num_keys; i++) {
     if (output_schema->field(static_cast<int>(i))->type()->id() == Type::DICTIONARY) {
       need_sort = false;
     }
@@ -196,7 +196,7 @@ Result<Datum> MakeGroupByOutput(const std::vector<ExecBatch>& output_batches,
   std::vector<std::shared_ptr<Array>> key_columns;
   std::vector<SortKey> sort_keys;
   for (std::size_t i = 0; i < num_keys; i++) {
-    const std::shared_ptr<Array>& arr = out_arrays[i + num_aggregates];
+    const std::shared_ptr<Array>& arr = out_arrays[i];
     key_columns.push_back(arr);
     key_fields.push_back(field("name_does_not_matter", arr->type()));
     sort_keys.emplace_back(static_cast<int>(i));
@@ -206,7 +206,6 @@ Result<Datum> MakeGroupByOutput(const std::vector<ExecBatch>& output_batches,
   SortOptions sort_options(std::move(sort_keys));
   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Array> sort_indices,
                         SortIndices(key_table, sort_options));
-
   return Take(struct_arr, sort_indices);
 }
 
@@ -1254,8 +1253,8 @@ TEST_P(GroupBy, NoBatches) {
                   },
                   /*use_threads=*/true));
   AssertDatumsEqual(ArrayFromJSON(struct_({
-                                      field("hash_count", int64()),
                                       field("key_0", int64()),
+                                      field("hash_count", int64()),
                                   }),
                                   R"([])"),
                     aggregated_and_grouped, /*verbose=*/true);
@@ -1319,18 +1318,16 @@ TEST_P(GroupBy, CountOnly) {
                     use_threads));
     SortBy({"key_0"}, &aggregated_and_grouped);
 
-    AssertDatumsEqual(ArrayFromJSON(struct_({
-                                        field("hash_count", int64()),
-                                        field("key_0", int64()),
-                                    }),
-                                    R"([
-    [2,   1],
-    [3,   2],
-    [0,   3],
-    [2,   null]
+    AssertDatumsEqual(
+        ArrayFromJSON(struct_({field("key_0", int64()), field("hash_count", int64())}),
+                      R"([
+    [1, 2],
+    [2, 3],
+    [3, 0],
+    [null, 2]
   ])"),
-                      aggregated_and_grouped,
-                      /*verbose=*/true);
+        aggregated_and_grouped,
+        /*verbose=*/true);
   }
 }
 
@@ -1360,15 +1357,15 @@ TEST_P(GroupBy, CountScalar) {
                                  use_threads));
 
     Datum expected = ArrayFromJSON(struct_({
+                                       field("key", int64()),
                                        field("hash_count", int64()),
                                        field("hash_count", int64()),
                                        field("hash_count", int64()),
-                                       field("key", int64()),
                                    }),
                                    R"([
-      [3, 2, 5, 1],
-      [2, 1, 3, 2],
-      [2, 1, 3, 3]
+      [1, 3, 2, 5],
+      [2, 2, 1, 3],
+      [3, 2, 1, 3]
     ])");
     AssertDatumsApproxEqual(expected, actual, /*verbose=*/true);
   }
@@ -1406,14 +1403,14 @@ TEST_P(GroupBy, SumOnly) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
-                                        field("hash_sum", float64()),
                                         field("key_0", int64()),
+                                        field("hash_sum", float64()),
                                     }),
                                     R"([
-    [4.25,   1],
-    [-0.125, 2],
-    [null,   3],
-    [4.75,   null]
+    [1, 4.25],
+    [2, -0.125],
+    [3, null],
+    [null, 4.75]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -1474,20 +1471,20 @@ TEST_P(GroupBy, SumMeanProductDecimal) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_sum", decimal128(3, 2)),
                                         field("hash_sum", decimal256(3, 2)),
                                         field("hash_mean", decimal128(3, 2)),
                                         field("hash_mean", decimal256(3, 2)),
                                         field("hash_product", decimal128(3, 2)),
                                         field("hash_product", decimal256(3, 2)),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    ["4.25",  "4.25",  "2.13",  "2.13",  "3.25", "3.25", 1],
-    ["-0.13", "-0.13", "-0.04", "-0.04", "0.00", "0.00", 2],
-    [null,    null,    null,    null,    null,   null,   3],
-    ["4.05",  "4.05",  "1.01",  "1.01",  "1.05", "1.05", 4],
-    ["4.75",  "4.75",  "2.38",  "2.38",  "3.00", "3.00", null]
+    [1, "4.25",  "4.25",  "2.13",  "2.13",  "3.25", "3.25"],
+    [2, "-0.13", "-0.13", "-0.04", "-0.04", "0.00", "0.00"],
+    [3, null,    null,    null,    null,    null,   null],
+    [4, "4.05",  "4.05",  "1.01",  "1.01",  "1.05", "1.05"],
+    [null, "4.75",  "4.75",  "2.38",  "2.38",  "3.00", "3.00"]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -1530,15 +1527,15 @@ TEST_P(GroupBy, MeanOnly) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                              field("key_0", int64()),
                                               field("hash_mean", float64()),
                                               field("hash_mean", float64()),
-                                              field("key_0", int64()),
                                           }),
                                           R"([
-    [2.125,                 null,                  1],
-    [-0.041666666666666664, -0.041666666666666664, 2],
-    [null,                  null,                  3],
-    [2.375,                 null,                  null]
+    [1,    2.125,                 null                 ],
+    [2,    -0.041666666666666664, -0.041666666666666664],
+    [3,    null,                  null                 ],
+    [null, 2.375,                 null                 ]
   ])"),
                             aggregated_and_grouped,
                             /*verbose=*/true);
@@ -1569,15 +1566,15 @@ TEST_P(GroupBy, SumMeanProductScalar) {
                    },
                    use_threads));
     Datum expected = ArrayFromJSON(struct_({
+                                       field("key", int64()),
                                        field("hash_sum", int64()),
                                        field("hash_mean", float64()),
                                        field("hash_product", int64()),
-                                       field("key", int64()),
                                    }),
                                    R"([
-      [4, 1.333333, 2, 1],
-      [4, 2,        3, 2],
-      [5, 2.5,      4, 3]
+      [1, 4, 1.333333, 2],
+      [2, 4, 2,        3],
+      [3, 5, 2.5,      4]
     ])");
     AssertDatumsApproxEqual(expected, actual, /*verbose=*/true);
   }
@@ -1615,15 +1612,15 @@ TEST_P(GroupBy, VarianceAndStddev) {
                            false));
 
   AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("key_0", int64()),
                                             field("hash_variance", float64()),
                                             field("hash_stddev", float64()),
-                                            field("key_0", int64()),
                                         }),
                                         R"([
-    [1.0,                 1.0,                1],
-    [0.22222222222222224, 0.4714045207910317, 2],
-    [null,                null,               3],
-    [2.25,                1.5,                null]
+    [1,    1.0,                 1.0               ],
+    [2,    0.22222222222222224, 0.4714045207910317],
+    [3,    null,                null              ],
+    [null, 2.25,                1.5               ]
   ])"),
                           aggregated_and_grouped,
                           /*verbose=*/true);
@@ -1658,15 +1655,15 @@ TEST_P(GroupBy, VarianceAndStddev) {
                                                    false));
 
   AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("key_0", int64()),
                                             field("hash_variance", float64()),
                                             field("hash_stddev", float64()),
-                                            field("key_0", int64()),
                                         }),
                                         R"([
-    [1.0,                 1.0,                1],
-    [0.22222222222222224, 0.4714045207910317, 2],
-    [null,                null,               3],
-    [2.25,                1.5,                null]
+    [1,    1.0,                 1.0               ],
+    [2,    0.22222222222222224, 0.4714045207910317],
+    [3,    null,                null              ],
+    [null, 2.25,                1.5               ]
   ])"),
                           aggregated_and_grouped,
                           /*verbose=*/true);
@@ -1690,15 +1687,15 @@ TEST_P(GroupBy, VarianceAndStddev) {
                            false));
 
   AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("key_0", int64()),
                                             field("hash_variance", float64()),
                                             field("hash_stddev", float64()),
-                                            field("key_0", int64()),
                                         }),
                                         R"([
-    [null,                null,               1],
-    [0.6666666666666667,  0.816496580927726,  2],
-    [null,                null,               3],
-    [null,                null,               null]
+    [1,    null,                null             ],
+    [2,    0.6666666666666667,  0.816496580927726],
+    [3,    null,                null             ],
+    [null, null,                null             ]
   ])"),
                           aggregated_and_grouped,
                           /*verbose=*/true);
@@ -1740,16 +1737,16 @@ TEST_P(GroupBy, VarianceAndStddevDecimal) {
                            false));
 
   AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("key_0", int64()),
                                             field("hash_variance", float64()),
                                             field("hash_stddev", float64()),
                                             field("hash_variance", float64()),
                                             field("hash_stddev", float64()),
-                                            field("key_0", int64()),
                                         }),
                                         R"([
-    [1.0,                 1.0,                1.0,                 1.0,                1],
-    [0.22222222222222224, 0.4714045207910317, 0.22222222222222224, 0.4714045207910317, 2],
-    [2.25,                1.5,                2.25,                1.5,                null]
+    [1,    1.0,                 1.0,                1.0,                 1.0               ],
+    [2,    0.22222222222222224, 0.4714045207910317, 0.22222222222222224, 0.4714045207910317],
+    [null, 2.25,                1.5,                2.25,                1.5               ]
   ])"),
                           aggregated_and_grouped,
                           /*verbose=*/true);
@@ -1813,20 +1810,20 @@ TEST_P(GroupBy, TDigest) {
 
   AssertDatumsApproxEqual(
       ArrayFromJSON(struct_({
+                        field("key_0", int64()),
                         field("hash_tdigest", fixed_size_list(float64(), 1)),
                         field("hash_tdigest", fixed_size_list(float64(), 3)),
                         field("hash_tdigest", fixed_size_list(float64(), 3)),
                         field("hash_tdigest", fixed_size_list(float64(), 1)),
                         field("hash_tdigest", fixed_size_list(float64(), 1)),
                         field("hash_tdigest", fixed_size_list(float64(), 1)),
-                        field("key_0", int64()),
                     }),
                     R"([
-    [[1.0],  [1.0, 3.0, 3.0],    [1.0, 3.0, 3.0],    [null], [null], [null], 1],
-    [[0.0],  [0.0, 0.0, 0.0],    [0.0, 0.0, 0.0],    [0.0],  [0.0],  [0.0],  2],
-    [[null], [null, null, null], [null, null, null], [null], [null], [null], 3],
-    [[1.0],  [1.0, 1.0, 1.0],    [1.0, 1.0, 1.0],    [null], [1.0],  [null], 4],
-    [[1.0],  [1.0, 4.0, 4.0],    [1.0, 4.0, 4.0],    [1.0],  [null], [null], null]
+    [1,    [1.0],  [1.0, 3.0, 3.0],    [1.0, 3.0, 3.0],    [null], [null], [null]],
+    [2,    [0.0],  [0.0, 0.0, 0.0],    [0.0, 0.0, 0.0],    [0.0],  [0.0],  [0.0] ],
+    [3,    [null], [null, null, null], [null, null, null], [null], [null], [null]],
+    [4,    [1.0],  [1.0, 1.0, 1.0],    [1.0, 1.0, 1.0],    [null], [1.0],  [null]],
+    [null, [1.0],  [1.0, 4.0, 4.0],    [1.0, 4.0, 4.0],    [1.0],  [null], [null]]
   ])"),
       aggregated_and_grouped,
       /*verbose=*/true);
@@ -1862,14 +1859,14 @@ TEST_P(GroupBy, TDigestDecimal) {
 
   AssertDatumsApproxEqual(
       ArrayFromJSON(struct_({
+                        field("key_0", int64()),
                         field("hash_tdigest", fixed_size_list(float64(), 1)),
                         field("hash_tdigest", fixed_size_list(float64(), 1)),
-                        field("key_0", int64()),
                     }),
                     R"([
-    [[1.01], [1.01], 1],
-    [[0.0],  [0.0],  2],
-    [[1.85], [1.85], null]
+    [1,    [1.01], [1.01]],
+    [2,    [0.0],  [0.0] ],
+    [null, [1.85], [1.85]]
   ])"),
       aggregated_and_grouped,
       /*verbose=*/true);
@@ -1923,18 +1920,18 @@ TEST_P(GroupBy, ApproximateMedian) {
                              false));
 
     AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                              field("key_0", int64()),
                                               field("hash_approximate_median", float64()),
                                               field("hash_approximate_median", float64()),
                                               field("hash_approximate_median", float64()),
                                               field("hash_approximate_median", float64()),
-                                              field("key_0", int64()),
                                           }),
                                           R"([
-    [1.0,  null, null, null, 1],
-    [0.0,  0.0,  0.0,  0.0,  2],
-    [null, null, null, null, 3],
-    [1.0,  null, 1.0,  null, 4],
-    [1.0,  1.0,  null, null, null]
+    [1,    1.0,  null, null, null],
+    [2,    0.0,  0.0,  0.0,  0.0 ],
+    [3,    null, null, null, null],
+    [4,    1.0,  null, 1.0,  null],
+    [null, 1.0,  1.0,  null, null]
   ])"),
                             aggregated_and_grouped,
                             /*verbose=*/true);
@@ -1973,18 +1970,18 @@ TEST_P(GroupBy, StddevVarianceTDigestScalar) {
                    use_threads));
     Datum expected =
         ArrayFromJSON(struct_({
+                          field("key", int64()),
                           field("hash_stddev", float64()),
                           field("hash_variance", float64()),
                           field("hash_tdigest", fixed_size_list(float64(), 1)),
                           field("hash_stddev", float64()),
                           field("hash_variance", float64()),
                           field("hash_tdigest", fixed_size_list(float64(), 1)),
-                          field("key", int64()),
                       }),
                       R"([
-         [0.4714045, 0.222222, [1.0], 0.4714045, 0.222222, [1.0], 1],
-         [1.0,       1.0,      [1.0], 1.0,       1.0,      [1.0], 2],
-         [1.5,       2.25,     [1.0], 1.5,       2.25,     [1.0], 3]
+         [1, 0.4714045, 0.222222, [1.0], 0.4714045, 0.222222, [1.0]],
+         [2, 1.0,       1.0,      [1.0], 1.0,       1.0,      [1.0]],
+         [3, 1.5,       2.25,     [1.0], 1.5,       2.25,     [1.0]]
        ])");
     AssertDatumsApproxEqual(expected, actual, /*verbose=*/true);
   }
@@ -2034,19 +2031,19 @@ TEST_P(GroupBy, VarianceOptions) {
             },
             use_threads));
     Datum expected = ArrayFromJSON(struct_({
+                                       field("key", int64()),
                                        field("hash_stddev", float64()),
                                        field("hash_stddev", float64()),
                                        field("hash_stddev", float64()),
                                        field("hash_variance", float64()),
                                        field("hash_variance", float64()),
                                        field("hash_variance", float64()),
-                                       field("key", int64()),
                                    }),
                                    R"([
-         [null,    0.471405, null,    null,   0.222222, null,   1],
-         [1.29904, 1.29904,  1.29904, 1.6875, 1.6875,   1.6875, 2],
-         [0.0,     null,     null,    0.0,    null,     null,   3],
-         [null,    0.471405, null,    null,   0.222222, null,   4]
+         [1, null,    0.471405, null,    null,   0.222222, null  ],
+         [2, 1.29904, 1.29904,  1.29904, 1.6875, 1.6875,   1.6875],
+         [3, 0.0,     null,     null,    0.0,    null,     null  ],
+         [4, null,    0.471405, null,    null,   0.222222, null  ]
        ])");
     ValidateOutput(expected);
     AssertDatumsApproxEqual(expected, actual, /*verbose=*/true);
@@ -2065,19 +2062,19 @@ TEST_P(GroupBy, VarianceOptions) {
             },
             use_threads));
     expected = ArrayFromJSON(struct_({
+                                 field("key", int64()),
                                  field("hash_stddev", float64()),
                                  field("hash_stddev", float64()),
                                  field("hash_stddev", float64()),
                                  field("hash_variance", float64()),
                                  field("hash_variance", float64()),
                                  field("hash_variance", float64()),
-                                 field("key", int64()),
                              }),
                              R"([
-         [null,    0.471405, null,    null,   0.222222, null,   1],
-         [1.29904, 1.29904,  1.29904, 1.6875, 1.6875,   1.6875, 2],
-         [0.0,     null,     null,    0.0,    null,     null,   3],
-         [null,    0.471405, null,    null,   0.222222, null,   4]
+         [1, null,    0.471405, null,    null,   0.222222, null  ],
+         [2, 1.29904, 1.29904,  1.29904, 1.6875, 1.6875,   1.6875],
+         [3, 0.0,     null,     null,    0.0,    null,     null  ],
+         [4, null,    0.471405, null,    null,   0.222222, null  ]
        ])");
     ValidateOutput(expected);
     AssertDatumsApproxEqual(expected, actual, /*verbose=*/true);
@@ -2129,6 +2126,7 @@ TEST_P(GroupBy, MinMaxOnly) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_min_max", struct_({
                                                                   field("min", float64()),
                                                                   field("max", float64()),
@@ -2141,13 +2139,12 @@ TEST_P(GroupBy, MinMaxOnly) {
                                                                   field("min", boolean()),
                                                                   field("max", boolean()),
                                                               })),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    [{"min": 1.0,   "max": 3.25},  {"min": null, "max": null}, {"min": true, "max": true},   1],
-    [{"min": -0.25, "max": 0.125}, {"min": null, "max": null}, {"min": false, "max": false}, 2],
-    [{"min": null,  "max": null},  {"min": null, "max": null}, {"min": false, "max": true},  3],
-    [{"min": 0.75,  "max": 4.0},   {"min": null, "max": null}, {"min": true, "max": true},   null]
+    [1, {"min": 1.0,   "max": 3.25},  {"min": null, "max": null}, {"min": true, "max": true}   ],
+    [2, {"min": -0.25, "max": 0.125}, {"min": null, "max": null}, {"min": false, "max": false} ],
+    [3, {"min": null,  "max": null},  {"min": null, "max": null}, {"min": false, "max": true}  ],
+    [null, {"min": 0.75,  "max": 4.0},   {"min": null, "max": null}, {"min": true, "max": true}]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -2200,20 +2197,20 @@ TEST_P(GroupBy, MinMaxTypes) {
 
   const std::string default_expected =
       R"([
-    [{"min": 1, "max": 3},       1],
-    [{"min": 0, "max": 0},       2],
-    [{"min": null, "max": null}, 3],
-    [{"min": 3, "max": 5},       4],
-    [{"min": 1, "max": 4},       null]
+    [1,    {"min": 1, "max": 3}      ],
+    [2,    {"min": 0, "max": 0}      ],
+    [3,    {"min": null, "max": null}],
+    [4,    {"min": 3, "max": 5}      ],
+    [null, {"min": 1, "max": 4}   ]
     ])";
 
   const std::string date64_expected =
       R"([
-    [{"min": 86400000, "max": 259200000},       1],
-    [{"min": 0, "max": 0},       2],
-    [{"min": null, "max": null}, 3],
-    [{"min": 259200000, "max": 432000000},       4],
-    [{"min": 86400000, "max": 345600000},       null]
+    [1,    {"min": 86400000, "max": 259200000} ],
+    [2,    {"min": 0, "max": 0}                ],
+    [3,    {"min": null, "max": null}          ],
+    [4,    {"min": 259200000, "max": 432000000}],
+    [null, {"min": 86400000, "max": 345600000} ]
     ])";
 
   for (const auto& ty : types) {
@@ -2233,8 +2230,8 @@ TEST_P(GroupBy, MinMaxTypes) {
     AssertDatumsEqual(
         ArrayFromJSON(
             struct_({
-                field("hash_min_max", struct_({field("min", ty), field("max", ty)})),
                 field("key_0", int64()),
+                field("hash_min_max", struct_({field("min", ty), field("max", ty)})),
             }),
             (ty->name() == "date64") ? date64_expected : default_expected),
         aggregated_and_grouped,
@@ -2287,6 +2284,7 @@ TEST_P(GroupBy, MinMaxDecimal) {
 
     AssertDatumsEqual(
         ArrayFromJSON(struct_({
+                          field("key_0", int64()),
                           field("hash_min_max", struct_({
                                                     field("min", decimal128(3, 2)),
                                                     field("max", decimal128(3, 2)),
@@ -2295,14 +2293,13 @@ TEST_P(GroupBy, MinMaxDecimal) {
                                                     field("min", decimal256(3, 2)),
                                                     field("max", decimal256(3, 2)),
                                                 })),
-                          field("key_0", int64()),
                       }),
                       R"([
-    [{"min": "1.01", "max": "3.25"},   {"min": "1.01", "max": "3.25"},   1],
-    [{"min": "-0.25", "max": "0.12"},  {"min": "-0.25", "max": "0.12"},  2],
-    [{"min": null, "max": null},       {"min": null, "max": null},       3],
-    [{"min": "-5.25", "max": "-3.25"}, {"min": "-5.25", "max": "-3.25"}, 4],
-    [{"min": "0.75", "max": "4.01"},   {"min": "0.75", "max": "4.01"},   null]
+    [1,    {"min": "1.01", "max": "3.25"},   {"min": "1.01", "max": "3.25"}  ],
+    [2,    {"min": "-0.25", "max": "0.12"},  {"min": "-0.25", "max": "0.12"} ],
+    [3,    {"min": null, "max": null},       {"min": null, "max": null}      ],
+    [4,    {"min": "-5.25", "max": "-3.25"}, {"min": "-5.25", "max": "-3.25"}],
+    [null, {"min": "0.75", "max": "4.01"},   {"min": "0.75", "max": "4.01"}  ]
   ])"),
         aggregated_and_grouped,
         /*verbose=*/true);
@@ -2345,14 +2342,14 @@ TEST_P(GroupBy, MinMaxBinary) {
       AssertDatumsEqual(
           ArrayFromJSON(
               struct_({
-                  field("hash_min_max", struct_({field("min", ty), field("max", ty)})),
                   field("key_0", int64()),
+                  field("hash_min_max", struct_({field("min", ty), field("max", ty)})),
               }),
               R"([
-    [{"min": "aaaa", "max": "d"},    1],
-    [{"min": "babcd", "max": "bcd"}, 2],
-    [{"min": null, "max": null},     3],
-    [{"min": "123", "max": "2"},     null]
+    [1,    {"min": "aaaa", "max": "d"}   ],
+    [2,    {"min": "babcd", "max": "bcd"}],
+    [3,    {"min": null, "max": null}    ],
+    [null, {"min": "123", "max": "2"}    ]
   ])"),
           aggregated_and_grouped,
           /*verbose=*/true);
@@ -2396,14 +2393,14 @@ TEST_P(GroupBy, MinMaxFixedSizeBinary) {
     AssertDatumsEqual(
         ArrayFromJSON(
             struct_({
-                field("hash_min_max", struct_({field("min", ty), field("max", ty)})),
                 field("key_0", int64()),
+                field("hash_min_max", struct_({field("min", ty), field("max", ty)})),
             }),
             R"([
-    [{"min": "aaa", "max": "ddd"}, 1],
-    [{"min": "bab", "max": "bcd"}, 2],
-    [{"min": null, "max": null},   3],
-    [{"min": "123", "max": "234"}, null]
+    [1,    {"min": "aaa", "max": "ddd"}],
+    [2,    {"min": "bab", "max": "bcd"}],
+    [3,    {"min": null, "max": null}  ],
+    [null, {"min": "123", "max": "234"}]
   ])"),
         aggregated_and_grouped,
         /*verbose=*/true);
@@ -2448,16 +2445,16 @@ TEST_P(GroupBy, MinOrMax) {
   SortBy({"key_0"}, &aggregated_and_grouped);
 
   AssertDatumsEqual(ArrayFromJSON(struct_({
+                                      field("key_0", int64()),
                                       field("hash_min", float64()),
                                       field("hash_max", float64()),
-                                      field("key_0", int64()),
                                   }),
                                   R"([
-    [1.0,   3.25,  1],
-    [-0.25, 0.125, 2],
-    [null,  null,  3],
-    [-Inf,  Inf,   4],
-    [0.75,  4.0,   null]
+    [1,    1.0,   3.25 ],
+    [2,    -0.25, 0.125],
+    [3,    null,  null ],
+    [4,    -Inf,  Inf  ],
+    [null, 0.75,  4.0  ]
   ])"),
                     aggregated_and_grouped,
                     /*verbose=*/true);
@@ -2483,14 +2480,14 @@ TEST_P(GroupBy, MinMaxScalar) {
                    use_threads));
     Datum expected =
         ArrayFromJSON(struct_({
+                          field("key", int64()),
                           field("hash_min_max",
                                 struct_({field("min", int32()), field("max", int32())})),
-                          field("key", int64()),
                       }),
                       R"([
-      [{"min": -1, "max": 2}, 1],
-      [{"min": -1, "max": 3}, 2],
-      [{"min": -1, "max": 4}, 3]
+      [1, {"min": -1, "max": 2}],
+      [2, {"min": -1, "max": 3}],
+      [3, {"min": -1, "max": 4}]
     ])");
     AssertDatumsApproxEqual(expected, actual, /*verbose=*/true);
   }
@@ -2562,6 +2559,7 @@ TEST_P(GroupBy, AnyAndAll) {
     // Group 5: trues
     // Group null: falses
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_any", boolean()),
                                         field("hash_any", boolean()),
                                         field("hash_any", boolean()),
@@ -2570,15 +2568,14 @@ TEST_P(GroupBy, AnyAndAll) {
                                         field("hash_all", boolean()),
                                         field("hash_all", boolean()),
                                         field("hash_all", boolean()),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    [true,  null, true,  null, true,  null,  null,  null,  1],
-    [true,  true, true,  true, false, false, false, false, 2],
-    [false, null, null,  null, true,  null,  null,  null,  3],
-    [false, null, null,  null, false, null,  false, null,  4],
-    [true,  null, true,  null, true,  null,  true,  null,  5],
-    [false, null, false, null, false, null,  false, null,  null]
+    [1,    true,  null, true,  null, true,  null,  null,  null ],
+    [2,    true,  true, true,  true, false, false, false, false],
+    [3,    false, null, null,  null, true,  null,  null,  null ],
+    [4,    false, null, null,  null, false, null,  false, null ],
+    [5,    true,  null, true,  null, true,  null,  true,  null ],
+    [null, false, null, false, null, false, null,  false, null ]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -2611,16 +2608,16 @@ TEST_P(GroupBy, AnyAllScalar) {
                                     },
                                     use_threads));
     Datum expected = ArrayFromJSON(struct_({
+                                       field("key", int64()),
                                        field("hash_any", boolean()),
                                        field("hash_all", boolean()),
                                        field("hash_any", boolean()),
                                        field("hash_all", boolean()),
-                                       field("key", int64()),
                                    }),
                                    R"([
-      [true, true,  true, null,  1],
-      [true, false, true, false, 2],
-      [true, true,  true, null,  3]
+      [1, true, true,  true, null ],
+      [2, true, false, true, false],
+      [3, true, true,  true, null ]
     ])");
     AssertDatumsApproxEqual(expected, actual, /*verbose=*/true);
   }
@@ -2686,17 +2683,17 @@ TEST_P(GroupBy, CountDistinct) {
     ValidateOutput(aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_count_distinct", int64()),
                                         field("hash_count_distinct", int64()),
                                         field("hash_count_distinct", int64()),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    [1, 1, 0, 1],
-    [2, 2, 0, 2],
-    [3, 2, 1, 3],
-    [1, 0, 1, 4],
-    [4, 4, 0, null]
+    [1,    1, 1, 0],
+    [2,    2, 2, 0],
+    [3,    3, 2, 1],
+    [4,    1, 0, 1],
+    [null, 4, 4, 0]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -2754,17 +2751,17 @@ TEST_P(GroupBy, CountDistinct) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_count_distinct", int64()),
                                         field("hash_count_distinct", int64()),
                                         field("hash_count_distinct", int64()),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    [1, 1, 0, 1],
-    [2, 2, 0, 2],
-    [3, 2, 1, 3],
-    [1, 0, 1, 4],
-    [4, 4, 0, null]
+    [1,    1, 1, 0],
+    [2,    2, 2, 0],
+    [3,    3, 2, 1],
+    [4,    1, 0, 1],
+    [null, 4, 4, 0]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -2802,14 +2799,14 @@ TEST_P(GroupBy, CountDistinct) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_count_distinct", int64()),
                                         field("hash_count_distinct", int64()),
                                         field("hash_count_distinct", int64()),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    [1, 1, 0, 1],
-    [2, 2, 0, 2]
+    [1, 1, 1, 0],
+    [2, 2, 2, 0]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -2883,7 +2880,7 @@ TEST_P(GroupBy, Distinct) {
 
     auto struct_arr = aggregated_and_grouped.array_as<StructArray>();
 
-    auto all_arr = checked_pointer_cast<ListArray>(struct_arr->field(0));
+    auto all_arr = checked_pointer_cast<ListArray>(struct_arr->field(1));
     AssertDatumsEqual(ArrayFromJSON(utf8(), R"(["foo"])"), sort(*all_arr->value_slice(0)),
                       /*verbose=*/true);
     AssertDatumsEqual(ArrayFromJSON(utf8(), R"(["bar", "spam"])"),
@@ -2895,7 +2892,7 @@ TEST_P(GroupBy, Distinct) {
     AssertDatumsEqual(ArrayFromJSON(utf8(), R"(["a", "b", "baz", "eggs"])"),
                       sort(*all_arr->value_slice(4)), /*verbose=*/true);
 
-    auto valid_arr = checked_pointer_cast<ListArray>(struct_arr->field(1));
+    auto valid_arr = checked_pointer_cast<ListArray>(struct_arr->field(2));
     AssertDatumsEqual(ArrayFromJSON(utf8(), R"(["foo"])"),
                       sort(*valid_arr->value_slice(0)), /*verbose=*/true);
     AssertDatumsEqual(ArrayFromJSON(utf8(), R"(["bar", "spam"])"),
@@ -2907,7 +2904,7 @@ TEST_P(GroupBy, Distinct) {
     AssertDatumsEqual(ArrayFromJSON(utf8(), R"(["a", "b", "baz", "eggs"])"),
                       sort(*valid_arr->value_slice(4)), /*verbose=*/true);
 
-    auto null_arr = checked_pointer_cast<ListArray>(struct_arr->field(2));
+    auto null_arr = checked_pointer_cast<ListArray>(struct_arr->field(3));
     AssertDatumsEqual(ArrayFromJSON(utf8(), R"([])"), sort(*null_arr->value_slice(0)),
                       /*verbose=*/true);
     AssertDatumsEqual(ArrayFromJSON(utf8(), R"([])"), sort(*null_arr->value_slice(1)),
@@ -2950,12 +2947,12 @@ TEST_P(GroupBy, Distinct) {
 
     AssertDatumsEqual(
         ArrayFromJSON(struct_({
+                          field("key_0", int64()),
                           field("hash_distinct", list(utf8())),
                           field("hash_distinct", list(utf8())),
                           field("hash_distinct", list(utf8())),
-                          field("key_0", int64()),
                       }),
-                      R"([[["foo"], ["foo"], [], 1], [["bar"], ["bar"], [], 2]])"),
+                      R"([[1, ["foo"], ["foo"], []], [2, ["bar"], ["bar"], []]])"),
         aggregated_and_grouped,
         /*verbose=*/true);
   }
@@ -3016,12 +3013,11 @@ TEST_P(GroupBy, OneMiscTypes) {
 
     const auto& struct_arr = aggregated_and_grouped.array_as<StructArray>();
     //  Check the key column
-    AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3, null])"),
-                      struct_arr->field(struct_arr->num_fields() - 1));
+    AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3, null])"), struct_arr->field(0));
 
     //  Check values individually
     auto col_0_type = float64();
-    const auto& col_0 = struct_arr->field(0);
+    const auto& col_0 = struct_arr->field(1);
     EXPECT_THAT(col_0->GetScalar(0), ResultWith(AnyOfJSON(col_0_type, R"([1.0, 3.25])")));
     EXPECT_THAT(col_0->GetScalar(1),
                 ResultWith(AnyOfJSON(col_0_type, R"([0.0, 0.125, -0.25])")));
@@ -3029,14 +3025,14 @@ TEST_P(GroupBy, OneMiscTypes) {
     EXPECT_THAT(col_0->GetScalar(3), ResultWith(AnyOfJSON(col_0_type, R"([4.0, 0.75])")));
 
     auto col_1_type = null();
-    const auto& col_1 = struct_arr->field(1);
+    const auto& col_1 = struct_arr->field(2);
     EXPECT_THAT(col_1->GetScalar(0), ResultWith(AnyOfJSON(col_1_type, R"([null])")));
     EXPECT_THAT(col_1->GetScalar(1), ResultWith(AnyOfJSON(col_1_type, R"([null])")));
     EXPECT_THAT(col_1->GetScalar(2), ResultWith(AnyOfJSON(col_1_type, R"([null])")));
     EXPECT_THAT(col_1->GetScalar(3), ResultWith(AnyOfJSON(col_1_type, R"([null])")));
 
     auto col_2_type = boolean();
-    const auto& col_2 = struct_arr->field(2);
+    const auto& col_2 = struct_arr->field(3);
     EXPECT_THAT(col_2->GetScalar(0), ResultWith(AnyOfJSON(col_2_type, R"([true])")));
     EXPECT_THAT(col_2->GetScalar(1), ResultWith(AnyOfJSON(col_2_type, R"([false])")));
     EXPECT_THAT(col_2->GetScalar(2),
@@ -3045,7 +3041,7 @@ TEST_P(GroupBy, OneMiscTypes) {
                 ResultWith(AnyOfJSON(col_2_type, R"([true, null])")));
 
     auto col_3_type = decimal128(3, 2);
-    const auto& col_3 = struct_arr->field(3);
+    const auto& col_3 = struct_arr->field(4);
     EXPECT_THAT(col_3->GetScalar(0),
                 ResultWith(AnyOfJSON(col_3_type, R"(["1.01", "3.25"])")));
     EXPECT_THAT(col_3->GetScalar(1),
@@ -3055,7 +3051,7 @@ TEST_P(GroupBy, OneMiscTypes) {
                 ResultWith(AnyOfJSON(col_3_type, R"(["4.01", "0.75"])")));
 
     auto col_4_type = decimal256(3, 2);
-    const auto& col_4 = struct_arr->field(4);
+    const auto& col_4 = struct_arr->field(5);
     EXPECT_THAT(col_4->GetScalar(0),
                 ResultWith(AnyOfJSON(col_4_type, R"(["1.01", "3.25"])")));
     EXPECT_THAT(col_4->GetScalar(1),
@@ -3065,7 +3061,7 @@ TEST_P(GroupBy, OneMiscTypes) {
                 ResultWith(AnyOfJSON(col_4_type, R"(["4.01", "0.75"])")));
 
     auto col_5_type = fixed_size_binary(3);
-    const auto& col_5 = struct_arr->field(5);
+    const auto& col_5 = struct_arr->field(6);
     EXPECT_THAT(col_5->GetScalar(0),
                 ResultWith(AnyOfJSON(col_5_type, R"(["aaa", "ddd"])")));
     EXPECT_THAT(col_5->GetScalar(1),
@@ -3137,10 +3133,10 @@ TEST_P(GroupBy, OneNumericTypes) {
       const auto& struct_arr = aggregated_and_grouped.array_as<StructArray>();
       //  Check the key column
       AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3, 4, null])"),
-                        struct_arr->field(struct_arr->num_fields() - 1));
+                        struct_arr->field(0));
 
       //  Check values individually
-      const auto& col = struct_arr->field(0);
+      const auto& col = struct_arr->field(1);
       if (type->name() == "date64") {
         EXPECT_THAT(col->GetScalar(0),
                     ResultWith(AnyOfJSON(type, R"([86400000, 259200000])")));
@@ -3197,9 +3193,9 @@ TEST_P(GroupBy, OneBinaryTypes) {
       const auto& struct_arr = aggregated_and_grouped.array_as<StructArray>();
       //  Check the key column
       AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3, null])"),
-                        struct_arr->field(struct_arr->num_fields() - 1));
+                        struct_arr->field(0));
 
-      const auto& col = struct_arr->field(0);
+      const auto& col = struct_arr->field(1);
       EXPECT_THAT(col->GetScalar(0), ResultWith(AnyOfJSON(type, R"(["aaaa", "d"])")));
       EXPECT_THAT(col->GetScalar(1),
                   ResultWith(AnyOfJSON(type, R"(["bcd", "bc", "babcd"])")));
@@ -3229,10 +3225,9 @@ TEST_P(GroupBy, OneScalar) {
 
     const auto& struct_arr = actual.array_as<StructArray>();
     //  Check the key column
-    AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3])"),
-                      struct_arr->field(struct_arr->num_fields() - 1));
+    AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3])"), struct_arr->field(0));
 
-    const auto& col = struct_arr->field(0);
+    const auto& col = struct_arr->field(1);
     EXPECT_THAT(col->GetScalar(0), ResultWith(AnyOfJSON(int32(), R"([-1, 22])")));
     EXPECT_THAT(col->GetScalar(1), ResultWith(AnyOfJSON(int32(), R"([3])")));
     EXPECT_THAT(col->GetScalar(2), ResultWith(AnyOfJSON(int32(), R"([4])")));
@@ -3301,7 +3296,7 @@ TEST_P(GroupBy, ListNumeric) {
 
         auto struct_arr = aggregated_and_grouped.array_as<StructArray>();
 
-        auto list_arr = checked_pointer_cast<ListArray>(struct_arr->field(0));
+        auto list_arr = checked_pointer_cast<ListArray>(struct_arr->field(1));
         AssertDatumsEqual(ArrayFromJSON(type, R"([99, 99])"),
                           sort(*list_arr->value_slice(0)),
                           /*verbose=*/true);
@@ -3373,7 +3368,7 @@ TEST_P(GroupBy, ListNumeric) {
 
         auto struct_arr = aggregated_and_grouped.array_as<StructArray>();
 
-        auto list_arr = checked_pointer_cast<ListArray>(struct_arr->field(0));
+        auto list_arr = checked_pointer_cast<ListArray>(struct_arr->field(1));
         AssertDatumsEqual(ArrayFromJSON(type, R"([99, 99])"),
                           sort(*list_arr->value_slice(0)),
                           /*verbose=*/true);
@@ -3444,9 +3439,9 @@ TEST_P(GroupBy, ListBinaryTypes) {
         const auto& struct_arr = aggregated_and_grouped.array_as<StructArray>();
         // Check the key column
         AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3, null])"),
-                          struct_arr->field(struct_arr->num_fields() - 1));
+                          struct_arr->field(0));
 
-        auto list_arr = checked_pointer_cast<ListArray>(struct_arr->field(0));
+        auto list_arr = checked_pointer_cast<ListArray>(struct_arr->field(1));
         AssertDatumsEqual(ArrayFromJSON(type, R"(["aaaa", "d", null])"),
                           sort(*list_arr->value_slice(0)),
                           /*verbose=*/true);
@@ -3507,9 +3502,9 @@ TEST_P(GroupBy, ListBinaryTypes) {
         const auto& struct_arr = aggregated_and_grouped.array_as<StructArray>();
         // Check the key column
         AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3, null])"),
-                          struct_arr->field(struct_arr->num_fields() - 1));
+                          struct_arr->field(0));
 
-        auto list_arr = checked_pointer_cast<ListArray>(struct_arr->field(0));
+        auto list_arr = checked_pointer_cast<ListArray>(struct_arr->field(1));
         AssertDatumsEqual(ArrayFromJSON(type, R"(["aaaa", "d", "y"])"),
                           sort(*list_arr->value_slice(0)),
                           /*verbose=*/true);
@@ -3587,12 +3582,11 @@ TEST_P(GroupBy, ListMiscTypes) {
 
     const auto& struct_arr = aggregated_and_grouped.array_as<StructArray>();
     //  Check the key column
-    AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3, null])"),
-                      struct_arr->field(struct_arr->num_fields() - 1));
+    AssertDatumsEqual(ArrayFromJSON(int64(), R"([1, 2, 3, null])"), struct_arr->field(0));
 
     //  Check values individually
     auto type_0 = float64();
-    auto list_arr_0 = checked_pointer_cast<ListArray>(struct_arr->field(0));
+    auto list_arr_0 = checked_pointer_cast<ListArray>(struct_arr->field(1));
     AssertDatumsEqual(ArrayFromJSON(type_0, R"([1.0, 3.25, null])"),
                       sort(*list_arr_0->value_slice(0)),
                       /*verbose=*/true);
@@ -3607,7 +3601,7 @@ TEST_P(GroupBy, ListMiscTypes) {
                       /*verbose=*/true);
 
     auto type_1 = null();
-    auto list_arr_1 = checked_pointer_cast<ListArray>(struct_arr->field(1));
+    auto list_arr_1 = checked_pointer_cast<ListArray>(struct_arr->field(2));
     AssertDatumsEqual(ArrayFromJSON(type_1, R"([null, null, null])"),
                       sort(*list_arr_1->value_slice(0)),
                       /*verbose=*/true);
@@ -3622,7 +3616,7 @@ TEST_P(GroupBy, ListMiscTypes) {
                       /*verbose=*/true);
 
     auto type_2 = boolean();
-    auto list_arr_2 = checked_pointer_cast<ListArray>(struct_arr->field(2));
+    auto list_arr_2 = checked_pointer_cast<ListArray>(struct_arr->field(3));
     AssertDatumsEqual(ArrayFromJSON(type_2, R"([true, true, true])"),
                       sort(*list_arr_2->value_slice(0)),
                       /*verbose=*/true);
@@ -3637,7 +3631,7 @@ TEST_P(GroupBy, ListMiscTypes) {
                       /*verbose=*/true);
 
     auto type_3 = decimal128(3, 2);
-    auto list_arr_3 = checked_pointer_cast<ListArray>(struct_arr->field(3));
+    auto list_arr_3 = checked_pointer_cast<ListArray>(struct_arr->field(4));
     AssertDatumsEqual(ArrayFromJSON(type_3, R"(["1.01", "3.25", null])"),
                       sort(*list_arr_3->value_slice(0)),
                       /*verbose=*/true);
@@ -3652,7 +3646,7 @@ TEST_P(GroupBy, ListMiscTypes) {
                       /*verbose=*/true);
 
     auto type_4 = decimal256(3, 2);
-    auto list_arr_4 = checked_pointer_cast<ListArray>(struct_arr->field(4));
+    auto list_arr_4 = checked_pointer_cast<ListArray>(struct_arr->field(5));
     AssertDatumsEqual(ArrayFromJSON(type_4, R"(["1.01", "3.25", null])"),
                       sort(*list_arr_4->value_slice(0)),
                       /*verbose=*/true);
@@ -3667,7 +3661,7 @@ TEST_P(GroupBy, ListMiscTypes) {
                       /*verbose=*/true);
 
     auto type_5 = fixed_size_binary(3);
-    auto list_arr_5 = checked_pointer_cast<ListArray>(struct_arr->field(5));
+    auto list_arr_5 = checked_pointer_cast<ListArray>(struct_arr->field(6));
     AssertDatumsEqual(ArrayFromJSON(type_5, R"(["aaa", "ddd", null])"),
                       sort(*list_arr_5->value_slice(0)),
                       /*verbose=*/true);
@@ -3731,6 +3725,7 @@ TEST_P(GroupBy, CountAndSum) {
 
   AssertDatumsEqual(
       ArrayFromJSON(struct_({
+                        field("key_0", int64()),
                         field("hash_count", int64()),
                         field("hash_count", int64()),
                         field("hash_count", int64()),
@@ -3739,13 +3734,12 @@ TEST_P(GroupBy, CountAndSum) {
                         field("hash_sum", float64()),
                         field("hash_sum", float64()),
                         field("hash_sum", int64()),
-                        field("key_0", int64()),
                     }),
                     R"([
-    [2, 1, 3, 3, 4.25,   null,   3,    1],
-    [3, 0, 3, 3, -0.125, -0.125, 6,    2],
-    [0, 2, 2, 2, null,   null,   6,    3],
-    [2, 0, 2, 2, 4.75,   null,   null, null]
+    [1,    2, 1, 3, 3, 4.25,   null,   3   ],
+    [2,    3, 0, 3, 3, -0.125, -0.125, 6   ],
+    [3,    0, 2, 2, 2, null,   null,   6   ],
+    [null, 2, 0, 2, 2, 4.75,   null,   null]
   ])"),
       aggregated_and_grouped,
       /*verbose=*/true);
@@ -3780,14 +3774,14 @@ TEST_P(GroupBy, StandAloneNullaryCount) {
                            }));
 
   AssertDatumsEqual(ArrayFromJSON(struct_({
-                                      field("hash_count_all", int64()),
                                       field("key_0", int64()),
+                                      field("hash_count_all", int64()),
                                   }),
                                   R"([
-    [3, 1],
-    [3, 2],
-    [2, 3],
-    [2, null]
+    [1, 3   ],
+    [2, 3   ],
+    [3, 2   ],
+    [null, 2]
   ])"),
                     aggregated_and_grouped,
                     /*verbose=*/true);
@@ -3828,16 +3822,16 @@ TEST_P(GroupBy, Product) {
                            }));
 
   AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("key_0", int64()),
                                             field("hash_product", float64()),
                                             field("hash_product", int64()),
                                             field("hash_product", float64()),
-                                            field("key_0", int64()),
                                         }),
                                         R"([
-    [-3.25, 1,    null, 1],
-    [-0.0,  8,    -0.0, 2],
-    [null,  9,    null, 3],
-    [3.0,   null, null, null]
+    [1,    -3.25, 1,    null],
+    [2,    -0.0,  8,    -0.0],
+    [3,    null,  9,    null],
+    [null, 3.0,   null, null]
   ])"),
                           aggregated_and_grouped,
                           /*verbose=*/true);
@@ -3863,10 +3857,10 @@ TEST_P(GroupBy, Product) {
                            }));
 
   AssertDatumsApproxEqual(ArrayFromJSON(struct_({
-                                            field("hash_product", int64()),
                                             field("key_0", int64()),
+                                            field("hash_product", int64()),
                                         }),
-                                        R"([[8589934592, 1]])"),
+                                        R"([[1, 8589934592]])"),
                           aggregated_and_grouped,
                           /*verbose=*/true);
 }
@@ -3913,19 +3907,19 @@ TEST_P(GroupBy, SumMeanProductKeepNulls) {
                            }));
 
   AssertDatumsApproxEqual(ArrayFromJSON(struct_({
+                                            field("key_0", int64()),
                                             field("hash_sum", float64()),
                                             field("hash_sum", float64()),
                                             field("hash_mean", float64()),
                                             field("hash_mean", float64()),
                                             field("hash_product", float64()),
                                             field("hash_product", float64()),
-                                            field("key_0", int64()),
                                         }),
                                         R"([
-    [null,   null,   null,       null,       null, null, 1],
-    [-0.125, -0.125, -0.0416667, -0.0416667, -0.0, -0.0, 2],
-    [null,   null,   null,       null,       null, null, 3],
-    [4.75,   null,   2.375,      null,       3.0,  null, null]
+    [1,    null,   null,   null,       null,       null, null],
+    [2,    -0.125, -0.125, -0.0416667, -0.0416667, -0.0, -0.0],
+    [3,    null,   null,   null,       null,       null, null],
+    [null, 4.75,   null,   2.375,      null,       3.0,  null]
   ])"),
                           aggregated_and_grouped,
                           /*verbose=*/true);
@@ -3958,14 +3952,14 @@ TEST_P(GroupBy, SumOnlyStringAndDictKeys) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
-                                        field("hash_sum", float64()),
                                         field("key_0", key_type),
+                                        field("hash_sum", float64()),
                                     }),
                                     R"([
-    [4.25,   "alfa"],
-    [-0.125, "beta"],
-    [null,   "gama"],
-    [4.75,    null ]
+    ["alfa", 4.25  ],
+    ["beta", -0.125],
+    ["gama", null  ],
+    [null,   4.75  ]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -4088,19 +4082,19 @@ TEST_P(GroupBy, WithChunkedArray) {
                            }));
 
   AssertDatumsEqual(ArrayFromJSON(struct_({
+                                      field("key_0", int64()),
                                       field("hash_count", int64()),
                                       field("hash_sum", float64()),
                                       field("hash_min_max", struct_({
                                                                 field("min", float64()),
                                                                 field("max", float64()),
                                                             })),
-                                      field("key_0", int64()),
                                   }),
                                   R"([
-    [2, 4.25,   {"min": 1.0,   "max": 3.25},  1],
-    [3, -0.125, {"min": -0.25, "max": 0.125}, 2],
-    [0, null,   {"min": null,  "max": null},  3],
-    [2, 4.75,   {"min": 0.75,  "max": 4.0},   null]
+    [1,    2, 4.25,   {"min": 1.0,   "max": 3.25} ],
+    [2,    3, -0.125, {"min": -0.25, "max": 0.125}],
+    [3,    0, null,   {"min": null,  "max": null} ],
+    [null, 2, 4.75,   {"min": 0.75,  "max": 4.0}  ]
   ])"),
                     aggregated_and_grouped,
                     /*verbose=*/true);
@@ -4125,15 +4119,15 @@ TEST_P(GroupBy, MinMaxWithNewGroupsInChunkedArray) {
                            }));
 
   AssertDatumsEqual(ArrayFromJSON(struct_({
+                                      field("key_0", int64()),
                                       field("hash_min_max", struct_({
                                                                 field("min", int64()),
                                                                 field("max", int64()),
                                                             })),
-                                      field("key_0", int64()),
                                   }),
                                   R"([
-    [{"min": 1, "max": 1}, 0],
-    [{"min": 0, "max": 0}, 1]
+    [0, {"min": 1, "max": 1}],
+    [1, {"min": 0, "max": 0}]
   ])"),
                     aggregated_and_grouped,
                     /*verbose=*/true);
@@ -4161,14 +4155,14 @@ TEST_P(GroupBy, SmallChunkSizeSumOnly) {
                                   },
                                   small_chunksize_context()));
   AssertDatumsEqual(ArrayFromJSON(struct_({
-                                      field("hash_sum", float64()),
                                       field("key_0", int64()),
+                                      field("hash_sum", float64()),
                                   }),
                                   R"([
-    [4.25,   1],
-    [-0.125, 2],
-    [null,   3],
-    [4.75,   null]
+    [1,    4.25  ],
+    [2,    -0.125],
+    [3,    null  ],
+    [null, 4.75  ]
   ])"),
                     aggregated_and_grouped,
                     /*verbose=*/true);
@@ -4216,16 +4210,16 @@ TEST_P(GroupBy, CountWithNullType) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_count", int64()),
                                         field("hash_count", int64()),
                                         field("hash_count", int64()),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    [3, 0, 3, 1],
-    [3, 0, 3, 2],
-    [2, 0, 2, 3],
-    [2, 0, 2, null]
+    [1,    3, 0, 3],
+    [2,    3, 0, 3],
+    [3,    2, 0, 2],
+    [null, 2, 0, 2]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -4303,6 +4297,7 @@ TEST_P(GroupBy, SingleNullTypeKey) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", null()),
                                         field("hash_count", int64()),
                                         field("hash_sum", int64()),
                                         field("hash_mean", float64()),
@@ -4310,10 +4305,9 @@ TEST_P(GroupBy, SingleNullTypeKey) {
                                                                   field("min", int64()),
                                                                   field("max", int64()),
                                                               })),
-                                        field("key_0", null()),
                                     }),
                                     R"([
-    [8, 15, 1.875, {"min": 1, "max": 3}, null]
+    [null, 8, 15, 1.875, {"min": 1, "max": 3}]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -4360,20 +4354,20 @@ TEST_P(GroupBy, MultipleKeysIncludesNullType) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", utf8()),
+                                        field("key_1", null()),
                                         field("hash_count", int64()),
                                         field("hash_sum", float64()),
                                         field("hash_min_max", struct_({
                                                                   field("min", float64()),
                                                                   field("max", float64()),
                                                               })),
-                                        field("key_0", utf8()),
-                                        field("key_1", null()),
                                     }),
                                     R"([
-    [2, 4.25,   {"min": 1,     "max": 3.25},  "a",      null],
-    [0, null,   {"min": null,  "max": null},  "aa",     null],
-    [3, -0.125, {"min": -0.25, "max": 0.125}, "bcdefg", null],
-    [2, 4.75,   {"min": 0.75,  "max": 4},     null,     null]
+    ["a",      null, 2, 4.25,   {"min": 1,     "max": 3.25} ],
+    ["aa",     null, 0, null,   {"min": null,  "max": null} ],
+    ["bcdefg", null, 3, -0.125, {"min": -0.25, "max": 0.125}],
+    [null,     null, 2, 4.75,   {"min": 0.75,  "max": 4}    ]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -4429,17 +4423,17 @@ TEST_P(GroupBy, SumNullType) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_sum", int64()),
                                         field("hash_sum", int64()),
                                         field("hash_sum", int64()),
                                         field("hash_sum", int64()),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    [0, null, null, null, 1],
-    [0, null, null, null, 2],
-    [0, null, null, null, 3],
-    [0, null, null, null, null]
+    [1,    0, null, null, null],
+    [2,    0, null, null, null],
+    [3,    0, null, null, null],
+    [null, 0, null, null, null]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -4495,17 +4489,17 @@ TEST_P(GroupBy, ProductNullType) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_product", int64()),
                                         field("hash_product", int64()),
                                         field("hash_product", int64()),
                                         field("hash_product", int64()),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    [1, null, null, null, 1],
-    [1, null, null, null, 2],
-    [1, null, null, null, 3],
-    [1, null, null, null, null]
+    [1,    1, null, null, null],
+    [2,    1, null, null, null],
+    [3,    1, null, null, null],
+    [null, 1, null, null, null]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -4561,17 +4555,17 @@ TEST_P(GroupBy, MeanNullType) {
     SortBy({"key_0"}, &aggregated_and_grouped);
 
     AssertDatumsEqual(ArrayFromJSON(struct_({
+                                        field("key_0", int64()),
                                         field("hash_mean", float64()),
                                         field("hash_mean", float64()),
                                         field("hash_mean", float64()),
                                         field("hash_mean", float64()),
-                                        field("key_0", int64()),
                                     }),
                                     R"([
-    [0, null, null, null, 1],
-    [0, null, null, null, 2],
-    [0, null, null, null, 3],
-    [0, null, null, null, null]
+    [1,    0, null, null, null],
+    [2,    0, null, null, null],
+    [3,    0, null, null, null],
+    [null, 0, null, null, null]
   ])"),
                       aggregated_and_grouped,
                       /*verbose=*/true);
@@ -4608,11 +4602,11 @@ TEST_P(GroupBy, NullTypeEmptyTable) {
                              },
                              use_threads));
     auto struct_arr = aggregated_and_grouped.array_as<StructArray>();
-    AssertDatumsEqual(ArrayFromJSON(int64(), "[]"), struct_arr->field(0),
-                      /*verbose=*/true);
     AssertDatumsEqual(ArrayFromJSON(int64(), "[]"), struct_arr->field(1),
                       /*verbose=*/true);
-    AssertDatumsEqual(ArrayFromJSON(float64(), "[]"), struct_arr->field(2),
+    AssertDatumsEqual(ArrayFromJSON(int64(), "[]"), struct_arr->field(2),
+                      /*verbose=*/true);
+    AssertDatumsEqual(ArrayFromJSON(float64(), "[]"), struct_arr->field(3),
                       /*verbose=*/true);
   }
 }
@@ -4745,44 +4739,44 @@ Result<std::shared_ptr<Table>> GetSingleSegmentInputAsCombined() {
 
 Result<std::shared_ptr<ChunkedArray>> GetSingleSegmentScalarOutput() {
   return ChunkedArrayFromJSON(struct_({
+                                  field("key_0", int64()),
                                   field("count", int64()),
                                   field("sum", float64()),
                                   field("min_max", struct_({
                                                        field("min", float64()),
                                                        field("max", float64()),
                                                    })),
-                                  field("key_0", int64()),
                               }),
                               {R"([
-    [7, 8.875, {"min": -0.25, "max": 4.0}, 1]
+    [1, 7, 8.875, {"min": -0.25, "max": 4.0}]
   ])",
                                R"([
-    [7, 8.875, {"min": -0.25, "max": 4.0}, 0]
+    [0, 7, 8.875, {"min": -0.25, "max": 4.0}]
   ])"});
 }
 
 Result<std::shared_ptr<ChunkedArray>> GetSingleSegmentKeyOutput() {
   return ChunkedArrayFromJSON(struct_({
+                                  field("key_0", int64()),
+                                  field("key_1", int64()),
                                   field("hash_count", int64()),
                                   field("hash_sum", float64()),
                                   field("hash_min_max", struct_({
                                                             field("min", float64()),
                                                             field("max", float64()),
                                                         })),
-                                  field("key_0", int64()),
-                                  field("key_1", int64()),
                               }),
                               {R"([
-    [2, 4.25,   {"min": 1.0,   "max": 3.25},  1, 1],
-    [3, -0.125, {"min": -0.25, "max": 0.125}, 2, 1],
-    [0, null,   {"min": null,  "max": null},  3, 1],
-    [2, 4.75,   {"min": 0.75,  "max": 4.0},   null, 1]
+    [1,    1, 2, 4.25,   {"min": 1.0,   "max": 3.25} ],
+    [2,    1, 3, -0.125, {"min": -0.25, "max": 0.125}],
+    [3,    1, 0, null,   {"min": null,  "max": null} ],
+    [null, 1, 2, 4.75,   {"min": 0.75,  "max": 4.0}  ]
   ])",
                                R"([
-    [2, 4.25,   {"min": 1.0,   "max": 3.25},  1, 0],
-    [3, -0.125, {"min": -0.25, "max": 0.125}, 2, 0],
-    [0, null,   {"min": null,  "max": null},  3, 0],
-    [2, 4.75,   {"min": 0.75,  "max": 4.0},   null, 0]
+    [1,    0, 2, 4.25,   {"min": 1.0,   "max": 3.25} ],
+    [2,    0, 3, -0.125, {"min": -0.25, "max": 0.125}],
+    [3,    0, 0, null,   {"min": null,  "max": null} ],
+    [null, 0, 2, 4.75,   {"min": 0.75,  "max": 4.0}  ]
   ])"});
 }
 
@@ -4839,7 +4833,7 @@ Result<std::shared_ptr<Table>> GetEmptySegmentKeysInputAsCombined() {
 Result<std::shared_ptr<Array>> GetEmptySegmentKeyOutput() {
   ARROW_ASSIGN_OR_RAISE(auto chunked, GetSingleSegmentKeyOutput());
   ARROW_ASSIGN_OR_RAISE(auto table, Table::FromChunkedStructArray(chunked));
-  ARROW_ASSIGN_OR_RAISE(auto removed, table->RemoveColumn(table->num_columns() - 1));
+  ARROW_ASSIGN_OR_RAISE(auto removed, table->RemoveColumn(1));
   auto sliced = removed->Slice(0, 4);
   ARROW_ASSIGN_OR_RAISE(auto batch, sliced->CombineChunksToBatch());
   return batch->ToStructArray();
@@ -4885,10 +4879,12 @@ Result<std::shared_ptr<ChunkedArray>> GetMultiSegmentKeyOutput(
     const std::string& add_name) {
   ARROW_ASSIGN_OR_RAISE(auto chunked, GetSingleSegmentKeyOutput());
   ARROW_ASSIGN_OR_RAISE(auto table, Table::FromChunkedStructArray(chunked));
-  int last = table->num_columns() - 1;
-  auto add_field = field(add_name, table->schema()->field(last)->type());
+  int existing_key_field_idx = 1;
+  auto add_field =
+      field(add_name, table->schema()->field(existing_key_field_idx)->type());
   ARROW_ASSIGN_OR_RAISE(auto added,
-                        table->AddColumn(last + 1, add_field, table->column(last)));
+                        table->AddColumn(existing_key_field_idx + 1, add_field,
+                                         table->column(existing_key_field_idx)));
   ARROW_ASSIGN_OR_RAISE(auto batch, added->CombineChunksToBatch());
   ARROW_ASSIGN_OR_RAISE(auto array, batch->ToStructArray());
   return ChunkedArray::Make({array->Slice(0, 4), array->Slice(4, 4)}, array->type());
diff --git a/cpp/src/arrow/acero/plan_test.cc b/cpp/src/arrow/acero/plan_test.cc
index a3ba1946a1..3ce2ba2b8c 100644
--- a/cpp/src/arrow/acero/plan_test.cc
+++ b/cpp/src/arrow/acero/plan_test.cc
@@ -1113,11 +1113,11 @@ BatchesWithSchema MakeGroupableBatches(int multiplicity = 1) {
 
 TEST(ExecPlanExecution, SourceGroupedSum) {
   std::shared_ptr<Schema> out_schema =
-      schema({field("sum(i32)", int64()), field("str", utf8())});
+      schema({field("str", utf8()), field("sum(i32)", int64())});
   const std::shared_ptr<Table> expected_parallel =
-      TableFromJSON(out_schema, {R"([[800, "alfa"], [1000, "beta"], [400, "gama"]])"});
+      TableFromJSON(out_schema, {R"([["alfa", 800], ["beta", 1000], ["gama", 400]])"});
   const std::shared_ptr<Table> expected_single =
-      TableFromJSON(out_schema, {R"([[8, "alfa"], [10, "beta"], [4, "gama"]])"});
+      TableFromJSON(out_schema, {R"([["alfa", 8], ["beta", 10], ["gama", 4]])"});
 
   for (bool parallel : {false, true}) {
     SCOPED_TRACE(parallel ? "parallel/merged" : "serial");
@@ -1193,10 +1193,10 @@ TEST(ExecPlanExecution, NestedSourceProjectGroupedSum) {
 
     auto input = MakeNestedBatches();
     auto expected =
-        TableFromJSON(schema({field("x", int64()), field("y", boolean())}), {R"([
-      [null, true],
-      [17, false],
-      [5, null]
+        TableFromJSON(schema({field("bool", boolean()), field("i32", int64())}), {R"([
+      [true, null],
+      [false, 17],
+      [null, 5]
 ])"});
 
     Declaration plan = Declaration::Sequence(
@@ -1236,9 +1236,10 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumFilter) {
          {"filter", FilterNodeOptions{greater(field_ref("sum(multiply(i32, 2))"),
                                               literal(10 * batch_multiplicity))}}});
 
-    auto expected = TableFromJSON(schema({field("a", int64()), field("b", utf8())}),
-                                  {parallel ? R"([[3600, "alfa"], [2000, "beta"]])"
-                                            : R"([[36, "alfa"], [20, "beta"]])"});
+    auto expected = TableFromJSON(
+        schema({field("str", utf8()), field("sum(multiply(i32, 2))", int64())}),
+        {parallel ? R"([["alfa", 3600], ["beta", 2000]])"
+                  : R"([["alfa", 36], ["beta", 20]])"});
     ASSERT_OK_AND_ASSIGN(auto actual, DeclarationToTable(std::move(plan), parallel));
     AssertTablesEqualIgnoringOrder(expected, actual);
   }
@@ -1279,8 +1280,8 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumOrderBy) {
 
     ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
                 Finishes(ResultWith(ElementsAreArray({ExecBatchFromJSON(
-                    {int64(), utf8()}, parallel ? R"([[2000, "beta"], [3600, "alfa"]])"
-                                                : R"([[20, "beta"], [36, "alfa"]])")}))));
+                    {utf8(), int64()}, parallel ? R"([["beta", 2000], ["alfa", 3600]])"
+                                                : R"([["beta", 20], ["alfa", 36]])")}))));
   }
 }
 
@@ -1315,7 +1316,7 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumTopK) {
     ASSERT_THAT(
         StartAndCollect(plan.get(), sink_gen),
         Finishes(ResultWith(ElementsAreArray({ExecBatchFromJSON(
-            {int64(), utf8()}, parallel ? R"([[800, "gama"]])" : R"([[8, "gama"]])")}))));
+            {utf8(), int64()}, parallel ? R"([["gama", 800]])" : R"([["gama", 8]])")}))));
   }
 }
 
@@ -1374,8 +1375,8 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) {
     }
 
     std::shared_ptr<Table> expected =
-        TableFromJSON(schema({field("count(i32)", int64()), field("str", utf8())}),
-                      {R"([[500, "alfa"], [200, "beta"], [200, "gama"]])"});
+        TableFromJSON(schema({field("str", utf8()), field("count(i32)", int64())}),
+                      {R"([["alfa", 500], ["beta", 200], ["gama", 200]])"});
 
     ASSERT_FINISHES_OK_AND_ASSIGN(std::shared_ptr<Table> actual, table_future);
     AssertTablesEqualIgnoringOrder(expected, actual);
@@ -1479,7 +1480,7 @@ TEST(ExecPlanExecution, ScalarSourceGroupedSum) {
 
   ASSERT_THAT(StartAndCollect(plan.get(), sink_gen),
               Finishes(ResultWith(UnorderedElementsAreArray({
-                  ExecBatchFromJSON({int64(), boolean()}, R"([[6, true], [18, false]])"),
+                  ExecBatchFromJSON({boolean(), int64()}, R"([[true, 6], [false, 18]])"),
               }))));
 }
 
@@ -1638,8 +1639,8 @@ TEST(ExecPlanExecution, SegmentedAggregationWithOneSegment) {
   ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema actual_batches,
                        DeclarationToExecBatches(std::move(plan), /*use_threads=*/false));
 
-  auto expected = ExecBatchFromJSON({int64(), float64(), int32(), int32()},
-                                    R"([[6, 2, 1, 1], [6, 2, 2, 1]])");
+  auto expected = ExecBatchFromJSON({int32(), int32(), int64(), float64()},
+                                    R"([[1, 1, 6, 2], [2, 1, 6, 2]])");
   AssertExecBatchesEqualIgnoringOrder(actual_batches.schema, actual_batches.batches,
                                       {expected});
 }
@@ -1663,13 +1664,13 @@ TEST(ExecPlanExecution, SegmentedAggregationWithTwoSegments) {
                                               {"hash_sum", nullptr, "c", "sum(c)"},
                                               {"hash_mean", nullptr, "c", "mean(c)"},
                                           },
-                                          /*keys=*/{"b"}, /*segment_leys=*/{"a"}}}});
+                                          /*keys=*/{"b"}, /*segment_keys=*/{"a"}}}});
   ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema actual_batches,
                        DeclarationToExecBatches(std::move(plan), /*use_threads=*/false));
 
   auto expected = ExecBatchFromJSON(
-      {int64(), float64(), int32(), int32()},
-      R"([[3, 1.5, 1, 1], [1, 1, 2, 1], [3, 3, 1, 2], [5, 2.5, 2, 2]])");
+      {int32(), int32(), int64(), float64()},
+      R"([[1, 1, 3, 1.5], [2, 1, 1, 1], [1, 2, 3, 3], [2, 2, 5, 2.5]])");
   AssertExecBatchesEqualIgnoringOrder(actual_batches.schema, actual_batches.batches,
                                       {expected});
 }
@@ -1697,8 +1698,8 @@ TEST(ExecPlanExecution, SegmentedAggregationWithBatchCrossingSegment) {
   ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema actual_batches,
                        DeclarationToExecBatches(std::move(plan), /*use_threads=*/false));
 
-  auto expected = ExecBatchFromJSON({int64(), float64(), int32(), int32()},
-                                    R"([[2, 1, 1, 1], [4, 2, 2, 2], [6, 3, 3, 3]])");
+  auto expected = ExecBatchFromJSON({int32(), int32(), int64(), float64()},
+                                    R"([[1, 1, 2, 1], [2, 2, 4, 2], [3, 3, 6, 3]])");
   AssertExecBatchesEqualIgnoringOrder(actual_batches.schema, actual_batches.batches,
                                       {expected});
 }
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 159d1ac033..cde3a725c4 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -2812,7 +2812,7 @@ TEST(ScanNode, MinimalGroupedAggEndToEnd) {
 
   // translate sink_gen (async) to sink_reader (sync)
   std::shared_ptr<RecordBatchReader> sink_reader = acero::MakeGeneratorReader(
-      schema({field("sum(a * 2)", int64()), field("b", boolean())}), std::move(sink_gen),
+      schema({field("b", boolean()), field("sum(a * 2)", int64())}), std::move(sink_gen),
       exec_context.memory_pool());
 
   // start the ExecPlan
@@ -2832,11 +2832,11 @@ TEST(ScanNode, MinimalGroupedAggEndToEnd) {
   ASSERT_TRUE(plan->finished().Wait(/*seconds=*/1)) << "ExecPlan didn't finish within 1s";
 
   auto expected = TableFromJSON(
-      schema({field("sum(a * 2)", int64()), field("b", boolean())}), {
+      schema({field("b", boolean()), field("sum(a * 2)", int64())}), {
                                                                          R"JSON([
-                                               {"sum(a * 2)": 4,  "b": true},
-                                               {"sum(a * 2)": 12, "b": null},
-                                               {"sum(a * 2)": 40, "b": false}
+                                               {"b": true, "sum(a * 2)": 4},
+                                               {"b": null, "sum(a * 2)": 12},
+                                               {"b": false, "sum(a * 2)": 40}
                                           ])JSON"});
   AssertTablesEqual(*expected, *sorted.table(), /*same_chunk_layout=*/false);
 }
diff --git a/cpp/src/arrow/engine/substrait/function_test.cc b/cpp/src/arrow/engine/substrait/function_test.cc
index bb9df20846..9164bf0a4b 100644
--- a/cpp/src/arrow/engine/substrait/function_test.cc
+++ b/cpp/src/arrow/engine/substrait/function_test.cc
@@ -661,14 +661,12 @@ void CheckGroupedAggregateCase(const AggregateTestCase& test_case) {
   ASSERT_OK_AND_ASSIGN(
       std::shared_ptr<Array> sort_indices,
       compute::SortIndices(output_table, compute::SortOptions({compute::SortKey(
-                                             output_table->num_columns() - 1,
-                                             compute::SortOrder::Ascending)})));
+                                             0, compute::SortOrder::Ascending)})));
   ASSERT_OK_AND_ASSIGN(Datum sorted_table_datum,
                        compute::Take(output_table, sort_indices));
   output_table = sorted_table_datum.table();
-  // TODO(ARROW-17245) We should be selecting N-1 here but Acero
-  // currently emits things in reverse order
-  ASSERT_OK_AND_ASSIGN(output_table, output_table->SelectColumns({0}));
+  ASSERT_OK_AND_ASSIGN(output_table,
+                       output_table->SelectColumns({output_table->num_columns() - 1}));
 
   std::shared_ptr<Table> expected_output =
       GetOutputTableForAggregateCase(test_case.output_type, test_case.group_outputs);
diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc
index 02be82972d..ca96b2bf7c 100644
--- a/cpp/src/arrow/engine/substrait/serde_test.cc
+++ b/cpp/src/arrow/engine/substrait/serde_test.cc
@@ -3449,12 +3449,12 @@ TEST(SubstraitRoundTrip, AggregateRel) {
   ASSERT_OK_AND_ASSIGN(auto buf,
                        internal::SubstraitFromJSON("Plan", substrait_json,
                                                    /*ignore_unknown_fields=*/false));
-  auto output_schema = schema({field("aggregates", int64()), field("keys", int32())});
+  auto output_schema = schema({field("keys", int32()), field("aggregates", int64())});
   auto expected_table = TableFromJSON(output_schema, {R"([
-      [80, 10],
-      [90, 20],
-      [60, 30],
-      [60, 40]
+      [10, 80],
+      [20, 90],
+      [30, 60],
+      [40, 60]
   ])"});
 
   NamedTableProvider table_provider = AlwaysProvideSameTable(std::move(input_table));
@@ -3489,7 +3489,7 @@ TEST(SubstraitRoundTrip, AggregateRelEmit) {
         "aggregate": {
           "common": {
           "emit": {
-            "outputMapping": [0]
+            "outputMapping": [1]
           }
         },
           "input": {
@@ -5701,7 +5701,7 @@ TEST(Substrait, PlanWithSegmentedAggregateExtension) {
             }
           }
         },
-        "names": ["v", "k", "t"]
+        "names": ["k", "t", "v"]
       }
     }],
     "expectedTypeUrls": []
@@ -5724,9 +5724,9 @@ TEST(Substrait, PlanWithSegmentedAggregateExtension) {
   ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", substrait_json));
 
   std::shared_ptr<Schema> output_schema =
-      schema({field("v", float64()), field("k", int32()), field("t", int32())});
+      schema({field("k", int32()), field("t", int32()), field("v", float64())});
   auto expected_table =
-      TableFromJSON(output_schema, {"[[4, 1, 1], [2, 2, 1], [10, 2, 2], [5, 1, 2]]"});
+      TableFromJSON(output_schema, {"[[1, 1, 4], [2, 1, 2], [2, 2, 10], [1, 2, 5]]"});
   CheckRoundTripResult(std::move(expected_table), buf, {}, conversion_options);
 }
 
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 82b71eeb9a..9e1b00bee8 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -4879,11 +4879,11 @@ cdef class Table(_PandasConvertible):
         >>> table = pa.Table.from_pandas(df)
         >>> table.group_by('year').aggregate([('n_legs', 'sum')])
         pyarrow.Table
-        n_legs_sum: int64
         year: int64
+        n_legs_sum: int64
         ----
-        n_legs_sum: [[2,6,104,5]]
         year: [[2020,2022,2021,2019]]
+        n_legs_sum: [[2,6,104,5]]
         """
         return TableGroupBy(self, keys)
 
@@ -5486,11 +5486,11 @@ class TableGroupBy:
 
     >>> pa.TableGroupBy(t,"keys").aggregate([("values", "sum")])
     pyarrow.Table
-    values_sum: int64
     keys: string
+    values_sum: int64
     ----
-    values_sum: [[3,7,5]]
     keys: [["a","b","c"]]
+    values_sum: [[3,7,5]]
     """
 
     def __init__(self, table, keys):
@@ -5536,21 +5536,21 @@ list[tuple(str, str, FunctionOptions)]
 
         >>> t.group_by("keys").aggregate([("values", "sum")])
         pyarrow.Table
-        values_sum: int64
         keys: string
+        values_sum: int64
         ----
-        values_sum: [[3,7,5]]
         keys: [["a","b","c"]]
+        values_sum: [[3,7,5]]
 
         Count the rows over the grouped column "keys":
 
         >>> t.group_by("keys").aggregate([([], "count_all")])
         pyarrow.Table
-        count_all: int64
         keys: string
+        count_all: int64
         ----
-        count_all: [[2,2,1]]
         keys: [["a","b","c"]]
+        count_all: [[2,2,1]]
 
         Do multiple aggregations:
 
@@ -5559,13 +5559,13 @@ list[tuple(str, str, FunctionOptions)]
         ...    ("keys", "count")
         ... ])
         pyarrow.Table
+        keys: string
         values_sum: int64
         keys_count: int64
-        keys: string
         ----
+        keys: [["a","b","c"]]
         values_sum: [[3,7,5]]
         keys_count: [[2,2,1]]
-        keys: [["a","b","c"]]
 
         Count the number of non-null values for column "values"
         over the grouped column "keys":
@@ -5575,11 +5575,11 @@ list[tuple(str, str, FunctionOptions)]
         ...    ("values", "count", pc.CountOptions(mode="only_valid"))
         ... ])
         pyarrow.Table
-        values_count: int64
         keys: string
+        values_count: int64
         ----
-        values_count: [[2,2,1]]
         keys: [["a","b","c"]]
+        values_count: [[2,2,1]]
 
         Get a single row for each group in column "keys":
 
diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py
index f32ca25a6c..7db4afd000 100644
--- a/python/pyarrow/tests/test_acero.py
+++ b/python/pyarrow/tests/test_acero.py
@@ -204,7 +204,7 @@ def test_aggregate_hash():
         table_source, Declaration("aggregate", aggr_opts)
     ])
     result = decl.to_table()
-    expected = pa.table({"count(a)": [1, 1], "b": ["foo", "bar"]})
+    expected = pa.table({"b": ["foo", "bar"], "count(a)": [1, 1]})
     assert result.equals(expected)
 
     # specify function options
@@ -215,7 +215,7 @@ def test_aggregate_hash():
         table_source, Declaration("aggregate", aggr_opts)
     ])
     result = decl.to_table()
-    expected_all = pa.table({"count(a)": [2, 1], "b": ["foo", "bar"]})
+    expected_all = pa.table({"b": ["foo", "bar"], "count(a)": [2, 1]})
     assert result.equals(expected_all)
 
     # specify keys as field references
diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index f45a9886ea..9205a31b14 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -181,9 +181,7 @@ implicit_schema <- function(.data) {
   } else {
     hash <- length(.data$group_by_vars) > 0
     # The output schema is based on the aggregations and any group_by vars.
-    # The group_by vars come first (this can't be done by summarize; they have
-    # to be last per the aggregate node signature, and they get projected to
-    # this order after aggregation)
+    # The group_by vars come first.
     new_fields <- c(
       group_types(.data, old_schm),
       aggregate_types(.data, hash, old_schm)
diff --git a/r/R/query-engine.R b/r/R/query-engine.R
index ea5a3f1c57..4b9b7ac459 100644
--- a/r/R/query-engine.R
+++ b/r/R/query-engine.R
@@ -127,20 +127,13 @@ ExecPlan <- R6Class("ExecPlan",
           key_names = group_vars
         )
 
-        if (grouped) {
-          # The result will have result columns first then the grouping cols.
-          # dplyr orders group cols first, so adapt the result to meet that expectation.
-          node <- node$Project(
-            make_field_refs(c(group_vars, names(.data$aggregations)))
+        if (grouped && getOption("arrow.summarise.sort", FALSE)) {
+          # Add sorting instructions for the rows too to match dplyr
+          # (see below about why sorting isn't itself a Node)
+          node$extras$sort <- list(
+            names = group_vars,
+            orders = rep(0L, length(group_vars))
           )
-          if (getOption("arrow.summarise.sort", FALSE)) {
-            # Add sorting instructions for the rows too to match dplyr
-            # (see below about why sorting isn't itself a Node)
-            node$extras$sort <- list(
-              names = group_vars,
-              orders = rep(0L, length(group_vars))
-            )
-          }
         }
       } else {
         # If any columns are derived, reordered, or renamed we need to Project
diff --git a/r/tests/testthat/test-dataset-dplyr.R b/r/tests/testthat/test-dataset-dplyr.R
index c8054b0c83..e20a6262b7 100644
--- a/r/tests/testthat/test-dataset-dplyr.R
+++ b/r/tests/testthat/test-dataset-dplyr.R
@@ -381,7 +381,6 @@ test_that("show_exec_plan(), show_query() and explain() with datasets", {
       show_exec_plan(),
     regexp = paste0(
       "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan
-      "ProjectNode.*", # output columns
       "GroupByNode.*", # group by node
       "keys=.*part.*", # key for aggregations
       "aggregates=.*hash_mean.*", # aggregations
diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R
index 00a9784e80..0b2b23ec86 100644
--- a/r/tests/testthat/test-dplyr-query.R
+++ b/r/tests/testthat/test-dplyr-query.R
@@ -508,7 +508,6 @@ test_that("show_exec_plan(), show_query() and explain()", {
       show_exec_plan(),
     regexp = paste0(
       "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan
-      "ProjectNode.*", # output columns
       "GroupByNode.*", # the group_by statement
       "keys=.*lgl.*", # the key for the aggregations
       "aggregates=.*hash_mean.*avg.*", # the aggregations
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index 6ee8982cc2..12ccec21ee 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -1136,14 +1136,14 @@ test_that("We don't add unnecessary ProjectNodes when aggregating", {
     0
   )
 
-  # 2 projections: one before, and one after in order to put grouping cols first
+  # Still just 1 projection
   expect_project_nodes(
     tab %>% group_by(lgl) %>% summarize(mean(int)),
-    2
+    1
   )
   expect_project_nodes(
     tab %>% count(lgl),
-    2
+    1
   )
 })
 
diff --git a/ruby/red-arrow/test/test-group.rb b/ruby/red-arrow/test/test-group.rb
index 2823977d5c..68e927df69 100644
--- a/ruby/red-arrow/test/test-group.rb
+++ b/ruby/red-arrow/test/test-group.rb
@@ -42,9 +42,9 @@ class GroupTest < Test::Unit::TestCase
       }
       table = Arrow::Table.new(raw_table)
       assert_equal(<<-TABLE, table.group(:time).count.to_s)
-	count(int)	                     time
-0	         1	#{time_values[0].iso8601}
-1	         1	#{time_values[1].iso8601}
+	                     time	count(int)
+0	#{time_values[0].iso8601}	         1
+1	#{time_values[1].iso8601}	         1
       TABLE
     end
   end
@@ -52,31 +52,31 @@ class GroupTest < Test::Unit::TestCase
   sub_test_case("#count") do
     test("single") do
       assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
-	count(group_key2)	count(int)	count(uint)	count(float)	count(string)	group_key1
-0	                2	         2	          1	           1	            2	         1
-1	                1	         0	          1	           1	            1	         2
-2	                3	         3	          3	           3	            2	         3
+	group_key1	count(group_key2)	count(int)	count(uint)	count(float)	count(string)
+0	         1	                2	         2	          1	           1	            2
+1	         2	                1	         0	          1	           1	            1
+2	         3	                3	         3	          3	           3	            2
       TABLE
     end
 
     test("multiple") do
       assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
-	count(int)	count(uint)	count(float)	count(string)	group_key1	group_key2
-0	         2	          1	           1	            2	         1	         1
-1	         0	          1	           1	            1	         2	         1
-2	         1	          1	           1	            0	         3	         1
-3	         2	          2	           2	            2	         3	         2
+	group_key1	group_key2	count(int)	count(uint)	count(float)	count(string)
+0	         1	         1	         2	          1	           1	            2
+1	         2	         1	         0	          1	           1	            1
+2	         3	         1	         1	          1	           1	            0
+3	         3	         2	         2	          2	           2	            2
       TABLE
     end
 
     test("column") do
       group = @table.group(:group_key1, :group_key2)
       assert_equal(<<-TABLE, group.count(:int, :uint).to_s)
-	count(int)	count(uint)	group_key1	group_key2
-0	         2	          1	         1	         1
-1	         0	          1	         2	         1
-2	         1	          1	         3	         1
-3	         2	          2	         3	         2
+	group_key1	group_key2	count(int)	count(uint)
+0	         1	         1	         2	          1
+1	         2	         1	         0	          1
+2	         3	         1	         1	          1
+3	         3	         2	         2	          2
       TABLE
     end
   end
@@ -84,20 +84,20 @@ class GroupTest < Test::Unit::TestCase
   sub_test_case("#sum") do
     test("single") do
       assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
-	sum(group_key2)	sum(int)	sum(uint)	sum(float)	group_key1
-0	              2	      -3	        1	  2.200000	         1
-1	              1	  (null)	        3	  3.300000	         2
-2	              5	     -15	       15	 16.500000	         3
+	group_key1	sum(group_key2)	sum(int)	sum(uint)	sum(float)
+0	         1	              2	      -3	        1	  2.200000
+1	         2	              1	  (null)	        3	  3.300000
+2	         3	              5	     -15	       15	 16.500000
       TABLE
     end
 
     test("multiple") do
       assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
-	sum(int)	sum(uint)	sum(float)	group_key1	group_key2
-0	      -3	        1	  2.200000	         1	         1
-1	  (null)	        3	  3.300000	         2	         1
-2	      -4	        4	  4.400000	         3	         1
-3	     -11	       11	 12.100000	         3	         2
+	group_key1	group_key2	sum(int)	sum(uint)	sum(float)
+0	         1	         1	      -3	        1	  2.200000
+1	         2	         1	  (null)	        3	  3.300000
+2	         3	         1	      -4	        4	  4.400000
+3	         3	         2	     -11	       11	 12.100000
       TABLE
     end
   end
@@ -105,20 +105,20 @@ class GroupTest < Test::Unit::TestCase
   sub_test_case("#mean") do
     test("single") do
       assert_equal(<<-TABLE, @table.group(:group_key1).mean.to_s)
-	mean(group_key2)	 mean(int)	mean(uint)	mean(float)	group_key1
-0	        1.000000	 -1.500000	  1.000000	   2.200000	         1
-1	        1.000000	    (null)	  3.000000	   3.300000	         2
-2	        1.666667	 -5.000000	  5.000000	   5.500000	         3
+	group_key1	mean(group_key2)	 mean(int)	mean(uint)	mean(float)
+0	         1	        1.000000	 -1.500000	  1.000000	   2.200000
+1	         2	        1.000000	    (null)	  3.000000	   3.300000
+2	         3	        1.666667	 -5.000000	  5.000000	   5.500000
       TABLE
     end
 
     test("multiple") do
       assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).mean.to_s)
-	 mean(int)	mean(uint)	mean(float)	group_key1	group_key2
-0	 -1.500000	  1.000000	   2.200000	         1	         1
-1	    (null)	  3.000000	   3.300000	         2	         1
-2	 -4.000000	  4.000000	   4.400000	         3	         1
-3	 -5.500000	  5.500000	   6.050000	         3	         2
+	group_key1	group_key2	 mean(int)	mean(uint)	mean(float)
+0	         1	         1	 -1.500000	  1.000000	   2.200000
+1	         2	         1	    (null)	  3.000000	   3.300000
+2	         3	         1	 -4.000000	  4.000000	   4.400000
+3	         3	         2	 -5.500000	  5.500000	   6.050000
       TABLE
     end
   end
@@ -126,20 +126,20 @@ class GroupTest < Test::Unit::TestCase
   sub_test_case("#min") do
     test("single") do
       assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s)
-	min(group_key2)	min(int)	min(uint)	min(float)	group_key1
-0	              1	      -2	        1	  2.200000	         1
-1	              1	  (null)	        3	  3.300000	         2
-2	              1	      -6	        4	  4.400000	         3
+	group_key1	min(group_key2)	min(int)	min(uint)	min(float)
+0	         1	              1	      -2	        1	  2.200000
+1	         2	              1	  (null)	        3	  3.300000
+2	         3	              1	      -6	        4	  4.400000
       TABLE
     end
 
     test("multiple") do
       assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s)
-	min(int)	min(uint)	min(float)	group_key1	group_key2
-0	      -2	        1	  2.200000	         1	         1
-1	  (null)	        3	  3.300000	         2	         1
-2	      -4	        4	  4.400000	         3	         1
-3	      -6	        5	  5.500000	         3	         2
+	group_key1	group_key2	min(int)	min(uint)	min(float)
+0	         1	         1	      -2	        1	  2.200000
+1	         2	         1	  (null)	        3	  3.300000
+2	         3	         1	      -4	        4	  4.400000
+3	         3	         2	      -6	        5	  5.500000
       TABLE
     end
   end
@@ -147,20 +147,20 @@ class GroupTest < Test::Unit::TestCase
   sub_test_case("#max") do
     test("single") do
       assert_equal(<<-TABLE, @table.group(:group_key1).max.to_s)
-	max(group_key2)	max(int)	max(uint)	max(float)	group_key1
-0	              1	      -1	        1	  2.200000	         1
-1	              1	  (null)	        3	  3.300000	         2
-2	              2	      -4	        6	  6.600000	         3
+	group_key1	max(group_key2)	max(int)	max(uint)	max(float)
+0	         1	              1	      -1	        1	  2.200000
+1	         2	              1	  (null)	        3	  3.300000
+2	         3	              2	      -4	        6	  6.600000
       TABLE
     end
 
     test("multiple") do
       assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s)
-	max(int)	max(uint)	max(float)	group_key1	group_key2
-0	      -1	        1	  2.200000	         1	         1
-1	  (null)	        3	  3.300000	         2	         1
-2	      -4	        4	  4.400000	         3	         1
-3	      -5	        6	  6.600000	         3	         2
+	group_key1	group_key2	max(int)	max(uint)	max(float)
+0	         1	         1	      -1	        1	  2.200000
+1	         2	         1	  (null)	        3	  3.300000
+2	         3	         1	      -4	        4	  4.400000
+3	         3	         2	      -5	        6	  6.600000
       TABLE
     end
   end
@@ -169,11 +169,11 @@ class GroupTest < Test::Unit::TestCase
     test("function()") do
       group = @table.group(:group_key1, :group_key2)
       assert_equal(<<-TABLE, group.aggregate("count(int)", "sum(uint)").to_s)
-	count(int)	sum(uint)	group_key1	group_key2
-0	         2	        1	         1	         1
-1	         0	        3	         2	         1
-2	         1	        4	         3	         1
-3	         2	       11	         3	         2
+	group_key1	group_key2	count(int)	sum(uint)
+0	         1	         1	         2	        1
+1	         2	         1	         0	        3
+2	         3	         1	         1	        4
+3	         3	         2	         2	       11
       TABLE
     end
   end