You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@arrow.apache.org by jo...@apache.org on 2021/07/16 05:56:07 UTC

[arrow] branch master updated: ARROW-11206: [C++][Compute][Python] Rename 'project' to 'make_struct'

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a4222a0  ARROW-11206: [C++][Compute][Python] Rename 'project' to 'make_struct'
a4222a0 is described below

commit a4222a0f7a6188bde7b86b2f4a7350c2174e84ce
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Fri Jul 16 07:54:32 2021 +0200

    ARROW-11206: [C++][Compute][Python] Rename 'project' to 'make_struct'
    
    Closes #10728 from bkietz/11206-Consider-hiding-renaming-
    
    Authored-by: Benjamin Kietzman <be...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 cpp/src/arrow/compute/api_scalar.cc                | 25 +++++----
 cpp/src/arrow/compute/api_scalar.h                 | 12 ++--
 cpp/src/arrow/compute/exec/expression.cc           |  5 +-
 cpp/src/arrow/compute/exec/expression_internal.h   |  7 ++-
 cpp/src/arrow/compute/function_test.cc             |  6 +-
 .../arrow/compute/kernels/scalar_if_else_test.cc   |  8 +--
 cpp/src/arrow/compute/kernels/scalar_nested.cc     | 57 +++++++++++--------
 .../arrow/compute/kernels/scalar_nested_test.cc    | 65 ++++++++++++----------
 cpp/src/arrow/dataset/scanner.cc                   |  2 +-
 cpp/src/arrow/dataset/scanner_internal.h           |  4 +-
 cpp/src/arrow/dataset/scanner_test.cc              |  3 +-
 docs/source/cpp/compute.rst                        |  2 +-
 python/pyarrow/_compute.pyx                        |  6 +-
 python/pyarrow/compute.py                          |  2 +-
 python/pyarrow/includes/libarrow.pxd               |  6 +-
 python/pyarrow/tests/test_compute.py               | 28 +++++++++-
 16 files changed, 141 insertions(+), 97 deletions(-)

diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index abf6f75..07e56d5 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -154,10 +154,10 @@ static auto kSliceOptionsType = GetFunctionOptionsType<SliceOptions>(
     DataMember("step", &SliceOptions::step));
 static auto kCompareOptionsType =
     GetFunctionOptionsType<CompareOptions>(DataMember("op", &CompareOptions::op));
-static auto kProjectOptionsType = GetFunctionOptionsType<ProjectOptions>(
-    DataMember("field_names", &ProjectOptions::field_names),
-    DataMember("field_nullability", &ProjectOptions::field_nullability),
-    DataMember("field_metadata", &ProjectOptions::field_metadata));
+static auto kMakeStructOptionsType = GetFunctionOptionsType<MakeStructOptions>(
+    DataMember("field_names", &MakeStructOptions::field_names),
+    DataMember("field_nullability", &MakeStructOptions::field_nullability),
+    DataMember("field_metadata", &MakeStructOptions::field_metadata));
 static auto kDayOfWeekOptionsType = GetFunctionOptionsType<DayOfWeekOptions>(
     DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering),
     DataMember("week_start", &DayOfWeekOptions::week_start));
@@ -265,21 +265,22 @@ CompareOptions::CompareOptions(CompareOperator op)
 CompareOptions::CompareOptions() : CompareOptions(CompareOperator::EQUAL) {}
 constexpr char CompareOptions::kTypeName[];
 
-ProjectOptions::ProjectOptions(std::vector<std::string> n, std::vector<bool> r,
-                               std::vector<std::shared_ptr<const KeyValueMetadata>> m)
-    : FunctionOptions(internal::kProjectOptionsType),
+MakeStructOptions::MakeStructOptions(
+    std::vector<std::string> n, std::vector<bool> r,
+    std::vector<std::shared_ptr<const KeyValueMetadata>> m)
+    : FunctionOptions(internal::kMakeStructOptionsType),
       field_names(std::move(n)),
       field_nullability(std::move(r)),
       field_metadata(std::move(m)) {}
 
-ProjectOptions::ProjectOptions(std::vector<std::string> n)
-    : FunctionOptions(internal::kProjectOptionsType),
+MakeStructOptions::MakeStructOptions(std::vector<std::string> n)
+    : FunctionOptions(internal::kMakeStructOptionsType),
       field_names(std::move(n)),
       field_nullability(field_names.size(), true),
       field_metadata(field_names.size(), NULLPTR) {}
 
-ProjectOptions::ProjectOptions() : ProjectOptions(std::vector<std::string>()) {}
-constexpr char ProjectOptions::kTypeName[];
+MakeStructOptions::MakeStructOptions() : MakeStructOptions(std::vector<std::string>()) {}
+constexpr char MakeStructOptions::kTypeName[];
 
 DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start)
     : FunctionOptions(internal::kDayOfWeekOptionsType),
@@ -304,7 +305,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kCompareOptionsType));
-  DCHECK_OK(registry->AddFunctionOptionsType(kProjectOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kMakeStructOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType));
 }
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index e8161dd..285e1eb 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -226,13 +226,13 @@ class ARROW_EXPORT CompareOptions : public FunctionOptions {
   enum CompareOperator op;
 };
 
-class ARROW_EXPORT ProjectOptions : public FunctionOptions {
+class ARROW_EXPORT MakeStructOptions : public FunctionOptions {
  public:
-  ProjectOptions(std::vector<std::string> n, std::vector<bool> r,
-                 std::vector<std::shared_ptr<const KeyValueMetadata>> m);
-  explicit ProjectOptions(std::vector<std::string> n);
-  ProjectOptions();
-  constexpr static char const kTypeName[] = "ProjectOptions";
+  MakeStructOptions(std::vector<std::string> n, std::vector<bool> r,
+                    std::vector<std::shared_ptr<const KeyValueMetadata>> m);
+  explicit MakeStructOptions(std::vector<std::string> n);
+  MakeStructOptions();
+  constexpr static char const kTypeName[] = "MakeStructOptions";
 
   /// Names for wrapped columns
   std::vector<std::string> field_names;
diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
index bc9a910..4aab64a 100644
--- a/cpp/src/arrow/compute/exec/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -166,7 +166,7 @@ std::string Expression::ToString() const {
     return binary(std::move(op));
   }
 
-  if (auto options = GetProjectOptions(*call)) {
+  if (auto options = GetMakeStructOptions(*call)) {
     std::string out = "{";
     auto argument = call->arguments.begin();
     for (const auto& field_name : options->field_names) {
@@ -1122,7 +1122,8 @@ Result<Expression> Deserialize(std::shared_ptr<Buffer> buffer) {
 }
 
 Expression project(std::vector<Expression> values, std::vector<std::string> names) {
-  return call("project", std::move(values), compute::ProjectOptions{std::move(names)});
+  return call("make_struct", std::move(values),
+              compute::MakeStructOptions{std::move(names)});
 }
 
 Expression equal(Expression lhs, Expression rhs) {
diff --git a/cpp/src/arrow/compute/exec/expression_internal.h b/cpp/src/arrow/compute/exec/expression_internal.h
index 51d242e..dc38924 100644
--- a/cpp/src/arrow/compute/exec/expression_internal.h
+++ b/cpp/src/arrow/compute/exec/expression_internal.h
@@ -220,9 +220,10 @@ inline bool IsSetLookup(const std::string& function) {
   return function == "is_in" || function == "index_in";
 }
 
-inline const compute::ProjectOptions* GetProjectOptions(const Expression::Call& call) {
-  if (call.function_name != "project") return nullptr;
-  return checked_cast<const compute::ProjectOptions*>(call.options.get());
+inline const compute::MakeStructOptions* GetMakeStructOptions(
+    const Expression::Call& call) {
+  if (call.function_name != "make_struct") return nullptr;
+  return checked_cast<const compute::MakeStructOptions*>(call.options.get());
 }
 
 /// A helper for unboxing an Expression composed of associative function calls.
diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc
index 752ade2..225f807 100644
--- a/cpp/src/arrow/compute/function_test.cc
+++ b/cpp/src/arrow/compute/function_test.cc
@@ -86,10 +86,10 @@ TEST(FunctionOptions, Equality) {
   options.emplace_back(new CompareOptions(CompareOperator::EQUAL));
   options.emplace_back(new CompareOptions(CompareOperator::LESS));
   // N.B. we never actually use field_nullability or field_metadata in Arrow
-  options.emplace_back(new ProjectOptions({"col1"}, {true}, {}));
-  options.emplace_back(new ProjectOptions({"col1"}, {false}, {}));
+  options.emplace_back(new MakeStructOptions({"col1"}, {true}, {}));
+  options.emplace_back(new MakeStructOptions({"col1"}, {false}, {}));
   options.emplace_back(
-      new ProjectOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})}));
+      new MakeStructOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})}));
   options.emplace_back(new DayOfWeekOptions(false, 1));
   options.emplace_back(new CastOptions(CastOptions::Safe(boolean())));
   options.emplace_back(new CastOptions(CastOptions::Unsafe(int64())));
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 4ebed60..8ff86f3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -510,13 +510,7 @@ class TestCaseWhenNumeric : public ::testing::Test {};
 TYPED_TEST_SUITE(TestCaseWhenNumeric, NumericBasedTypes);
 
 Datum MakeStruct(const std::vector<Datum>& conds) {
-  ProjectOptions options;
-  options.field_names.resize(conds.size());
-  options.field_metadata.resize(conds.size());
-  for (const auto& datum : conds) {
-    options.field_nullability.push_back(datum.null_count() > 0);
-  }
-  EXPECT_OK_AND_ASSIGN(auto result, CallFunction("project", conds, &options));
+  EXPECT_OK_AND_ASSIGN(auto result, CallFunction("make_struct", conds));
   return result;
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc b/cpp/src/arrow/compute/kernels/scalar_nested.cc
index e4ab3f9..e9f0696 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc
@@ -62,15 +62,23 @@ const FunctionDoc list_value_length_doc{
      "Null values emit a null in the output."),
     {"lists"}};
 
-Result<ValueDescr> ProjectResolve(KernelContext* ctx,
-                                  const std::vector<ValueDescr>& descrs) {
-  const auto& names = OptionsWrapper<ProjectOptions>::Get(ctx).field_names;
-  const auto& nullable = OptionsWrapper<ProjectOptions>::Get(ctx).field_nullability;
-  const auto& metadata = OptionsWrapper<ProjectOptions>::Get(ctx).field_metadata;
-
-  if (names.size() != descrs.size() || nullable.size() != descrs.size() ||
-      metadata.size() != descrs.size()) {
-    return Status::Invalid("project() was passed ", descrs.size(), " arguments but ",
+Result<ValueDescr> MakeStructResolve(KernelContext* ctx,
+                                     const std::vector<ValueDescr>& descrs) {
+  auto names = OptionsWrapper<MakeStructOptions>::Get(ctx).field_names;
+  auto nullable = OptionsWrapper<MakeStructOptions>::Get(ctx).field_nullability;
+  auto metadata = OptionsWrapper<MakeStructOptions>::Get(ctx).field_metadata;
+
+  if (names.size() == 0) {
+    names.resize(descrs.size());
+    nullable.resize(descrs.size(), true);
+    metadata.resize(descrs.size(), nullptr);
+    int i = 0;
+    for (auto& name : names) {
+      name = std::to_string(i++);
+    }
+  } else if (names.size() != descrs.size() || nullable.size() != descrs.size() ||
+             metadata.size() != descrs.size()) {
+    return Status::Invalid("make_struct() was passed ", descrs.size(), " arguments but ",
                            names.size(), " field names, ", nullable.size(),
                            " nullability bits, and ", metadata.size(),
                            " metadata dictionaries.");
@@ -94,15 +102,16 @@ Result<ValueDescr> ProjectResolve(KernelContext* ctx,
       }
     }
 
-    fields[i] = field(names[i], descr.type, nullable[i], metadata[i]);
+    fields[i] =
+        field(std::move(names[i]), descr.type, nullable[i], std::move(metadata[i]));
     ++i;
   }
 
   return ValueDescr{struct_(std::move(fields)), shape};
 }
 
-Status ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
-  ARROW_ASSIGN_OR_RAISE(auto descr, ProjectResolve(ctx, batch.GetDescriptors()));
+Status MakeStructExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  ARROW_ASSIGN_OR_RAISE(auto descr, MakeStructResolve(ctx, batch.GetDescriptors()));
 
   for (int i = 0; i < batch.num_values(); ++i) {
     const auto& field = checked_cast<const StructType&>(*descr.type).field(i);
@@ -139,11 +148,11 @@ Status ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
   return Status::OK();
 }
 
-const FunctionDoc project_doc{"Wrap Arrays into a StructArray",
-                              ("Names of the StructArray's fields are\n"
-                               "specified through ProjectOptions."),
-                              {"*args"},
-                              "ProjectOptions"};
+const FunctionDoc make_struct_doc{"Wrap Arrays into a StructArray",
+                                  ("Names of the StructArray's fields are\n"
+                                   "specified through MakeStructOptions."),
+                                  {"*args"},
+                                  "MakeStructOptions"};
 
 }  // namespace
 
@@ -156,15 +165,17 @@ void RegisterScalarNested(FunctionRegistry* registry) {
                                          ListValueLength<LargeListType>));
   DCHECK_OK(registry->AddFunction(std::move(list_value_length)));
 
-  auto project_function =
-      std::make_shared<ScalarFunction>("project", Arity::VarArgs(), &project_doc);
-  ScalarKernel kernel{KernelSignature::Make({InputType{}}, OutputType{ProjectResolve},
+  static MakeStructOptions kDefaultMakeStructOptions;
+  auto make_struct_function = std::make_shared<ScalarFunction>(
+      "make_struct", Arity::VarArgs(), &make_struct_doc, &kDefaultMakeStructOptions);
+
+  ScalarKernel kernel{KernelSignature::Make({InputType{}}, OutputType{MakeStructResolve},
                                             /*is_varargs=*/true),
-                      ProjectExec, OptionsWrapper<ProjectOptions>::Init};
+                      MakeStructExec, OptionsWrapper<MakeStructOptions>::Init};
   kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
   kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
-  DCHECK_OK(project_function->AddKernel(std::move(kernel)));
-  DCHECK_OK(registry->AddFunction(std::move(project_function)));
+  DCHECK_OK(make_struct_function->AddKernel(std::move(kernel)));
+  DCHECK_OK(registry->AddFunction(std::move(make_struct_function)));
 }
 
 }  // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
index 42de9bc..ef48995 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
@@ -22,6 +22,7 @@
 #include "arrow/compute/kernels/test_util.h"
 #include "arrow/result.h"
 #include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
 #include "arrow/util/key_value_metadata.h"
 
 namespace arrow {
@@ -39,48 +40,55 @@ TEST(TestScalarNested, ListValueLength) {
 }
 
 struct {
+  Result<Datum> operator()(std::vector<Datum> args) {
+    return CallFunction("make_struct", args);
+  }
+
   template <typename... Options>
   Result<Datum> operator()(std::vector<Datum> args, std::vector<std::string> field_names,
                            Options... options) {
-    ProjectOptions opts{field_names, options...};
-    return CallFunction("project", args, &opts);
+    MakeStructOptions opts{field_names, options...};
+    return CallFunction("make_struct", args, &opts);
   }
-} Project;
+} MakeStruct;
 
-TEST(Project, Scalar) {
+TEST(MakeStruct, Scalar) {
   auto i32 = MakeScalar(1);
   auto f64 = MakeScalar(2.5);
   auto str = MakeScalar("yo");
 
-  ASSERT_OK_AND_ASSIGN(auto expected,
-                       StructScalar::Make({i32, f64, str}, {"i", "f", "s"}));
-  ASSERT_OK_AND_EQ(Datum(expected), Project({i32, f64, str}, {"i", "f", "s"}));
+  EXPECT_THAT(MakeStruct({i32, f64, str}, {"i", "f", "s"}),
+              ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"i", "f", "s"}))));
 
-  // Three field names but one input value
-  ASSERT_RAISES(Invalid, Project({str}, {"i", "f", "s"}));
+  // Names default to field_index
+  EXPECT_THAT(MakeStruct({i32, f64, str}),
+              ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"0", "1", "2"}))));
 
   // No field names or input values is fine
-  expected.reset(new StructScalar{{}, struct_({})});
-  ASSERT_OK_AND_EQ(Datum(expected), Project(/*args=*/{}, /*field_names=*/{}));
+  EXPECT_THAT(MakeStruct({}), ResultWith(Datum(*StructScalar::Make({}, {}))));
+
+  // Three field names but one input value
+  EXPECT_THAT(MakeStruct({str}, {"i", "f", "s"}), Raises(StatusCode::Invalid));
 }
 
-TEST(Project, Array) {
+TEST(MakeStruct, Array) {
   std::vector<std::string> field_names{"i", "s"};
 
   auto i32 = ArrayFromJSON(int32(), "[42, 13, 7]");
   auto str = ArrayFromJSON(utf8(), R"(["aa", "aa", "aa"])");
-  ASSERT_OK_AND_ASSIGN(Datum expected, StructArray::Make({i32, str}, field_names));
 
-  ASSERT_OK_AND_EQ(expected, Project({i32, str}, field_names));
+  EXPECT_THAT(MakeStruct({i32, str}, {"i", "s"}),
+              ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
 
   // Scalars are broadcast to the length of the arrays
-  ASSERT_OK_AND_EQ(expected, Project({i32, MakeScalar("aa")}, field_names));
+  EXPECT_THAT(MakeStruct({i32, MakeScalar("aa")}, {"i", "s"}),
+              ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
 
   // Array length mismatch
-  ASSERT_RAISES(Invalid, Project({i32->Slice(1), str}, field_names));
+  EXPECT_THAT(MakeStruct({i32->Slice(1), str}, field_names), Raises(StatusCode::Invalid));
 }
 
-TEST(Project, NullableMetadataPassedThru) {
+TEST(MakeStruct, NullableMetadataPassedThru) {
   auto i32 = ArrayFromJSON(int32(), "[42, 13, 7]");
   auto str = ArrayFromJSON(utf8(), R"(["aa", "aa", "aa"])");
 
@@ -90,7 +98,7 @@ TEST(Project, NullableMetadataPassedThru) {
       key_value_metadata({"a", "b"}, {"ALPHA", "BRAVO"}), nullptr};
 
   ASSERT_OK_AND_ASSIGN(auto proj,
-                       Project({i32, str}, field_names, nullability, metadata));
+                       MakeStruct({i32, str}, field_names, nullability, metadata));
 
   AssertTypeEqual(*proj.type(), StructType({
                                     field("i", int32(), /*nullable=*/true, metadata[0]),
@@ -98,11 +106,12 @@ TEST(Project, NullableMetadataPassedThru) {
                                 }));
 
   // error: projecting an array containing nulls with nullable=false
-  str = ArrayFromJSON(utf8(), R"(["aa", null, "aa"])");
-  ASSERT_RAISES(Invalid, Project({i32, str}, field_names, nullability, metadata));
+  EXPECT_THAT(MakeStruct({i32, ArrayFromJSON(utf8(), R"(["aa", null, "aa"])")},
+                         field_names, nullability, metadata),
+              Raises(StatusCode::Invalid));
 }
 
-TEST(Project, ChunkedArray) {
+TEST(MakeStruct, ChunkedArray) {
   std::vector<std::string> field_names{"i", "s"};
 
   auto i32_0 = ArrayFromJSON(int32(), "[42, 13, 7]");
@@ -122,16 +131,16 @@ TEST(Project, ChunkedArray) {
   ASSERT_OK_AND_ASSIGN(Datum expected,
                        ChunkedArray::Make({expected_0, expected_1, expected_2}));
 
-  ASSERT_OK_AND_EQ(expected, Project({i32, str}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
 
   // Scalars are broadcast to the length of the arrays
-  ASSERT_OK_AND_EQ(expected, Project({i32, MakeScalar("aa")}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
 
   // Array length mismatch
-  ASSERT_RAISES(Invalid, Project({i32->Slice(1), str}, field_names));
+  ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
 }
 
-TEST(Project, ChunkedArrayDifferentChunking) {
+TEST(MakeStruct, ChunkedArrayDifferentChunking) {
   std::vector<std::string> field_names{"i", "s"};
 
   auto i32_0 = ArrayFromJSON(int32(), "[42, 13, 7]");
@@ -159,13 +168,13 @@ TEST(Project, ChunkedArrayDifferentChunking) {
 
   ASSERT_OK_AND_ASSIGN(Datum expected, ChunkedArray::Make(expected_chunks));
 
-  ASSERT_OK_AND_EQ(expected, Project({i32, str}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
 
   // Scalars are broadcast to the length of the arrays
-  ASSERT_OK_AND_EQ(expected, Project({i32, MakeScalar("aa")}, field_names));
+  ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
 
   // Array length mismatch
-  ASSERT_RAISES(Invalid, Project({i32->Slice(1), str}, field_names));
+  ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 2f7a115..0a28998 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -621,7 +621,7 @@ Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync(
                         compute::MakeFilterNode(scan, "filter", scan_options_->filter));
 
   auto exprs = scan_options_->projection.call()->arguments;
-  auto names = checked_cast<const compute::ProjectOptions*>(
+  auto names = checked_cast<const compute::MakeStructOptions*>(
                    scan_options_->projection.call()->options.get())
                    ->field_names;
   ARROW_ASSIGN_OR_RAISE(
diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h
index 27b32aa..a7ba070 100644
--- a/cpp/src/arrow/dataset/scanner_internal.h
+++ b/cpp/src/arrow/dataset/scanner_internal.h
@@ -225,7 +225,7 @@ inline Status SetProjection(ScanOptions* options, const compute::Expression& pro
 
 inline Status SetProjection(ScanOptions* options, std::vector<compute::Expression> exprs,
                             std::vector<std::string> names) {
-  compute::ProjectOptions project_options{std::move(names)};
+  compute::MakeStructOptions project_options{std::move(names)};
 
   for (size_t i = 0; i < exprs.size(); ++i) {
     if (auto ref = exprs[i].field_ref()) {
@@ -239,7 +239,7 @@ inline Status SetProjection(ScanOptions* options, std::vector<compute::Expressio
   }
 
   return SetProjection(options,
-                       call("project", std::move(exprs), std::move(project_options)));
+                       call("make_struct", std::move(exprs), std::move(project_options)));
 }
 
 inline Status SetProjection(ScanOptions* options, std::vector<std::string> names) {
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 74f558d..5dc83c6 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -1362,7 +1362,8 @@ TEST(ScanNode, MinimalEndToEnd) {
   // just be a list of materialized field names)
   compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
   ASSERT_OK_AND_ASSIGN(a_times_2, a_times_2.Bind(*dataset->schema()));
-  options->projection = call("project", {a_times_2}, compute::ProjectOptions{{"a * 2"}});
+  options->projection =
+      call("make_struct", {a_times_2}, compute::MakeStructOptions{{"a * 2"}});
 
   // construct the scan node
   ASSERT_OK_AND_ASSIGN(compute::ExecNode * scan,
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index f7fd1fa..35011a7 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -926,7 +926,7 @@ Structural transforms
   (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
 
 * \(10) The output struct's field types are the types of its arguments. The
-  field names are specified using an instance of :struct:`ProjectOptions`.
+  field names are specified using an instance of :struct:`MakeStructOptions`.
   The output shape will be scalar if all inputs are scalar, otherwise any
   scalars will be broadcast to arrays.
 
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index d3267dc..46cfdc4 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -862,16 +862,16 @@ class PartitionNthOptions(_PartitionNthOptions):
         self._set_options(pivot)
 
 
-cdef class _ProjectOptions(FunctionOptions):
+cdef class _MakeStructOptions(FunctionOptions):
     def _set_options(self, field_names):
         cdef:
             vector[c_string] c_field_names
         for n in field_names:
             c_field_names.push_back(tobytes(n))
-        self.wrapped.reset(new CProjectOptions(field_names))
+        self.wrapped.reset(new CMakeStructOptions(c_field_names))
 
 
-class ProjectOptions(_ProjectOptions):
+class MakeStructOptions(_MakeStructOptions):
     def __init__(self, field_names):
         self._set_options(field_names)
 
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 15d1adc..85f637f 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -41,7 +41,7 @@ from pyarrow._compute import (  # noqa
     ModeOptions,
     PadOptions,
     PartitionNthOptions,
-    ProjectOptions,
+    MakeStructOptions,
     QuantileOptions,
     ReplaceSliceOptions,
     ReplaceSubstringOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 6977c26..bd3bdb2 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1971,9 +1971,9 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         CPartitionNthOptions(int64_t pivot)
         int64_t pivot
 
-    cdef cppclass CProjectOptions \
-            "arrow::compute::ProjectOptions"(CFunctionOptions):
-        CProjectOptions(vector[c_string] field_names)
+    cdef cppclass CMakeStructOptions \
+            "arrow::compute::MakeStructOptions"(CFunctionOptions):
+        CMakeStructOptions(vector[c_string] field_names)
         vector[c_string] field_names
 
     ctypedef enum CSortOrder" arrow::compute::SortOrder":
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index b659707..c98b3a2 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -119,7 +119,7 @@ def test_option_class_equality():
         pc.MatchSubstringOptions("pattern"),
         pc.PadOptions(5, " "),
         pc.PartitionNthOptions(1),
-        pc.ProjectOptions([b"field", b"names"]),
+        pc.MakeStructOptions(["field", "names"]),
         pc.DayOfWeekOptions(False, 0),
         pc.ReplaceSliceOptions(start=0, stop=1, replacement="a"),
         pc.ReplaceSubstringOptions("a", "b"),
@@ -1645,3 +1645,29 @@ def test_min_max_element_wise():
     assert result == pa.array([2, 3, None])
     result = pc.min_element_wise(arr1, arr3, skip_nulls=False)
     assert result == pa.array([1, 2, None])
+
+
+def test_make_struct():
+    assert pc.make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'}
+
+    assert pc.make_struct(1, 'a', field_names=['i', 's']).as_py() == {
+        'i': 1, 's': 'a'}
+
+    assert pc.make_struct([1, 2, 3],
+                          "a b c".split()) == pa.StructArray.from_arrays([
+                              [1, 2, 3],
+                              "a b c".split()], names='0 1'.split())
+
+    with pytest.raises(ValueError, match="Array arguments must all "
+                                         "be the same length"):
+        pc.make_struct([1, 2, 3, 4], "a b c".split())
+
+    with pytest.raises(ValueError, match="0 arguments but 2 field names"):
+        pc.make_struct(field_names=['one', 'two'])
+
+
+def test_case_when():
+    assert pc.case_when(pc.make_struct([True, False, None],
+                                       [False, True, None]),
+                        [1, 2, 3],
+                        [11, 12, 13]) == pa.array([1, 12, None])