You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2021/07/16 05:56:07 UTC
[arrow] branch master updated: ARROW-11206: [C++][Compute][Python] Rename 'project' to 'make_struct'
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a4222a0 ARROW-11206: [C++][Compute][Python] Rename 'project' to 'make_struct'
a4222a0 is described below
commit a4222a0f7a6188bde7b86b2f4a7350c2174e84ce
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Fri Jul 16 07:54:32 2021 +0200
ARROW-11206: [C++][Compute][Python] Rename 'project' to 'make_struct'
Closes #10728 from bkietz/11206-Consider-hiding-renaming-
Authored-by: Benjamin Kietzman <be...@gmail.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
cpp/src/arrow/compute/api_scalar.cc | 25 +++++----
cpp/src/arrow/compute/api_scalar.h | 12 ++--
cpp/src/arrow/compute/exec/expression.cc | 5 +-
cpp/src/arrow/compute/exec/expression_internal.h | 7 ++-
cpp/src/arrow/compute/function_test.cc | 6 +-
.../arrow/compute/kernels/scalar_if_else_test.cc | 8 +--
cpp/src/arrow/compute/kernels/scalar_nested.cc | 57 +++++++++++--------
.../arrow/compute/kernels/scalar_nested_test.cc | 65 ++++++++++++----------
cpp/src/arrow/dataset/scanner.cc | 2 +-
cpp/src/arrow/dataset/scanner_internal.h | 4 +-
cpp/src/arrow/dataset/scanner_test.cc | 3 +-
docs/source/cpp/compute.rst | 2 +-
python/pyarrow/_compute.pyx | 6 +-
python/pyarrow/compute.py | 2 +-
python/pyarrow/includes/libarrow.pxd | 6 +-
python/pyarrow/tests/test_compute.py | 28 +++++++++-
16 files changed, 141 insertions(+), 97 deletions(-)
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index abf6f75..07e56d5 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -154,10 +154,10 @@ static auto kSliceOptionsType = GetFunctionOptionsType<SliceOptions>(
DataMember("step", &SliceOptions::step));
static auto kCompareOptionsType =
GetFunctionOptionsType<CompareOptions>(DataMember("op", &CompareOptions::op));
-static auto kProjectOptionsType = GetFunctionOptionsType<ProjectOptions>(
- DataMember("field_names", &ProjectOptions::field_names),
- DataMember("field_nullability", &ProjectOptions::field_nullability),
- DataMember("field_metadata", &ProjectOptions::field_metadata));
+static auto kMakeStructOptionsType = GetFunctionOptionsType<MakeStructOptions>(
+ DataMember("field_names", &MakeStructOptions::field_names),
+ DataMember("field_nullability", &MakeStructOptions::field_nullability),
+ DataMember("field_metadata", &MakeStructOptions::field_metadata));
static auto kDayOfWeekOptionsType = GetFunctionOptionsType<DayOfWeekOptions>(
DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering),
DataMember("week_start", &DayOfWeekOptions::week_start));
@@ -265,21 +265,22 @@ CompareOptions::CompareOptions(CompareOperator op)
CompareOptions::CompareOptions() : CompareOptions(CompareOperator::EQUAL) {}
constexpr char CompareOptions::kTypeName[];
-ProjectOptions::ProjectOptions(std::vector<std::string> n, std::vector<bool> r,
- std::vector<std::shared_ptr<const KeyValueMetadata>> m)
- : FunctionOptions(internal::kProjectOptionsType),
+MakeStructOptions::MakeStructOptions(
+ std::vector<std::string> n, std::vector<bool> r,
+ std::vector<std::shared_ptr<const KeyValueMetadata>> m)
+ : FunctionOptions(internal::kMakeStructOptionsType),
field_names(std::move(n)),
field_nullability(std::move(r)),
field_metadata(std::move(m)) {}
-ProjectOptions::ProjectOptions(std::vector<std::string> n)
- : FunctionOptions(internal::kProjectOptionsType),
+MakeStructOptions::MakeStructOptions(std::vector<std::string> n)
+ : FunctionOptions(internal::kMakeStructOptionsType),
field_names(std::move(n)),
field_nullability(field_names.size(), true),
field_metadata(field_names.size(), NULLPTR) {}
-ProjectOptions::ProjectOptions() : ProjectOptions(std::vector<std::string>()) {}
-constexpr char ProjectOptions::kTypeName[];
+MakeStructOptions::MakeStructOptions() : MakeStructOptions(std::vector<std::string>()) {}
+constexpr char MakeStructOptions::kTypeName[];
DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start)
: FunctionOptions(internal::kDayOfWeekOptionsType),
@@ -304,7 +305,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kTrimOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kCompareOptionsType));
- DCHECK_OK(registry->AddFunctionOptionsType(kProjectOptionsType));
+ DCHECK_OK(registry->AddFunctionOptionsType(kMakeStructOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType));
}
} // namespace internal
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index e8161dd..285e1eb 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -226,13 +226,13 @@ class ARROW_EXPORT CompareOptions : public FunctionOptions {
enum CompareOperator op;
};
-class ARROW_EXPORT ProjectOptions : public FunctionOptions {
+class ARROW_EXPORT MakeStructOptions : public FunctionOptions {
public:
- ProjectOptions(std::vector<std::string> n, std::vector<bool> r,
- std::vector<std::shared_ptr<const KeyValueMetadata>> m);
- explicit ProjectOptions(std::vector<std::string> n);
- ProjectOptions();
- constexpr static char const kTypeName[] = "ProjectOptions";
+ MakeStructOptions(std::vector<std::string> n, std::vector<bool> r,
+ std::vector<std::shared_ptr<const KeyValueMetadata>> m);
+ explicit MakeStructOptions(std::vector<std::string> n);
+ MakeStructOptions();
+ constexpr static char const kTypeName[] = "MakeStructOptions";
/// Names for wrapped columns
std::vector<std::string> field_names;
diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc
index bc9a910..4aab64a 100644
--- a/cpp/src/arrow/compute/exec/expression.cc
+++ b/cpp/src/arrow/compute/exec/expression.cc
@@ -166,7 +166,7 @@ std::string Expression::ToString() const {
return binary(std::move(op));
}
- if (auto options = GetProjectOptions(*call)) {
+ if (auto options = GetMakeStructOptions(*call)) {
std::string out = "{";
auto argument = call->arguments.begin();
for (const auto& field_name : options->field_names) {
@@ -1122,7 +1122,8 @@ Result<Expression> Deserialize(std::shared_ptr<Buffer> buffer) {
}
Expression project(std::vector<Expression> values, std::vector<std::string> names) {
- return call("project", std::move(values), compute::ProjectOptions{std::move(names)});
+ return call("make_struct", std::move(values),
+ compute::MakeStructOptions{std::move(names)});
}
Expression equal(Expression lhs, Expression rhs) {
diff --git a/cpp/src/arrow/compute/exec/expression_internal.h b/cpp/src/arrow/compute/exec/expression_internal.h
index 51d242e..dc38924 100644
--- a/cpp/src/arrow/compute/exec/expression_internal.h
+++ b/cpp/src/arrow/compute/exec/expression_internal.h
@@ -220,9 +220,10 @@ inline bool IsSetLookup(const std::string& function) {
return function == "is_in" || function == "index_in";
}
-inline const compute::ProjectOptions* GetProjectOptions(const Expression::Call& call) {
- if (call.function_name != "project") return nullptr;
- return checked_cast<const compute::ProjectOptions*>(call.options.get());
+inline const compute::MakeStructOptions* GetMakeStructOptions(
+ const Expression::Call& call) {
+ if (call.function_name != "make_struct") return nullptr;
+ return checked_cast<const compute::MakeStructOptions*>(call.options.get());
}
/// A helper for unboxing an Expression composed of associative function calls.
diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc
index 752ade2..225f807 100644
--- a/cpp/src/arrow/compute/function_test.cc
+++ b/cpp/src/arrow/compute/function_test.cc
@@ -86,10 +86,10 @@ TEST(FunctionOptions, Equality) {
options.emplace_back(new CompareOptions(CompareOperator::EQUAL));
options.emplace_back(new CompareOptions(CompareOperator::LESS));
// N.B. we never actually use field_nullability or field_metadata in Arrow
- options.emplace_back(new ProjectOptions({"col1"}, {true}, {}));
- options.emplace_back(new ProjectOptions({"col1"}, {false}, {}));
+ options.emplace_back(new MakeStructOptions({"col1"}, {true}, {}));
+ options.emplace_back(new MakeStructOptions({"col1"}, {false}, {}));
options.emplace_back(
- new ProjectOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})}));
+ new MakeStructOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})}));
options.emplace_back(new DayOfWeekOptions(false, 1));
options.emplace_back(new CastOptions(CastOptions::Safe(boolean())));
options.emplace_back(new CastOptions(CastOptions::Unsafe(int64())));
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
index 4ebed60..8ff86f3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
@@ -510,13 +510,7 @@ class TestCaseWhenNumeric : public ::testing::Test {};
TYPED_TEST_SUITE(TestCaseWhenNumeric, NumericBasedTypes);
Datum MakeStruct(const std::vector<Datum>& conds) {
- ProjectOptions options;
- options.field_names.resize(conds.size());
- options.field_metadata.resize(conds.size());
- for (const auto& datum : conds) {
- options.field_nullability.push_back(datum.null_count() > 0);
- }
- EXPECT_OK_AND_ASSIGN(auto result, CallFunction("project", conds, &options));
+ EXPECT_OK_AND_ASSIGN(auto result, CallFunction("make_struct", conds));
return result;
}
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc b/cpp/src/arrow/compute/kernels/scalar_nested.cc
index e4ab3f9..e9f0696 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc
@@ -62,15 +62,23 @@ const FunctionDoc list_value_length_doc{
"Null values emit a null in the output."),
{"lists"}};
-Result<ValueDescr> ProjectResolve(KernelContext* ctx,
- const std::vector<ValueDescr>& descrs) {
- const auto& names = OptionsWrapper<ProjectOptions>::Get(ctx).field_names;
- const auto& nullable = OptionsWrapper<ProjectOptions>::Get(ctx).field_nullability;
- const auto& metadata = OptionsWrapper<ProjectOptions>::Get(ctx).field_metadata;
-
- if (names.size() != descrs.size() || nullable.size() != descrs.size() ||
- metadata.size() != descrs.size()) {
- return Status::Invalid("project() was passed ", descrs.size(), " arguments but ",
+Result<ValueDescr> MakeStructResolve(KernelContext* ctx,
+ const std::vector<ValueDescr>& descrs) {
+ auto names = OptionsWrapper<MakeStructOptions>::Get(ctx).field_names;
+ auto nullable = OptionsWrapper<MakeStructOptions>::Get(ctx).field_nullability;
+ auto metadata = OptionsWrapper<MakeStructOptions>::Get(ctx).field_metadata;
+
+ if (names.size() == 0) {
+ names.resize(descrs.size());
+ nullable.resize(descrs.size(), true);
+ metadata.resize(descrs.size(), nullptr);
+ int i = 0;
+ for (auto& name : names) {
+ name = std::to_string(i++);
+ }
+ } else if (names.size() != descrs.size() || nullable.size() != descrs.size() ||
+ metadata.size() != descrs.size()) {
+ return Status::Invalid("make_struct() was passed ", descrs.size(), " arguments but ",
names.size(), " field names, ", nullable.size(),
" nullability bits, and ", metadata.size(),
" metadata dictionaries.");
@@ -94,15 +102,16 @@ Result<ValueDescr> ProjectResolve(KernelContext* ctx,
}
}
- fields[i] = field(names[i], descr.type, nullable[i], metadata[i]);
+ fields[i] =
+ field(std::move(names[i]), descr.type, nullable[i], std::move(metadata[i]));
++i;
}
return ValueDescr{struct_(std::move(fields)), shape};
}
-Status ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- ARROW_ASSIGN_OR_RAISE(auto descr, ProjectResolve(ctx, batch.GetDescriptors()));
+Status MakeStructExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ ARROW_ASSIGN_OR_RAISE(auto descr, MakeStructResolve(ctx, batch.GetDescriptors()));
for (int i = 0; i < batch.num_values(); ++i) {
const auto& field = checked_cast<const StructType&>(*descr.type).field(i);
@@ -139,11 +148,11 @@ Status ProjectExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
return Status::OK();
}
-const FunctionDoc project_doc{"Wrap Arrays into a StructArray",
- ("Names of the StructArray's fields are\n"
- "specified through ProjectOptions."),
- {"*args"},
- "ProjectOptions"};
+const FunctionDoc make_struct_doc{"Wrap Arrays into a StructArray",
+ ("Names of the StructArray's fields are\n"
+ "specified through MakeStructOptions."),
+ {"*args"},
+ "MakeStructOptions"};
} // namespace
@@ -156,15 +165,17 @@ void RegisterScalarNested(FunctionRegistry* registry) {
ListValueLength<LargeListType>));
DCHECK_OK(registry->AddFunction(std::move(list_value_length)));
- auto project_function =
- std::make_shared<ScalarFunction>("project", Arity::VarArgs(), &project_doc);
- ScalarKernel kernel{KernelSignature::Make({InputType{}}, OutputType{ProjectResolve},
+ static MakeStructOptions kDefaultMakeStructOptions;
+ auto make_struct_function = std::make_shared<ScalarFunction>(
+ "make_struct", Arity::VarArgs(), &make_struct_doc, &kDefaultMakeStructOptions);
+
+ ScalarKernel kernel{KernelSignature::Make({InputType{}}, OutputType{MakeStructResolve},
/*is_varargs=*/true),
- ProjectExec, OptionsWrapper<ProjectOptions>::Init};
+ MakeStructExec, OptionsWrapper<MakeStructOptions>::Init};
kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
- DCHECK_OK(project_function->AddKernel(std::move(kernel)));
- DCHECK_OK(registry->AddFunction(std::move(project_function)));
+ DCHECK_OK(make_struct_function->AddKernel(std::move(kernel)));
+ DCHECK_OK(registry->AddFunction(std::move(make_struct_function)));
}
} // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
index 42de9bc..ef48995 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
@@ -22,6 +22,7 @@
#include "arrow/compute/kernels/test_util.h"
#include "arrow/result.h"
#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
#include "arrow/util/key_value_metadata.h"
namespace arrow {
@@ -39,48 +40,55 @@ TEST(TestScalarNested, ListValueLength) {
}
struct {
+ Result<Datum> operator()(std::vector<Datum> args) {
+ return CallFunction("make_struct", args);
+ }
+
template <typename... Options>
Result<Datum> operator()(std::vector<Datum> args, std::vector<std::string> field_names,
Options... options) {
- ProjectOptions opts{field_names, options...};
- return CallFunction("project", args, &opts);
+ MakeStructOptions opts{field_names, options...};
+ return CallFunction("make_struct", args, &opts);
}
-} Project;
+} MakeStruct;
-TEST(Project, Scalar) {
+TEST(MakeStruct, Scalar) {
auto i32 = MakeScalar(1);
auto f64 = MakeScalar(2.5);
auto str = MakeScalar("yo");
- ASSERT_OK_AND_ASSIGN(auto expected,
- StructScalar::Make({i32, f64, str}, {"i", "f", "s"}));
- ASSERT_OK_AND_EQ(Datum(expected), Project({i32, f64, str}, {"i", "f", "s"}));
+ EXPECT_THAT(MakeStruct({i32, f64, str}, {"i", "f", "s"}),
+ ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"i", "f", "s"}))));
- // Three field names but one input value
- ASSERT_RAISES(Invalid, Project({str}, {"i", "f", "s"}));
+ // Names default to field_index
+ EXPECT_THAT(MakeStruct({i32, f64, str}),
+ ResultWith(Datum(*StructScalar::Make({i32, f64, str}, {"0", "1", "2"}))));
// No field names or input values is fine
- expected.reset(new StructScalar{{}, struct_({})});
- ASSERT_OK_AND_EQ(Datum(expected), Project(/*args=*/{}, /*field_names=*/{}));
+ EXPECT_THAT(MakeStruct({}), ResultWith(Datum(*StructScalar::Make({}, {}))));
+
+ // Three field names but one input value
+ EXPECT_THAT(MakeStruct({str}, {"i", "f", "s"}), Raises(StatusCode::Invalid));
}
-TEST(Project, Array) {
+TEST(MakeStruct, Array) {
std::vector<std::string> field_names{"i", "s"};
auto i32 = ArrayFromJSON(int32(), "[42, 13, 7]");
auto str = ArrayFromJSON(utf8(), R"(["aa", "aa", "aa"])");
- ASSERT_OK_AND_ASSIGN(Datum expected, StructArray::Make({i32, str}, field_names));
- ASSERT_OK_AND_EQ(expected, Project({i32, str}, field_names));
+ EXPECT_THAT(MakeStruct({i32, str}, {"i", "s"}),
+ ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
// Scalars are broadcast to the length of the arrays
- ASSERT_OK_AND_EQ(expected, Project({i32, MakeScalar("aa")}, field_names));
+ EXPECT_THAT(MakeStruct({i32, MakeScalar("aa")}, {"i", "s"}),
+ ResultWith(Datum(*StructArray::Make({i32, str}, field_names))));
// Array length mismatch
- ASSERT_RAISES(Invalid, Project({i32->Slice(1), str}, field_names));
+ EXPECT_THAT(MakeStruct({i32->Slice(1), str}, field_names), Raises(StatusCode::Invalid));
}
-TEST(Project, NullableMetadataPassedThru) {
+TEST(MakeStruct, NullableMetadataPassedThru) {
auto i32 = ArrayFromJSON(int32(), "[42, 13, 7]");
auto str = ArrayFromJSON(utf8(), R"(["aa", "aa", "aa"])");
@@ -90,7 +98,7 @@ TEST(Project, NullableMetadataPassedThru) {
key_value_metadata({"a", "b"}, {"ALPHA", "BRAVO"}), nullptr};
ASSERT_OK_AND_ASSIGN(auto proj,
- Project({i32, str}, field_names, nullability, metadata));
+ MakeStruct({i32, str}, field_names, nullability, metadata));
AssertTypeEqual(*proj.type(), StructType({
field("i", int32(), /*nullable=*/true, metadata[0]),
@@ -98,11 +106,12 @@ TEST(Project, NullableMetadataPassedThru) {
}));
// error: projecting an array containing nulls with nullable=false
- str = ArrayFromJSON(utf8(), R"(["aa", null, "aa"])");
- ASSERT_RAISES(Invalid, Project({i32, str}, field_names, nullability, metadata));
+ EXPECT_THAT(MakeStruct({i32, ArrayFromJSON(utf8(), R"(["aa", null, "aa"])")},
+ field_names, nullability, metadata),
+ Raises(StatusCode::Invalid));
}
-TEST(Project, ChunkedArray) {
+TEST(MakeStruct, ChunkedArray) {
std::vector<std::string> field_names{"i", "s"};
auto i32_0 = ArrayFromJSON(int32(), "[42, 13, 7]");
@@ -122,16 +131,16 @@ TEST(Project, ChunkedArray) {
ASSERT_OK_AND_ASSIGN(Datum expected,
ChunkedArray::Make({expected_0, expected_1, expected_2}));
- ASSERT_OK_AND_EQ(expected, Project({i32, str}, field_names));
+ ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
// Scalars are broadcast to the length of the arrays
- ASSERT_OK_AND_EQ(expected, Project({i32, MakeScalar("aa")}, field_names));
+ ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
// Array length mismatch
- ASSERT_RAISES(Invalid, Project({i32->Slice(1), str}, field_names));
+ ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
}
-TEST(Project, ChunkedArrayDifferentChunking) {
+TEST(MakeStruct, ChunkedArrayDifferentChunking) {
std::vector<std::string> field_names{"i", "s"};
auto i32_0 = ArrayFromJSON(int32(), "[42, 13, 7]");
@@ -159,13 +168,13 @@ TEST(Project, ChunkedArrayDifferentChunking) {
ASSERT_OK_AND_ASSIGN(Datum expected, ChunkedArray::Make(expected_chunks));
- ASSERT_OK_AND_EQ(expected, Project({i32, str}, field_names));
+ ASSERT_OK_AND_EQ(expected, MakeStruct({i32, str}, field_names));
// Scalars are broadcast to the length of the arrays
- ASSERT_OK_AND_EQ(expected, Project({i32, MakeScalar("aa")}, field_names));
+ ASSERT_OK_AND_EQ(expected, MakeStruct({i32, MakeScalar("aa")}, field_names));
// Array length mismatch
- ASSERT_RAISES(Invalid, Project({i32->Slice(1), str}, field_names));
+ ASSERT_RAISES(Invalid, MakeStruct({i32->Slice(1), str}, field_names));
}
} // namespace compute
diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc
index 2f7a115..0a28998 100644
--- a/cpp/src/arrow/dataset/scanner.cc
+++ b/cpp/src/arrow/dataset/scanner.cc
@@ -621,7 +621,7 @@ Result<EnumeratedRecordBatchGenerator> AsyncScanner::ScanBatchesUnorderedAsync(
compute::MakeFilterNode(scan, "filter", scan_options_->filter));
auto exprs = scan_options_->projection.call()->arguments;
- auto names = checked_cast<const compute::ProjectOptions*>(
+ auto names = checked_cast<const compute::MakeStructOptions*>(
scan_options_->projection.call()->options.get())
->field_names;
ARROW_ASSIGN_OR_RAISE(
diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h
index 27b32aa..a7ba070 100644
--- a/cpp/src/arrow/dataset/scanner_internal.h
+++ b/cpp/src/arrow/dataset/scanner_internal.h
@@ -225,7 +225,7 @@ inline Status SetProjection(ScanOptions* options, const compute::Expression& pro
inline Status SetProjection(ScanOptions* options, std::vector<compute::Expression> exprs,
std::vector<std::string> names) {
- compute::ProjectOptions project_options{std::move(names)};
+ compute::MakeStructOptions project_options{std::move(names)};
for (size_t i = 0; i < exprs.size(); ++i) {
if (auto ref = exprs[i].field_ref()) {
@@ -239,7 +239,7 @@ inline Status SetProjection(ScanOptions* options, std::vector<compute::Expressio
}
return SetProjection(options,
- call("project", std::move(exprs), std::move(project_options)));
+ call("make_struct", std::move(exprs), std::move(project_options)));
}
inline Status SetProjection(ScanOptions* options, std::vector<std::string> names) {
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index 74f558d..5dc83c6 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -1362,7 +1362,8 @@ TEST(ScanNode, MinimalEndToEnd) {
// just be a list of materialized field names)
compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
ASSERT_OK_AND_ASSIGN(a_times_2, a_times_2.Bind(*dataset->schema()));
- options->projection = call("project", {a_times_2}, compute::ProjectOptions{{"a * 2"}});
+ options->projection =
+ call("make_struct", {a_times_2}, compute::MakeStructOptions{{"a * 2"}});
// construct the scan node
ASSERT_OK_AND_ASSIGN(compute::ExecNode * scan,
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index f7fd1fa..35011a7 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -926,7 +926,7 @@ Structural transforms
(null if input is null). Output type is Int32 for List, Int64 for LargeList.
* \(10) The output struct's field types are the types of its arguments. The
- field names are specified using an instance of :struct:`ProjectOptions`.
+ field names are specified using an instance of :struct:`MakeStructOptions`.
The output shape will be scalar if all inputs are scalar, otherwise any
scalars will be broadcast to arrays.
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index d3267dc..46cfdc4 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -862,16 +862,16 @@ class PartitionNthOptions(_PartitionNthOptions):
self._set_options(pivot)
-cdef class _ProjectOptions(FunctionOptions):
+cdef class _MakeStructOptions(FunctionOptions):
def _set_options(self, field_names):
cdef:
vector[c_string] c_field_names
for n in field_names:
c_field_names.push_back(tobytes(n))
- self.wrapped.reset(new CProjectOptions(field_names))
+ self.wrapped.reset(new CMakeStructOptions(c_field_names))
-class ProjectOptions(_ProjectOptions):
+class MakeStructOptions(_MakeStructOptions):
def __init__(self, field_names):
self._set_options(field_names)
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 15d1adc..85f637f 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -41,7 +41,7 @@ from pyarrow._compute import ( # noqa
ModeOptions,
PadOptions,
PartitionNthOptions,
- ProjectOptions,
+ MakeStructOptions,
QuantileOptions,
ReplaceSliceOptions,
ReplaceSubstringOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 6977c26..bd3bdb2 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1971,9 +1971,9 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
CPartitionNthOptions(int64_t pivot)
int64_t pivot
- cdef cppclass CProjectOptions \
- "arrow::compute::ProjectOptions"(CFunctionOptions):
- CProjectOptions(vector[c_string] field_names)
+ cdef cppclass CMakeStructOptions \
+ "arrow::compute::MakeStructOptions"(CFunctionOptions):
+ CMakeStructOptions(vector[c_string] field_names)
vector[c_string] field_names
ctypedef enum CSortOrder" arrow::compute::SortOrder":
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index b659707..c98b3a2 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -119,7 +119,7 @@ def test_option_class_equality():
pc.MatchSubstringOptions("pattern"),
pc.PadOptions(5, " "),
pc.PartitionNthOptions(1),
- pc.ProjectOptions([b"field", b"names"]),
+ pc.MakeStructOptions(["field", "names"]),
pc.DayOfWeekOptions(False, 0),
pc.ReplaceSliceOptions(start=0, stop=1, replacement="a"),
pc.ReplaceSubstringOptions("a", "b"),
@@ -1645,3 +1645,29 @@ def test_min_max_element_wise():
assert result == pa.array([2, 3, None])
result = pc.min_element_wise(arr1, arr3, skip_nulls=False)
assert result == pa.array([1, 2, None])
+
+
+def test_make_struct():
+ assert pc.make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'}
+
+ assert pc.make_struct(1, 'a', field_names=['i', 's']).as_py() == {
+ 'i': 1, 's': 'a'}
+
+ assert pc.make_struct([1, 2, 3],
+ "a b c".split()) == pa.StructArray.from_arrays([
+ [1, 2, 3],
+ "a b c".split()], names='0 1'.split())
+
+ with pytest.raises(ValueError, match="Array arguments must all "
+ "be the same length"):
+ pc.make_struct([1, 2, 3, 4], "a b c".split())
+
+ with pytest.raises(ValueError, match="0 arguments but 2 field names"):
+ pc.make_struct(field_names=['one', 'two'])
+
+
+def test_case_when():
+ assert pc.case_when(pc.make_struct([True, False, None],
+ [False, True, None]),
+ [1, 2, 3],
+ [11, 12, 13]) == pa.array([1, 12, None])