You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2023/04/04 12:33:00 UTC
[arrow] branch main updated: GH-33976: [Python] Clean-up Acero related declarations in libarrow_acero.pxd (#34773)
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 81c828ed5c GH-33976: [Python] Clean-up Acero related declarations in libarrow_acero.pxd (#34773)
81c828ed5c is described below
commit 81c828ed5c7689ec6f5caad354a627847ed46304
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Tue Apr 4 14:32:52 2023 +0200
GH-33976: [Python] Clean-up Acero related declarations in libarrow_acero.pxd (#34773)
### What changes are included in this PR?
I removed some of the declarations that are no longer used (after the refactoring for https://github.com/apache/arrow/issues/33976).
### Are there any user-facing changes?
No
* Issue: #33976
Lead-authored-by: Joris Van den Bossche <jo...@gmail.com>
Co-authored-by: Davide Pasetto <dp...@gmail.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/includes/libarrow_acero.pxd | 35 ------------------------------
python/pyarrow/tests/test_acero.py | 28 +++++++++---------------
2 files changed, 10 insertions(+), 53 deletions(-)
diff --git a/python/pyarrow/includes/libarrow_acero.pxd b/python/pyarrow/includes/libarrow_acero.pxd
index 5ff5ad99dc..920668cdd0 100644
--- a/python/pyarrow/includes/libarrow_acero.pxd
+++ b/python/pyarrow/includes/libarrow_acero.pxd
@@ -46,9 +46,6 @@ cdef extern from "arrow/acero/options.h" namespace "arrow::acero" nogil:
CJoinType_RIGHT_OUTER "arrow::acero::JoinType::RIGHT_OUTER"
CJoinType_FULL_OUTER "arrow::acero::JoinType::FULL_OUTER"
- cdef cppclass CAsyncExecBatchGenerator "arrow::acero::AsyncExecBatchGenerator":
- pass
-
cdef cppclass CExecNodeOptions "arrow::acero::ExecNodeOptions":
pass
@@ -73,10 +70,6 @@ cdef extern from "arrow/acero/options.h" namespace "arrow::acero" nogil:
cdef cppclass CAggregateNodeOptions "arrow::acero::AggregateNodeOptions"(CExecNodeOptions):
CAggregateNodeOptions(vector[CAggregate] aggregates, vector[CFieldRef] names)
- cdef cppclass COrderBySinkNodeOptions "arrow::acero::OrderBySinkNodeOptions"(CExecNodeOptions):
- COrderBySinkNodeOptions(vector[CSortOptions] options,
- CAsyncExecBatchGenerator generator)
-
cdef cppclass COrderByNodeOptions "arrow::acero::OrderByNodeOptions"(CExecNodeOptions):
COrderByNodeOptions(COrdering ordering)
@@ -114,38 +107,10 @@ cdef extern from "arrow/acero/exec_plan.h" namespace "arrow::acero" nogil:
@staticmethod
CDeclaration Sequence(vector[CDeclaration] decls)
- CResult[CExecNode*] AddToPlan(CExecPlan* plan) const
-
- cdef cppclass CExecPlan "arrow::acero::ExecPlan":
- @staticmethod
- CResult[shared_ptr[CExecPlan]] Make(CExecContext* exec_context)
-
- void StartProducing()
- CStatus Validate()
- CStatus StopProducing()
-
- CFuture_Void finished()
-
- vector[CExecNode*] sinks() const
- vector[CExecNode*] sources() const
-
cdef cppclass CExecNode "arrow::acero::ExecNode":
const vector[CExecNode*]& inputs() const
const shared_ptr[CSchema]& output_schema() const
- cdef cppclass CExecBatch "arrow::acero::ExecBatch":
- vector[CDatum] values
- int64_t length
-
- shared_ptr[CRecordBatchReader] MakeGeneratorReader(
- shared_ptr[CSchema] schema,
- CAsyncExecBatchGenerator gen,
- CMemoryPool* memory_pool
- )
- CResult[CExecNode*] MakeExecNode(c_string factory_name, CExecPlan* plan,
- vector[CExecNode*] inputs,
- const CExecNodeOptions& options)
-
CResult[shared_ptr[CTable]] DeclarationToTable(
CDeclaration declaration, c_bool use_threads
)
diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py
index 8dbe139e7a..f32ca25a6c 100644
--- a/python/pyarrow/tests/test_acero.py
+++ b/python/pyarrow/tests/test_acero.py
@@ -22,13 +22,13 @@ import pyarrow.compute as pc
from pyarrow.compute import field
from pyarrow.acero import (
- TableSourceNodeOptions,
Declaration,
+ TableSourceNodeOptions,
FilterNodeOptions,
ProjectNodeOptions,
AggregateNodeOptions,
+ OrderByNodeOptions,
HashJoinNodeOptions,
- OrderByNodeOptions
)
try:
@@ -122,8 +122,7 @@ def test_project(table_source):
# provide name
decl = Declaration.from_sequence([
table_source,
- Declaration("project", ProjectNodeOptions(
- [pc.multiply(field("a"), 2)], ["a2"]))
+ Declaration("project", ProjectNodeOptions([pc.multiply(field("a"), 2)], ["a2"]))
])
result = decl.to_table()
assert result.schema.names == ["a2"]
@@ -145,8 +144,7 @@ def test_project(table_source):
def test_aggregate_scalar(table_source):
decl = Declaration.from_sequence([
table_source,
- Declaration("aggregate", AggregateNodeOptions(
- [("a", "sum", None, "a_sum")]))
+ Declaration("aggregate", AggregateNodeOptions([("a", "sum", None, "a_sum")]))
])
result = decl.to_table()
assert result.schema.names == ["a_sum"]
@@ -245,30 +243,26 @@ def test_order_by():
table_source = Declaration("table_source", TableSourceNodeOptions(table))
ord_opts = OrderByNodeOptions([("b", "ascending")])
- decl = Declaration.from_sequence(
- [table_source, Declaration("order_by", ord_opts)])
+ decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
result = decl.to_table()
expected = pa.table({"a": [1, 4, 2, 3], "b": [1, 2, 3, None]})
assert result.equals(expected)
ord_opts = OrderByNodeOptions([(field("b"), "descending")])
- decl = Declaration.from_sequence(
- [table_source, Declaration("order_by", ord_opts)])
+ decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
result = decl.to_table()
expected = pa.table({"a": [2, 4, 1, 3], "b": [3, 2, 1, None]})
assert result.equals(expected)
ord_opts = OrderByNodeOptions([(1, "descending")], null_placement="at_start")
- decl = Declaration.from_sequence(
- [table_source, Declaration("order_by", ord_opts)])
+ decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
result = decl.to_table()
expected = pa.table({"a": [3, 2, 4, 1], "b": [None, 3, 2, 1]})
assert result.equals(expected)
# empty ordering
ord_opts = OrderByNodeOptions([])
- decl = Declaration.from_sequence(
- [table_source, Declaration("order_by", ord_opts)])
+ decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
with pytest.raises(
ValueError, match="`ordering` must be an explicit non-empty ordering"
):
@@ -283,11 +277,9 @@ def test_order_by():
def test_hash_join():
left = pa.table({'key': [1, 2, 3], 'a': [4, 5, 6]})
- left_source = Declaration(
- "table_source", options=TableSourceNodeOptions(left))
+ left_source = Declaration("table_source", options=TableSourceNodeOptions(left))
right = pa.table({'key': [2, 3, 4], 'b': [4, 5, 6]})
- right_source = Declaration(
- "table_source", options=TableSourceNodeOptions(right))
+ right_source = Declaration("table_source", options=TableSourceNodeOptions(right))
# inner join
join_opts = HashJoinNodeOptions("inner", left_keys="key", right_keys="key")