You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2023/04/04 12:33:00 UTC

[arrow] branch main updated: GH-33976: [Python] Clean-up Acero related declarations in libarrow_acero.pxd (#34773)

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 81c828ed5c GH-33976: [Python] Clean-up Acero related declarations in libarrow_acero.pxd (#34773)
81c828ed5c is described below

commit 81c828ed5c7689ec6f5caad354a627847ed46304
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Tue Apr 4 14:32:52 2023 +0200

    GH-33976: [Python] Clean-up Acero related declarations in libarrow_acero.pxd (#34773)
    
    ### What changes are included in this PR?
    
    I removed some of the declarations that are no longer used (after the refactoring for https://github.com/apache/arrow/issues/33976).
    
    ### Are there any user-facing changes?
    
    No
    
    * Issue: #33976
    
    Lead-authored-by: Joris Van den Bossche <jo...@gmail.com>
    Co-authored-by: Davide Pasetto <dp...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/includes/libarrow_acero.pxd | 35 ------------------------------
 python/pyarrow/tests/test_acero.py         | 28 +++++++++---------------
 2 files changed, 10 insertions(+), 53 deletions(-)

diff --git a/python/pyarrow/includes/libarrow_acero.pxd b/python/pyarrow/includes/libarrow_acero.pxd
index 5ff5ad99dc..920668cdd0 100644
--- a/python/pyarrow/includes/libarrow_acero.pxd
+++ b/python/pyarrow/includes/libarrow_acero.pxd
@@ -46,9 +46,6 @@ cdef extern from "arrow/acero/options.h" namespace "arrow::acero" nogil:
         CJoinType_RIGHT_OUTER "arrow::acero::JoinType::RIGHT_OUTER"
         CJoinType_FULL_OUTER "arrow::acero::JoinType::FULL_OUTER"
 
-    cdef cppclass CAsyncExecBatchGenerator "arrow::acero::AsyncExecBatchGenerator":
-        pass
-
     cdef cppclass CExecNodeOptions "arrow::acero::ExecNodeOptions":
         pass
 
@@ -73,10 +70,6 @@ cdef extern from "arrow/acero/options.h" namespace "arrow::acero" nogil:
     cdef cppclass CAggregateNodeOptions "arrow::acero::AggregateNodeOptions"(CExecNodeOptions):
         CAggregateNodeOptions(vector[CAggregate] aggregates, vector[CFieldRef] names)
 
-    cdef cppclass COrderBySinkNodeOptions "arrow::acero::OrderBySinkNodeOptions"(CExecNodeOptions):
-        COrderBySinkNodeOptions(vector[CSortOptions] options,
-                                CAsyncExecBatchGenerator generator)
-
     cdef cppclass COrderByNodeOptions "arrow::acero::OrderByNodeOptions"(CExecNodeOptions):
         COrderByNodeOptions(COrdering ordering)
 
@@ -114,38 +107,10 @@ cdef extern from "arrow/acero/exec_plan.h" namespace "arrow::acero" nogil:
         @staticmethod
         CDeclaration Sequence(vector[CDeclaration] decls)
 
-        CResult[CExecNode*] AddToPlan(CExecPlan* plan) const
-
-    cdef cppclass CExecPlan "arrow::acero::ExecPlan":
-        @staticmethod
-        CResult[shared_ptr[CExecPlan]] Make(CExecContext* exec_context)
-
-        void StartProducing()
-        CStatus Validate()
-        CStatus StopProducing()
-
-        CFuture_Void finished()
-
-        vector[CExecNode*] sinks() const
-        vector[CExecNode*] sources() const
-
     cdef cppclass CExecNode "arrow::acero::ExecNode":
         const vector[CExecNode*]& inputs() const
         const shared_ptr[CSchema]& output_schema() const
 
-    cdef cppclass CExecBatch "arrow::acero::ExecBatch":
-        vector[CDatum] values
-        int64_t length
-
-    shared_ptr[CRecordBatchReader] MakeGeneratorReader(
-        shared_ptr[CSchema] schema,
-        CAsyncExecBatchGenerator gen,
-        CMemoryPool* memory_pool
-    )
-    CResult[CExecNode*] MakeExecNode(c_string factory_name, CExecPlan* plan,
-                                     vector[CExecNode*] inputs,
-                                     const CExecNodeOptions& options)
-
     CResult[shared_ptr[CTable]] DeclarationToTable(
         CDeclaration declaration, c_bool use_threads
     )
diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py
index 8dbe139e7a..f32ca25a6c 100644
--- a/python/pyarrow/tests/test_acero.py
+++ b/python/pyarrow/tests/test_acero.py
@@ -22,13 +22,13 @@ import pyarrow.compute as pc
 from pyarrow.compute import field
 
 from pyarrow.acero import (
-    TableSourceNodeOptions,
     Declaration,
+    TableSourceNodeOptions,
     FilterNodeOptions,
     ProjectNodeOptions,
     AggregateNodeOptions,
+    OrderByNodeOptions,
     HashJoinNodeOptions,
-    OrderByNodeOptions
 )
 
 try:
@@ -122,8 +122,7 @@ def test_project(table_source):
     # provide name
     decl = Declaration.from_sequence([
         table_source,
-        Declaration("project", ProjectNodeOptions(
-            [pc.multiply(field("a"), 2)], ["a2"]))
+        Declaration("project", ProjectNodeOptions([pc.multiply(field("a"), 2)], ["a2"]))
     ])
     result = decl.to_table()
     assert result.schema.names == ["a2"]
@@ -145,8 +144,7 @@ def test_project(table_source):
 def test_aggregate_scalar(table_source):
     decl = Declaration.from_sequence([
         table_source,
-        Declaration("aggregate", AggregateNodeOptions(
-            [("a", "sum", None, "a_sum")]))
+        Declaration("aggregate", AggregateNodeOptions([("a", "sum", None, "a_sum")]))
     ])
     result = decl.to_table()
     assert result.schema.names == ["a_sum"]
@@ -245,30 +243,26 @@ def test_order_by():
     table_source = Declaration("table_source", TableSourceNodeOptions(table))
 
     ord_opts = OrderByNodeOptions([("b", "ascending")])
-    decl = Declaration.from_sequence(
-        [table_source, Declaration("order_by", ord_opts)])
+    decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
     result = decl.to_table()
     expected = pa.table({"a": [1, 4, 2, 3], "b": [1, 2, 3, None]})
     assert result.equals(expected)
 
     ord_opts = OrderByNodeOptions([(field("b"), "descending")])
-    decl = Declaration.from_sequence(
-        [table_source, Declaration("order_by", ord_opts)])
+    decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
     result = decl.to_table()
     expected = pa.table({"a": [2, 4, 1, 3], "b": [3, 2, 1, None]})
     assert result.equals(expected)
 
     ord_opts = OrderByNodeOptions([(1, "descending")], null_placement="at_start")
-    decl = Declaration.from_sequence(
-        [table_source, Declaration("order_by", ord_opts)])
+    decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
     result = decl.to_table()
     expected = pa.table({"a": [3, 2, 4, 1], "b": [None, 3, 2, 1]})
     assert result.equals(expected)
 
     # emtpy ordering
     ord_opts = OrderByNodeOptions([])
-    decl = Declaration.from_sequence(
-        [table_source, Declaration("order_by", ord_opts)])
+    decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
     with pytest.raises(
         ValueError, match="`ordering` must be an explicit non-empty ordering"
     ):
@@ -283,11 +277,9 @@ def test_order_by():
 
 def test_hash_join():
     left = pa.table({'key': [1, 2, 3], 'a': [4, 5, 6]})
-    left_source = Declaration(
-        "table_source", options=TableSourceNodeOptions(left))
+    left_source = Declaration("table_source", options=TableSourceNodeOptions(left))
     right = pa.table({'key': [2, 3, 4], 'b': [4, 5, 6]})
-    right_source = Declaration(
-        "table_source", options=TableSourceNodeOptions(right))
+    right_source = Declaration("table_source", options=TableSourceNodeOptions(right))
 
     # inner join
     join_opts = HashJoinNodeOptions("inner", left_keys="key", right_keys="key")