You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by bk...@apache.org on 2021/01/11 15:05:02 UTC

[arrow] branch master updated: ARROW-11166: [Python] Add binding for ProjectOptions

This is an automated email from the ASF dual-hosted git repository.

bkietz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a4c860b  ARROW-11166: [Python] Add binding for ProjectOptions
a4c860b is described below

commit a4c860bf47149933ca0d5d47f60d32269b502d43
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Mon Jan 11 10:03:54 2021 -0500

    ARROW-11166: [Python] Add binding for ProjectOptions
    
    See also: https://github.com/apache/arrow/pull/8894/#issuecomment-756222590
    
    The "project" compute function is not really intended for direct use; it's primarily a convenience for exposing expressions to projection: https://issues.apache.org/jira/browse/ARROW-11174
    
    As such, maybe it should be hidden instead of exposed to python?
    
    Closes #9131 from bkietz/11166-Add-bindings-for-ProjectO
    
    Authored-by: Benjamin Kietzman <be...@gmail.com>
    Signed-off-by: Benjamin Kietzman <be...@gmail.com>
---
 docs/source/cpp/compute.rst          |  7 +++++++
 python/pyarrow/_compute.pyx          | 20 ++++++++++++++++++++
 python/pyarrow/compute.py            |  1 +
 python/pyarrow/includes/libarrow.pxd |  5 +++++
 4 files changed, 33 insertions(+)

diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 4728597..158297b 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -461,6 +461,8 @@ Structural transforms
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
 | list_value_length        | Unary      | List-like                                      | Int32 or Int64      | \(5)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
+| project                  | Varargs    | Any                                            | Struct              | \(6)    |
++--------------------------+------------+------------------------------------------------+---------------------+---------+
 
 * \(1) First input must be an array, second input a scalar of the same type.
   Output is an array of the same type as the inputs, and with the same values
@@ -475,6 +477,11 @@ Structural transforms
 * \(5) Each output element is the length of the corresponding input element
   (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
 
+* \(6) The output struct's field types are the types of its arguments. The
+  field names are specified using an instance of :struct:`ProjectOptions`.
+  The output shape will be scalar if all inputs are scalar, otherwise any
+  scalars will be broadcast to arrays.
+
 Conversions
 ~~~~~~~~~~~
 
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index c975901..3772ab2 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -665,6 +665,26 @@ class PartitionNthOptions(_PartitionNthOptions):
         self._set_options(pivot)
 
 
+cdef class _ProjectOptions(FunctionOptions):  # wraps C++ ProjectOptions
+    cdef:
+        unique_ptr[CProjectOptions] project_options  # set by _set_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.project_options.get()
+
+    def _set_options(self, field_names):  # field_names: iterable of names
+        cdef:
+            vector[c_string] c_field_names  # encoded names handed to C++
+        for n in field_names:
+            c_field_names.push_back(tobytes(n))
+        self.project_options.reset(new CProjectOptions(c_field_names))
+
+
+class ProjectOptions(_ProjectOptions):  # public wrapper; see _ProjectOptions
+    def __init__(self, field_names):  # field_names: names of output fields
+        self._set_options(field_names)
+
+
 cdef class _MinMaxOptions(FunctionOptions):
     cdef:
         CMinMaxOptions min_max_options
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 5127bbd..dcd1c90 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -36,6 +36,7 @@ from pyarrow._compute import (  # noqa
     MinMaxOptions,
     ModeOptions,
     PartitionNthOptions,
+    ProjectOptions,
     SetLookupOptions,
     StrptimeOptions,
     TakeOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 603ef29..aeba2d3 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1814,6 +1814,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         CPartitionNthOptions(int64_t pivot)
         int64_t pivot
 
+    cdef cppclass CProjectOptions \
+            "arrow::compute::ProjectOptions"(CFunctionOptions):
+        CProjectOptions(vector[c_string] field_names)
+        vector[c_string] field_names  # names for the output struct's fields
+
     ctypedef enum CSortOrder" arrow::compute::SortOrder":
         CSortOrder_Ascending \
             "arrow::compute::SortOrder::Ascending"