You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pc...@apache.org on 2018/11/11 21:30:56 UTC

[arrow] branch master updated: ARROW-3746: [Gandiva] [Python] Print list of functions registered with gandiva

This is an automated email from the ASF dual-hosted git repository.

pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1ef6c26  ARROW-3746: [Gandiva] [Python] Print list of functions registered with gandiva
1ef6c26 is described below

commit 1ef6c2644b654fa77c49cc20bb9d8fc66d3f0c4f
Author: Philipp Moritz <pc...@gmail.com>
AuthorDate: Sun Nov 11 13:30:38 2018 -0800

    ARROW-3746: [Gandiva] [Python] Print list of functions registered with gandiva
    
    I'm also making the iterators of the Function registry static; can you check if that's OK, @praveenbingo and @pravindra?
    
    Author: Philipp Moritz <pc...@gmail.com>
    
    Closes #2933 from pcmoritz/gandiva-print-functions and squashes the following commits:
    
    58fb14bfd <Philipp Moritz> linting
    09fe76b2d <Philipp Moritz> documentation fix and cleanup
    1bc904fe8 <Philipp Moritz> add test and simplify
    3b7d57960 <Philipp Moritz> lint
    947dd64b5 <Philipp Moritz> lint
    c02dc7c0b <Philipp Moritz> fix
    cd520f834 <Philipp Moritz> fix
    4a23c50de <Philipp Moritz> update
    079070552 <Philipp Moritz> python linting
    cf7fa35b0 <Philipp Moritz> fix lint
    eacceec55 <Philipp Moritz> add documentation
    3a1b78cfc <Philipp Moritz> print list of functions registered with gandiva
---
 cpp/src/gandiva/expression_registry.cc | 11 ++++++
 cpp/src/gandiva/expression_registry.h  |  3 ++
 python/pyarrow/gandiva.pyx             | 64 ++++++++++++++++++++++++++++++++--
 python/pyarrow/includes/libgandiva.pxd | 21 +++++++++++
 python/pyarrow/tests/test_gandiva.py   | 10 ++++++
 5 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/cpp/src/gandiva/expression_registry.cc b/cpp/src/gandiva/expression_registry.cc
index c17c5b3..fb5a45e 100644
--- a/cpp/src/gandiva/expression_registry.cc
+++ b/cpp/src/gandiva/expression_registry.cc
@@ -150,4 +150,15 @@ void ExpressionRegistry::AddArrowTypesToVector(arrow::Type::type& type,
   }
 }
 
+std::vector<std::shared_ptr<FunctionSignature>> GetRegisteredFunctionSignatures() {
+  ExpressionRegistry registry;
+  std::vector<std::shared_ptr<FunctionSignature>> signatures;
+  for (auto iter = registry.function_signature_begin();
+       iter != registry.function_signature_end(); iter++) {
+    signatures.push_back(std::make_shared<FunctionSignature>(
+        (*iter).base_name(), (*iter).param_types(), (*iter).ret_type()));
+  }
+  return signatures;
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/expression_registry.h b/cpp/src/gandiva/expression_registry.h
index fde0449..a03deab 100644
--- a/cpp/src/gandiva/expression_registry.h
+++ b/cpp/src/gandiva/expression_registry.h
@@ -61,5 +61,8 @@ class ExpressionRegistry {
   static void AddArrowTypesToVector(arrow::Type::type& type, DataTypeVector& vector);
   std::unique_ptr<FunctionRegistry> function_registry_;
 };
+
+std::vector<std::shared_ptr<FunctionSignature>> GetRegisteredFunctionSignatures();
+
 }  // namespace gandiva
 #endif  // GANDIVA_TYPES_H
diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index 84fc5fa..162517a 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -29,7 +29,8 @@ from pyarrow.includes.libarrow cimport *
 from pyarrow.compat import frombytes
 from pyarrow.types import _as_type
 from pyarrow.lib cimport (Array, DataType, Field, MemoryPool, RecordBatch,
-                          Schema, check_status, pyarrow_wrap_array)
+                          Schema, check_status, pyarrow_wrap_array,
+                          pyarrow_wrap_data_type)
 
 from pyarrow.includes.libgandiva cimport (CCondition, CExpression,
                                           CNode, CProjector, CFilter,
@@ -56,7 +57,9 @@ from pyarrow.includes.libgandiva cimport (CCondition, CExpression,
                                           SelectionVector_MakeInt32,
                                           SelectionVector_MakeInt64,
                                           Projector_Make,
-                                          Filter_Make)
+                                          Filter_Make,
+                                          CFunctionSignature,
+                                          GetRegisteredFunctionSignatures)
 
 
 cdef class Node:
@@ -257,3 +260,60 @@ cpdef make_filter(Schema schema, Condition condition):
     cdef shared_ptr[CFilter] result
     check_status(Filter_Make(schema.sp_schema, condition.condition, &result))
     return Filter.create(result)
+
+cdef class FunctionSignature:
+    """
+    Signature of a Gandiva function including name, parameter types
+    and return type.
+    """
+
+    cdef:
+        shared_ptr[CFunctionSignature] signature
+
+    def __init__(self):
+        raise TypeError("Do not call {}'s constructor directly."
+                        .format(self.__class__.__name__))
+
+    @staticmethod
+    cdef create(shared_ptr[CFunctionSignature] signature):
+        cdef FunctionSignature self = FunctionSignature.__new__(
+            FunctionSignature)
+        self.signature = signature
+        return self
+
+    def return_type(self):
+        return pyarrow_wrap_data_type(self.signature.get().ret_type())
+
+    def param_types(self):
+        result = []
+        cdef vector[shared_ptr[CDataType]] types = \
+            self.signature.get().param_types()
+        for t in types:
+            result.append(pyarrow_wrap_data_type(t))
+        return result
+
+    def name(self):
+        return self.signature.get().base_name().decode()
+
+    def __repr__(self):
+        signature = self.signature.get().ToString().decode()
+        return "FunctionSignature(" + signature + ")"
+
+
+def get_registered_function_signatures():
+    """
+    Return the function signatures registered in Gandiva's ExpressionRegistry.
+
+    Returns
+    -------
+    registry: a list of registered function signatures
+    """
+    results = []
+
+    cdef vector[shared_ptr[CFunctionSignature]] signatures = \
+        GetRegisteredFunctionSignatures()
+
+    for signature in signatures:
+        results.append(FunctionSignature.create(signature))
+
+    return results
diff --git a/python/pyarrow/includes/libgandiva.pxd b/python/pyarrow/includes/libgandiva.pxd
index a9f4a7e..6e98e89 100644
--- a/python/pyarrow/includes/libgandiva.pxd
+++ b/python/pyarrow/includes/libgandiva.pxd
@@ -151,3 +151,24 @@ cdef extern from "gandiva/filter.h" namespace "gandiva" nogil:
         "gandiva::Filter::Make"(
             shared_ptr[CSchema] schema, shared_ptr[CCondition] condition,
             shared_ptr[CFilter]* filter)
+
+cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil:
+
+    cdef cppclass CFunctionSignature" gandiva::FunctionSignature":
+
+        CFunctionSignature(const c_string& base_name,
+                           vector[shared_ptr[CDataType]] param_types,
+                           shared_ptr[CDataType] ret_type)
+
+        shared_ptr[CDataType] ret_type() const
+
+        const c_string& base_name() const
+
+        vector[shared_ptr[CDataType]] param_types() const
+
+        c_string ToString() const
+
+cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil:
+
+    cdef vector[shared_ptr[CFunctionSignature]] \
+        GetRegisteredFunctionSignatures()
diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py
index 579f88d..dd94ecd 100644
--- a/python/pyarrow/tests/test_gandiva.py
+++ b/python/pyarrow/tests/test_gandiva.py
@@ -162,3 +162,13 @@ def test_regex():
     r, = projector.evaluate(table.to_batches()[0])
     b = pa.array([False, True, True, True], type=pa.bool_())
     assert r.equals(b)
+
+
+@pytest.mark.gandiva
+def test_get_registered_function_signatures():
+    import pyarrow.gandiva as gandiva
+    signatures = gandiva.get_registered_function_signatures()
+
+    assert type(signatures[0].return_type()) is pa.DataType
+    assert type(signatures[0].param_types()) is list
+    assert hasattr(signatures[0], "name")