You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pc...@apache.org on 2018/11/08 05:06:55 UTC

[arrow] branch master updated: ARROW-3602: [Gandiva] [Python] Initial Gandiva Cython bindings

This is an automated email from the ASF dual-hosted git repository.

pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e9cb87  ARROW-3602: [Gandiva] [Python] Initial Gandiva Cython bindings
8e9cb87 is described below

commit 8e9cb870de0ecb126a0d7403f34e3a1ce119d618
Author: Philipp Moritz <pc...@gmail.com>
AuthorDate: Wed Nov 7 21:06:45 2018 -0800

    ARROW-3602: [Gandiva] [Python] Initial Gandiva Cython bindings
    
    This is an initial Cython wrapper for Gandiva.
    
    Feedback is appreciated (the API is experimental right now and will most likely change in the future).
    
    Author: Philipp Moritz <pc...@gmail.com>
    
    Closes #2822 from pcmoritz/gandiva-cython and squashes the following commits:
    
    6ff0d9402 <Philipp Moritz> Merge branch 'master' into gandiva-cython
    6ea00628b <Philipp Moritz> use cython instantiation
    4a366bb42 <Philipp Moritz> Merge branch 'master' into gandiva-cython
    73bc203fd <Philipp Moritz> use strings instead of bytes for function names
    f589de37d <Philipp Moritz> fix travis
    69b10cb3d <Philipp Moritz> don't build gandiva in mac os build
    20b75a6bb <Philipp Moritz> update
    e1d074bc5 <Philipp Moritz> build gandiva tests
    562b763e4 <Philipp Moritz> put back llvm
    9b9d3ab22 <Philipp Moritz> don't build gandiva tests in python build
    1ee601933 <Philipp Moritz> don't run gandiva tests on python build
    646f36014 <Philipp Moritz> Merge branch 'gandiva-cython' of github.com:pcmoritz/arrow into gandiva-cython
    40bb0c701 <Philipp Moritz> use gandiva files
    cd282a348 <Philipp Moritz> Merge branch 'master' into gandiva-cython
    829f7a2cf <Philipp Moritz> fix
    f726d1745 <Philipp Moritz> remove compiler error
    99f93f167 <Philipp Moritz> Merge branch 'master' into gandiva-cython
    cfec265e7 <Philipp Moritz> use clang
    600888443 <Philipp Moritz> install llvm 6.0 (?)
    5abd24897 <Philipp Moritz> build gandiva
    9ee2c5868 <Philipp Moritz> add gandiva flag
    038084655 <Philipp Moritz> fix pytest include
    27434d596 <Philipp Moritz> add ganvida pytest flags
    8374cdb6f <Philipp Moritz> make gandiva optional for tests
    5ceb22163 <Philipp Moritz> allow gandiva failure
    021b301c0 <Philipp Moritz> lint
    276536c8a <Philipp Moritz> Merge branch 'master' into gandiva-cython
    b41599496 <Philipp Moritz> remove gandiva cython bindings from wheels
    7b75dec5e <Philipp Moritz> linting
    dc2a64870 <Philipp Moritz> add filter
    0ff33c18d <Philipp Moritz> add test
    b4571eed2 <Philipp Moritz> build gandiva for wheels
    92c8dff70 <Philipp Moritz> linting
    ae5305af1 <Philipp Moritz> linting
    97568d90b <Philipp Moritz> whitespace
    335be395d <Philipp Moritz> update FindGandiva.cmake
    33786d260 <Philipp Moritz> memory pool handling
    26be86632 <Philipp Moritz> fix array creation
    aab17705c <Philipp Moritz> fixes
    7a8c9483e <Philipp Moritz> port gandiva cython wrappers to in-tree gandiva
---
 .travis.yml                            |   2 +
 ci/travis_script_python.sh             |   8 ++
 cpp/CMakeLists.txt                     |   4 +
 cpp/cmake_modules/FindGandiva.cmake    |  96 ++++++++++++++++
 cpp/src/gandiva/CMakeLists.txt         |  52 +++++----
 cpp/src/gandiva/tests/generate_data.h  |   1 -
 python/CMakeLists.txt                  |  24 ++++
 python/pyarrow/gandiva.pyx             | 204 +++++++++++++++++++++++++++++++++
 python/pyarrow/includes/libgandiva.pxd | 107 +++++++++++++++++
 python/pyarrow/tests/conftest.py       |   8 ++
 python/pyarrow/tests/test_gandiva.py   | 100 ++++++++++++++++
 python/setup.py                        |   9 ++
 12 files changed, 590 insertions(+), 25 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f6f499c..20bf4ae 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -78,6 +78,7 @@ matrix:
     - ARROW_BUILD_WARNING_LEVEL=CHECKIN
     - ARROW_TRAVIS_PYTHON_JVM=1
     - ARROW_TRAVIS_JAVA_BUILD_ONLY=1
+    - ARROW_TRAVIS_PYTHON_GANDIVA=1
     # ARROW-2999 Benchmarks are disabled in Travis CI for the time being
     # - ARROW_TRAVIS_PYTHON_BENCHMARKS=1
     - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
@@ -85,6 +86,7 @@ matrix:
     # (ARROW_CI_CPP_AFFECTED implies ARROW_CI_PYTHON_AFFECTED)
     - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi
     - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh
+    - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh
     # If either C++ or Python changed, we must install the C++ libraries
     - git submodule update --init
     - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 4d48adb..608e1ce 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -96,6 +96,10 @@ if [ $ARROW_TRAVIS_COVERAGE == "1" ]; then
   CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GENERATE_COVERAGE=ON"
 fi
 
+if [ "$ARROW_TRAVIS_PYTHON_GANDIVA" == "1" ]; then  # quoted: an unset/empty value must not break the test
+  CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GANDIVA=ON -DARROW_GANDIVA_BUILD_TESTS=OFF"
+fi
+
 cmake -GNinja \
       $CMAKE_COMMON_FLAGS \
       -DARROW_BUILD_TESTS=on \
@@ -136,6 +140,9 @@ export PYARROW_BUILD_TYPE=$ARROW_BUILD_TYPE
 export PYARROW_WITH_PARQUET=1
 export PYARROW_WITH_PLASMA=1
 export PYARROW_WITH_ORC=1
+if [ "$ARROW_TRAVIS_PYTHON_GANDIVA" == "1" ]; then  # quoted: an unset/empty value must not break the test
+  export PYARROW_WITH_GANDIVA=1
+fi
 
 python setup.py develop
 
@@ -201,6 +208,7 @@ if [ "$ARROW_TRAVIS_PYTHON_BENCHMARKS" == "1" ] && [ "$PYTHON_VERSION" == "3.6"
   export PYARROW_WITH_PARQUET=1
   export PYARROW_WITH_PLASMA=1
   export PYARROW_WITH_ORC=0
+  export PYARROW_WITH_GANDIVA=0
 
   pushd $ARROW_PYTHON_DIR
   # Workaround for https://github.com/airspeed-velocity/asv/issues/631
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index b0960f8..13b556e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -317,6 +317,10 @@ Always OFF if building binaries"
     "Build the Gandiva JNI wrappers"
     ON)
 
+  option(ARROW_GANDIVA_BUILD_TESTS
+    "Build the Gandiva googletest unit tests"
+    ON)
+
 endif()
 
 
diff --git a/cpp/cmake_modules/FindGandiva.cmake b/cpp/cmake_modules/FindGandiva.cmake
new file mode 100644
index 0000000..5559c09
--- /dev/null
+++ b/cpp/cmake_modules/FindGandiva.cmake
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# - Find GANDIVA (gandiva/expression_registry.h, libgandiva.a, libgandiva.so)
+# This module defines
+#  GANDIVA_INCLUDE_DIR, directory containing headers
+#  GANDIVA_LIBS, directory containing gandiva libraries
+#  GANDIVA_STATIC_LIB, path to libgandiva.a
+#  GANDIVA_SHARED_LIB, path to libgandiva's shared library
+#  GANDIVA_SHARED_IMP_LIB, path to libgandiva's import library (MSVC only)
+#  GANDIVA_FOUND, whether gandiva has been found
+
+include(FindPkgConfig)
+
+if ("$ENV{ARROW_HOME}" STREQUAL "")
+  pkg_check_modules(GANDIVA gandiva)
+  if (GANDIVA_FOUND)
+    pkg_get_variable(GANDIVA_SO_VERSION gandiva so_version)
+    set(GANDIVA_ABI_VERSION ${GANDIVA_SO_VERSION})
+    message(STATUS "Gandiva SO and ABI version: ${GANDIVA_SO_VERSION}")
+    pkg_get_variable(GANDIVA_FULL_SO_VERSION gandiva full_so_version)
+    message(STATUS "Gandiva full SO version: ${GANDIVA_FULL_SO_VERSION}")
+    set(GANDIVA_INCLUDE_DIR ${GANDIVA_INCLUDE_DIRS})
+    set(GANDIVA_LIBS ${GANDIVA_LIBRARY_DIRS})
+    set(GANDIVA_SEARCH_LIB_PATH ${GANDIVA_LIBRARY_DIRS})
+  endif()
+else()
+  set(GANDIVA_HOME "$ENV{ARROW_HOME}")
+
+  set(GANDIVA_SEARCH_HEADER_PATHS
+    ${GANDIVA_HOME}/include
+    )
+
+  set(GANDIVA_SEARCH_LIB_PATH
+    ${GANDIVA_HOME}/lib
+    )
+
+  find_path(GANDIVA_INCLUDE_DIR gandiva/expression_registry.h PATHS
+    ${GANDIVA_SEARCH_HEADER_PATHS}
+    # make sure we don't accidentally pick up a different version
+    NO_DEFAULT_PATH
+    )
+endif()
+
+find_library(GANDIVA_LIB_PATH NAMES gandiva
+  PATHS
+  ${GANDIVA_SEARCH_LIB_PATH}
+  NO_DEFAULT_PATH)
+get_filename_component(GANDIVA_LIBS ${GANDIVA_LIB_PATH} DIRECTORY)
+
+if (GANDIVA_INCLUDE_DIR AND GANDIVA_LIBS)
+  set(GANDIVA_FOUND TRUE)
+  set(GANDIVA_LIB_NAME gandiva)
+
+  set(GANDIVA_STATIC_LIB ${GANDIVA_LIBS}/lib${GANDIVA_LIB_NAME}.a)
+
+  set(GANDIVA_SHARED_LIB ${GANDIVA_LIBS}/lib${GANDIVA_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
+endif()
+
+if (GANDIVA_FOUND)
+  if (NOT Gandiva_FIND_QUIETLY)
+    message(STATUS "Found the Gandiva core library: ${GANDIVA_LIB_PATH}")
+  endif ()
+else ()
+  if (NOT Gandiva_FIND_QUIETLY)
+    set(GANDIVA_ERR_MSG "Could not find the Gandiva library. Looked for headers")
+    set(GANDIVA_ERR_MSG "${GANDIVA_ERR_MSG} in ${GANDIVA_SEARCH_HEADER_PATHS}, and for libs")
+    set(GANDIVA_ERR_MSG "${GANDIVA_ERR_MSG} in ${GANDIVA_SEARCH_LIB_PATH}")
+    if (Gandiva_FIND_REQUIRED)
+      message(FATAL_ERROR "${GANDIVA_ERR_MSG}")
+    else (Gandiva_FIND_REQUIRED)
+      message(STATUS "${GANDIVA_ERR_MSG}")
+    endif (Gandiva_FIND_REQUIRED)
+  endif ()
+  set(GANDIVA_FOUND FALSE)
+endif ()
+
+mark_as_advanced(
+  GANDIVA_INCLUDE_DIR
+  GANDIVA_STATIC_LIB
+  GANDIVA_SHARED_LIB
+)
diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 0aeb24b..6c227ba 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -120,34 +120,38 @@ install(
   FILES "${CMAKE_CURRENT_BINARY_DIR}/gandiva.pc"
   DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/")
 
-#args: label test-file src-files
-add_gandiva_unit_test(bitmap_accumulator_test.cc bitmap_accumulator.cc)
-add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc configuration.cc
-    gdv_function_stubs.cc context_helper.cc to_date_holder.cc date_utils.cc
-    exported_funcs_registry.cc ${BC_FILE_PATH_CC})
-add_gandiva_unit_test(function_signature_test.cc function_signature.cc)
-add_gandiva_unit_test(function_registry_test.cc function_registry.cc function_signature.cc)
-add_gandiva_unit_test(llvm_types_test.cc llvm_types.cc)
-add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc regex_util.cc engine.cc
-    llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc
-    bitmap_accumulator.cc configuration.cc  function_signature.cc like_holder.cc
-    to_date_holder.cc date_utils.cc regex_util.cc gdv_function_stubs.cc context_helper.cc
-    exported_funcs_registry.cc ${BC_FILE_PATH_CC})
-add_gandiva_unit_test(annotator_test.cc annotator.cc function_signature.cc)
-add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc to_date_holder.cc date_utils.cc)
-add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc to_date_holder.cc date_utils.cc)
-add_gandiva_unit_test(expression_registry_test.cc llvm_types.cc expression_registry.cc function_signature.cc function_registry.cc)
-add_gandiva_unit_test(selection_vector_test.cc selection_vector.cc)
-add_gandiva_unit_test(lru_cache_test.cc)
-add_gandiva_unit_test(to_date_holder_test.cc to_date_holder.cc date_utils.cc)
-add_gandiva_unit_test(simple_arena_test.cc)
+if (ARROW_GANDIVA_BUILD_TESTS)
+  #args: label test-file src-files
+  add_gandiva_unit_test(bitmap_accumulator_test.cc bitmap_accumulator.cc)
+  add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc configuration.cc
+      gdv_function_stubs.cc context_helper.cc to_date_holder.cc date_utils.cc
+      exported_funcs_registry.cc ${BC_FILE_PATH_CC})
+  add_gandiva_unit_test(function_signature_test.cc function_signature.cc)
+  add_gandiva_unit_test(function_registry_test.cc function_registry.cc function_signature.cc)
+  add_gandiva_unit_test(llvm_types_test.cc llvm_types.cc)
+  add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc regex_util.cc engine.cc
+      llvm_types.cc expr_decomposer.cc function_registry.cc annotator.cc
+      bitmap_accumulator.cc configuration.cc  function_signature.cc like_holder.cc
+      to_date_holder.cc date_utils.cc regex_util.cc gdv_function_stubs.cc context_helper.cc
+      exported_funcs_registry.cc ${BC_FILE_PATH_CC})
+  add_gandiva_unit_test(annotator_test.cc annotator.cc function_signature.cc)
+  add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc expr_decomposer.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc to_date_holder.cc date_utils.cc)
+  add_gandiva_unit_test(expr_decomposer_test.cc expr_decomposer.cc tree_expr_builder.cc annotator.cc function_registry.cc function_signature.cc like_holder.cc regex_util.cc to_date_holder.cc date_utils.cc)
+  add_gandiva_unit_test(expression_registry_test.cc llvm_types.cc expression_registry.cc function_signature.cc function_registry.cc)
+  add_gandiva_unit_test(selection_vector_test.cc selection_vector.cc)
+  add_gandiva_unit_test(lru_cache_test.cc)
+  add_gandiva_unit_test(to_date_holder_test.cc to_date_holder.cc date_utils.cc)
+  add_gandiva_unit_test(simple_arena_test.cc)
+endif()
 
 if (ARROW_GANDIVA_JAVA)
   add_subdirectory(jni)
 endif()
 add_subdirectory(precompiled)
 
-include(CTest)
-enable_testing()
+if (ARROW_GANDIVA_BUILD_TESTS)
+  include(CTest)
+  enable_testing()
 
-add_subdirectory(tests)
+  add_subdirectory(tests)
+endif()
diff --git a/cpp/src/gandiva/tests/generate_data.h b/cpp/src/gandiva/tests/generate_data.h
index 884c211..01665b8 100644
--- a/cpp/src/gandiva/tests/generate_data.h
+++ b/cpp/src/gandiva/tests/generate_data.h
@@ -67,7 +67,6 @@ class BoundedInt32DataGenerator : public Int32DataGenerator {
 
  protected:
   uint32_t upperBound_;
-  Random random_;
 };
 
 class Int64DataGenerator : public DataGenerator<int64_t> {
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 46d9b54..234186f 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -488,6 +488,30 @@ if (PYARROW_BUILD_ORC)
       _orc)
 endif()
 
+## Gandiva
+if (PYARROW_BUILD_GANDIVA)
+  find_package(Gandiva)
+
+  if(NOT GANDIVA_FOUND)
+    message(FATAL_ERROR "Unable to locate Gandiva libraries")
+  endif()
+
+  include_directories(SYSTEM ${GANDIVA_INCLUDE_DIR})
+
+  if (PYARROW_BUNDLE_ARROW_CPP)
+    bundle_arrow_lib(GANDIVA_SHARED_LIB
+      ABI_VERSION ${ARROW_ABI_VERSION}
+      SO_VERSION ${ARROW_SO_VERSION})
+  endif()
+
+  set(LINK_LIBS
+    ${LINK_LIBS}
+    ${GANDIVA_SHARED_LIB})
+
+  set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} gandiva)
+endif()
+
+
 ############################################################
 # Setup and build Cython modules
 ############################################################
diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
new file mode 100644
index 0000000..7bc462f
--- /dev/null
+++ b/python/pyarrow/gandiva.pyx
@@ -0,0 +1,204 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from libcpp cimport bool as c_bool, nullptr
+from libcpp.memory cimport shared_ptr, unique_ptr, make_shared
+from libcpp.string cimport string as c_string
+from libcpp.vector cimport vector as c_vector
+from libc.stdint cimport int64_t, uint8_t, uintptr_t
+
+from pyarrow.includes.libarrow cimport *
+from pyarrow.compat import frombytes
+from pyarrow.lib cimport check_status, pyarrow_wrap_array
+
+from pyarrow.includes.libgandiva cimport (CCondition, CExpression,
+                                          CNode, CProjector, CFilter,
+                                          CSelectionVector,
+                                          TreeExprBuilder_MakeExpression,
+                                          TreeExprBuilder_MakeFunction,
+                                          TreeExprBuilder_MakeLiteral,
+                                          TreeExprBuilder_MakeField,
+                                          TreeExprBuilder_MakeIf,
+                                          TreeExprBuilder_MakeCondition,
+                                          SelectionVector_MakeInt32,
+                                          Projector_Make,
+                                          Filter_Make)
+
+from pyarrow.lib cimport (Array, DataType, Field, MemoryPool,
+                          RecordBatch, Schema)
+
+cdef class Node:  # Python-level handle around a gandiva::Node
+    cdef:
+        shared_ptr[CNode] node  # shared reference to the wrapped C++ node
+
+    def __init__(self):  # always raises; instances come from Node.create
+        raise TypeError("Do not call {}'s constructor directly, use the "
+                        "TreeExprBuilder API instead"
+                        .format(self.__class__.__name__))
+
+    @staticmethod
+    cdef create(shared_ptr[CNode] node):  # factory used by TreeExprBuilder
+        cdef Node self = Node.__new__(Node)  # skips __init__, which raises
+        self.node = node
+        return self
+
+cdef class Expression:
+    cdef:
+        shared_ptr[CExpression] expression
+
+    cdef void init(self, shared_ptr[CExpression] expression):
+        self.expression = expression
+
+cdef class Condition:
+    cdef:
+        shared_ptr[CCondition] condition
+
+    def __init__(self):
+        raise TypeError("Do not call {}'s constructor directly, use the "
+                        "TreeExprBuilder API instead"
+                        .format(self.__class__.__name__))
+
+    @staticmethod
+    cdef create(shared_ptr[CCondition] condition):
+        cdef Condition self = Condition.__new__(Condition)
+        self.condition = condition
+        return self
+
+cdef class SelectionVector:
+    cdef:
+        shared_ptr[CSelectionVector] selection_vector
+
+    def __init__(self):
+        raise TypeError("Do not call {}'s constructor directly."
+                        .format(self.__class__.__name__))
+
+    @staticmethod
+    cdef create(shared_ptr[CSelectionVector] selection_vector):
+        cdef SelectionVector self = SelectionVector.__new__(SelectionVector)
+        self.selection_vector = selection_vector
+        return self
+
+    def to_array(self):
+        cdef shared_ptr[CArray] result = self.selection_vector.get().ToArray()
+        return pyarrow_wrap_array(result)
+
+cdef class Projector:
+    cdef:
+        shared_ptr[CProjector] projector
+        MemoryPool pool
+
+    def __init__(self):
+        raise TypeError("Do not call {}'s constructor directly, use "
+                        "make_projector instead"
+                        .format(self.__class__.__name__))
+
+    @staticmethod
+    cdef create(shared_ptr[CProjector] projector, MemoryPool pool):
+        cdef Projector self = Projector.__new__(Projector)
+        self.projector = projector
+        self.pool = pool
+        return self
+
+    def evaluate(self, RecordBatch batch):
+        cdef vector[shared_ptr[CArray]] results
+        check_status(self.projector.get().Evaluate(
+            batch.sp_batch.get()[0], self.pool.pool, &results))
+        cdef shared_ptr[CArray] result
+        arrays = []
+        for result in results:
+            arrays.append(pyarrow_wrap_array(result))
+        return arrays
+
+cdef class Filter:
+    cdef:
+        shared_ptr[CFilter] filter
+
+    def __init__(self):
+        raise TypeError("Do not call {}'s constructor directly, use "
+                        "make_filter instead"
+                        .format(self.__class__.__name__))
+
+    @staticmethod
+    cdef create(shared_ptr[CFilter] filter):
+        cdef Filter self = Filter.__new__(Filter)
+        self.filter = filter
+        return self
+
+    def evaluate(self, RecordBatch batch, MemoryPool pool):
+        cdef shared_ptr[CSelectionVector] selection
+        check_status(SelectionVector_MakeInt32(
+            batch.num_rows, pool.pool, &selection))
+        check_status(self.filter.get().Evaluate(
+            batch.sp_batch.get()[0], selection))
+        return SelectionVector.create(selection)
+
+cdef class TreeExprBuilder:
+
+    def make_literal(self, value):
+        cdef shared_ptr[CNode] r = TreeExprBuilder_MakeLiteral(value)
+        return Node.create(r)
+
+    def make_expression(self, Node root_node, Field return_field):
+        cdef shared_ptr[CExpression] r = TreeExprBuilder_MakeExpression(
+            root_node.node, return_field.sp_field)
+        cdef Expression expression = Expression()
+        expression.init(r)
+        return expression
+
+    def make_function(self, name, children, DataType return_type):
+        cdef c_vector[shared_ptr[CNode]] c_children
+        cdef Node child
+        for child in children:
+            c_children.push_back(child.node)
+        cdef shared_ptr[CNode] r = TreeExprBuilder_MakeFunction(
+            name.encode(), c_children, return_type.sp_type)
+        return Node.create(r)
+
+    def make_field(self, Field field):
+        cdef shared_ptr[CNode] r = TreeExprBuilder_MakeField(field.sp_field)
+        return Node.create(r)
+
+    def make_if(self, Node condition, Node this_node,
+                Node else_node, DataType return_type):
+        cdef shared_ptr[CNode] r = TreeExprBuilder_MakeIf(
+            condition.node, this_node.node, else_node.node,
+            return_type.sp_type)
+        return Node.create(r)
+
+    def make_condition(self, Node condition):
+        cdef shared_ptr[CCondition] r = TreeExprBuilder_MakeCondition(
+            condition.node)
+        return Condition.create(r)
+
+cpdef make_projector(Schema schema, children, MemoryPool pool):
+    cdef c_vector[shared_ptr[CExpression]] c_children
+    cdef Expression child
+    for child in children:
+        c_children.push_back(child.expression)
+    cdef shared_ptr[CProjector] result
+    check_status(Projector_Make(schema.sp_schema, c_children,
+                                &result))
+    return Projector.create(result, pool)
+
+cpdef make_filter(Schema schema, Condition condition):
+    cdef shared_ptr[CFilter] result
+    check_status(Filter_Make(schema.sp_schema, condition.condition, &result))
+    return Filter.create(result)
diff --git a/python/pyarrow/includes/libgandiva.pxd b/python/pyarrow/includes/libgandiva.pxd
new file mode 100644
index 0000000..b1e45af
--- /dev/null
+++ b/python/pyarrow/includes/libgandiva.pxd
@@ -0,0 +1,107 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+
+cdef extern from "gandiva/gandiva_aliases.h" namespace "gandiva" nogil:
+
+    cdef cppclass CNode" gandiva::Node":
+        pass
+
+    cdef cppclass CExpression" gandiva::Expression":
+        pass
+
+    ctypedef vector[shared_ptr[CNode]] CNodeVector" gandiva::NodeVector"
+
+    ctypedef vector[shared_ptr[CExpression]] \
+        CExpressionVector" gandiva::ExpressionVector"
+
+cdef extern from "gandiva/selection_vector.h" namespace "gandiva" nogil:
+
+    cdef cppclass CSelectionVector" gandiva::SelectionVector":
+
+        shared_ptr[CArray] ToArray()
+
+    cdef CStatus SelectionVector_MakeInt32\
+        "gandiva::SelectionVector::MakeInt32"(
+            int max_slots, CMemoryPool* pool,
+            shared_ptr[CSelectionVector]* selection_vector)
+
+cdef extern from "gandiva/condition.h" namespace "gandiva" nogil:
+
+    cdef cppclass CCondition" gandiva::Condition":
+        pass
+
+cdef extern from "gandiva/arrow.h" namespace "gandiva" nogil:
+
+    ctypedef vector[shared_ptr[CArray]] CArrayVector" gandiva::ArrayVector"
+
+
+cdef extern from "gandiva/tree_expr_builder.h" namespace "gandiva" nogil:
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeLiteral \
+        "gandiva::TreeExprBuilder::MakeLiteral"(double value)
+
+    cdef shared_ptr[CExpression] TreeExprBuilder_MakeExpression\
+        "gandiva::TreeExprBuilder::MakeExpression"(
+            shared_ptr[CNode] root_node, shared_ptr[CField] result_field)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeFunction \
+        "gandiva::TreeExprBuilder::MakeFunction"(
+            const c_string& name, const CNodeVector& children,
+            shared_ptr[CDataType] return_type)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeField \
+        "gandiva::TreeExprBuilder::MakeField"(shared_ptr[CField] field)
+
+    cdef shared_ptr[CNode] TreeExprBuilder_MakeIf \
+        "gandiva::TreeExprBuilder::MakeIf"(
+            shared_ptr[CNode] condition, shared_ptr[CNode] this_node,
+            shared_ptr[CNode] else_node, shared_ptr[CDataType] return_type)
+
+    cdef shared_ptr[CCondition] TreeExprBuilder_MakeCondition \
+        "gandiva::TreeExprBuilder::MakeCondition"(
+            shared_ptr[CNode] condition)
+
+    cdef CStatus Projector_Make \
+        "gandiva::Projector::Make"(
+            shared_ptr[CSchema] schema, const CExpressionVector& children,
+            shared_ptr[CProjector]* projector)
+
+cdef extern from "gandiva/projector.h" namespace "gandiva" nogil:
+
+    cdef cppclass CProjector" gandiva::Projector":
+
+        CStatus Evaluate(
+            const CRecordBatch& batch, CMemoryPool* pool,
+            CArrayVector* output)  # out-parameter: receives result arrays
+
+cdef extern from "gandiva/filter.h" namespace "gandiva" nogil:
+
+    cdef cppclass CFilter" gandiva::Filter":
+
+        CStatus Evaluate(
+            const CRecordBatch& batch,
+            shared_ptr[CSelectionVector] out_selection)
+
+    cdef CStatus Filter_Make \
+        "gandiva::Filter::Make"(
+            shared_ptr[CSchema] schema, shared_ptr[CCondition] condition,
+            shared_ptr[CFilter]* filter)
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 68266c8..6cdedbb 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -24,6 +24,7 @@ except ImportError:
 
 
 groups = [
+    'gandiva',
     'hdfs',
     'large_memory',
     'orc',
@@ -35,6 +36,7 @@ groups = [
 
 
 defaults = {
+    'gandiva': False,
     'hdfs': False,
     'large_memory': False,
     'orc': False,
@@ -45,6 +47,12 @@ defaults = {
 }
 
 try:
+    import pyarrow.gandiva # noqa
+    defaults['gandiva'] = True
+except ImportError:
+    pass
+
+try:
     import pyarrow.orc # noqa
     defaults['orc'] = True
 except ImportError:
diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py
new file mode 100644
index 0000000..f5874e4
--- /dev/null
+++ b/python/pyarrow/tests/test_gandiva.py
@@ -0,0 +1,100 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+import pyarrow as pa
+import pandas as pd
+
+
+@pytest.mark.gandiva
+def test_tree_exp_builder():
+    import pyarrow.gandiva as gandiva
+
+    builder = gandiva.TreeExprBuilder()
+
+    field_a = pa.field('a', pa.int32())
+    field_b = pa.field('b', pa.int32())
+
+    schema = pa.schema([field_a, field_b])
+
+    field_result = pa.field('res', pa.int32())
+
+    node_a = builder.make_field(field_a)
+    node_b = builder.make_field(field_b)
+
+    condition = builder.make_function("greater_than", [node_a, node_b],
+                                      pa.bool_())
+    if_node = builder.make_if(condition, node_a, node_b, pa.int32())
+
+    expr = builder.make_expression(if_node, field_result)
+
+    projector = gandiva.make_projector(
+        schema, [expr], pa.default_memory_pool())
+
+    a = pa.array([10, 12, -20, 5], type=pa.int32())
+    b = pa.array([5, 15, 15, 17], type=pa.int32())
+    e = pa.array([10, 15, 15, 17], type=pa.int32())
+    input_batch = pa.RecordBatch.from_arrays([a, b], names=['a', 'b'])
+
+    r, = projector.evaluate(input_batch)
+    assert r.equals(e)
+
+
+@pytest.mark.gandiva
+def test_table():
+    import pyarrow.gandiva as gandiva
+
+    df = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
+    table = pa.Table.from_pandas(df)
+
+    builder = gandiva.TreeExprBuilder()
+    node_a = builder.make_field(table.schema.field_by_name("a"))
+    node_b = builder.make_field(table.schema.field_by_name("b"))
+
+    sum = builder.make_function("add", [node_a, node_b], pa.float64())
+
+    field_result = pa.field("c", pa.float64())
+    expr = builder.make_expression(sum, field_result)
+
+    projector = gandiva.make_projector(
+        table.schema, [expr], pa.default_memory_pool())
+
+    # TODO: Add .evaluate function which can take Tables instead of
+    # RecordBatches
+    r, = projector.evaluate(table.to_batches()[0])
+
+    e = pa.Array.from_pandas(df["a"] + df["b"])
+    assert r.equals(e)
+
+
+@pytest.mark.gandiva
+def test_filter():
+    import pyarrow.gandiva as gandiva
+
+    df = pd.DataFrame({"a": [1.0 * i for i in range(10000)]})
+    table = pa.Table.from_pandas(df)
+
+    builder = gandiva.TreeExprBuilder()
+    node_a = builder.make_field(table.schema.field_by_name("a"))
+    thousand = builder.make_literal(1000.0)
+    cond = builder.make_function("less_than", [node_a, thousand], pa.bool_())
+    condition = builder.make_condition(cond)
+
+    filter = gandiva.make_filter(table.schema, condition)
+    result = filter.evaluate(table.to_batches()[0], pa.default_memory_pool())
+    assert result.to_array().equals(pa.array(range(1000), type=pa.uint32()))
diff --git a/python/setup.py b/python/setup.py
index 359960a..e6a8871 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -106,6 +106,7 @@ class build_ext(_build_ext):
                      ('with-tensorflow', None,
                       'build pyarrow with TensorFlow support'),
                      ('with-orc', None, 'build the ORC extension'),
+                     ('with-gandiva', None, 'build the Gandiva extension'),
                      ('generate-coverage', None,
                       'enable Cython code coverage'),
                      ('bundle-boost', None,
@@ -147,6 +148,8 @@ class build_ext(_build_ext):
             os.environ.get('PYARROW_WITH_TENSORFLOW', '0'))
         self.with_orc = strtobool(
             os.environ.get('PYARROW_WITH_ORC', '0'))
+        self.with_gandiva = strtobool(
+            os.environ.get('PYARROW_WITH_GANDIVA', '0'))
         self.generate_coverage = strtobool(
             os.environ.get('PYARROW_GENERATE_COVERAGE', '0'))
         self.bundle_arrow_cpp = strtobool(
@@ -155,6 +158,7 @@ class build_ext(_build_ext):
             os.environ.get('PYARROW_BUNDLE_BOOST', '0'))
 
     CYTHON_MODULE_NAMES = [
+        'gandiva',
         'lib',
         '_csv',
         '_cuda',
@@ -214,6 +218,9 @@ class build_ext(_build_ext):
             if self.with_orc:
                 cmake_options.append('-DPYARROW_BUILD_ORC=on')
 
+            if self.with_gandiva:
+                cmake_options.append('-DPYARROW_BUILD_GANDIVA=on')
+
             if len(self.cmake_cxxflags) > 0:
                 cmake_options.append('-DPYARROW_CXXFLAGS={0}'
                                      .format(self.cmake_cxxflags))
@@ -373,6 +380,8 @@ class build_ext(_build_ext):
             return True
         if name == '_cuda' and not self.with_cuda:
             return True
+        if name == 'gandiva' and not self.with_gandiva:
+            return True
         return False
 
     def _get_build_dir(self):