You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ro...@apache.org on 2018/08/08 20:06:51 UTC

[arrow] branch master updated: ARROW-2975: [Plasma] Fix TensorFlow operator compilation with pip package

This is an automated email from the ASF dual-hosted git repository.

robertnishihara pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4660833  ARROW-2975: [Plasma] Fix TensorFlow operator compilation with pip package
4660833 is described below

commit 4660833b2c5ef63a97445e304b8f72a2e0170f9c
Author: Philipp Moritz <pc...@gmail.com>
AuthorDate: Wed Aug 8 13:06:41 2018 -0700

    ARROW-2975: [Plasma] Fix TensorFlow operator compilation with pip package
    
    This uses pyarrow.get_include() and pyarrow.get_library_dirs() instead of pkg-config to build the TensorFlow operator, because pkg-config is not available in pip packages.
    
    Author: Philipp Moritz <pc...@gmail.com>
    
    Closes #2368 from pcmoritz/fix-plasma-tf-op and squashes the following commits:
    
    41708bc <Philipp Moritz> Update plasma.py
    ca0971d <Philipp Moritz> cleanups
    349a147 <Philipp Moritz> rewrite in python and pass GOOGLE_CUDA correctly
    1688abd <Philipp Moritz> batch library dirs
    d1ce76a <Philipp Moritz> fix quoting
    0e7e75e <Philipp Moritz> quote variables
    2ddc7ff <Philipp Moritz> add plasma include file
    71aae0f <Philipp Moritz> build the tensorflow op even if pkg-config is not available
---
 python/CMakeLists.txt              |  2 ++
 python/pyarrow/plasma.py           | 22 ++++++++++++++++++---
 python/pyarrow/tensorflow/build.sh | 39 --------------------------------------
 3 files changed, 21 insertions(+), 42 deletions(-)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 306f041..f3cd6b3 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -402,6 +402,8 @@ if (PYARROW_BUILD_PLASMA)
   ADD_THIRDPARTY_LIB(libplasma
     SHARED_LIB ${PLASMA_SHARED_LIB})
 
+  file(COPY ${ARROW_INCLUDE_DIR}/plasma DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY}/include)
+
   if (PYARROW_BUNDLE_ARROW_CPP)
     bundle_arrow_lib(PLASMA_SHARED_LIB
       ABI_VERSION ${ARROW_ABI_VERSION}
diff --git a/python/pyarrow/plasma.py b/python/pyarrow/plasma.py
index bc8a35c..2f77f5c 100644
--- a/python/pyarrow/plasma.py
+++ b/python/pyarrow/plasma.py
@@ -18,8 +18,9 @@
 import contextlib
 import os
 import pyarrow as pa
-import subprocess
 import shutil
+import subprocess
+import sys
 import tempfile
 import time
 
@@ -52,8 +53,23 @@ def build_plasma_tensorflow_op():
         pass
     else:
         print("Compiling Plasma TensorFlow Op...")
-        script_path = os.path.join(pa.__path__[0], "tensorflow", "build.sh")
-        subprocess.check_call(["bash", script_path])
+        dir_path = os.path.dirname(os.path.realpath(__file__))
+        cc_path = os.path.join(dir_path, "tensorflow", "plasma_op.cc")
+        so_path = os.path.join(dir_path, "tensorflow", "plasma_op.so")
+        tf_cflags = tf.sysconfig.get_compile_flags()
+        if sys.platform == 'darwin':
+            tf_cflags = ["-undefined", "dynamic_lookup"] + tf_cflags
+        cmd = ["g++", "-std=c++11", "-g", "-shared", cc_path,
+               "-o", so_path, "-DNDEBUG", "-I" + pa.get_include()]
+        cmd += ["-L" + dir for dir in pa.get_library_dirs()]
+        cmd += ["-lplasma", "-larrow_python", "-larrow", "-fPIC"]
+        cmd += tf_cflags
+        cmd += tf.sysconfig.get_link_flags()
+        cmd += ["-O2"]
+        if tf.test.is_built_with_cuda():
+            cmd += ["-DGOOGLE_CUDA"]
+        print("Running command " + str(cmd))
+        subprocess.check_call(cmd)
         tf_plasma_op = tf.load_op_library(TF_PLASMA_OP_PATH)
 
 
diff --git a/python/pyarrow/tensorflow/build.sh b/python/pyarrow/tensorflow/build.sh
deleted file mode 100644
index 927bc20..0000000
--- a/python/pyarrow/tensorflow/build.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -ex
-
-PYARROW_TENSORFLOW_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
-
-TF_CFLAGS=$(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))')
-TF_LFLAGS=$(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))')
-
-if [ "$(uname)" == "Darwin" ]; then
-    TF_CFLAGS="-undefined dynamic_lookup ${TF_CFLAGS}"
-fi
-
-NDEBUG="-DNDEBUG"
-
-g++ -std=c++11 -g -shared $PYARROW_TENSORFLOW_DIR/plasma_op.cc -o $PYARROW_TENSORFLOW_DIR/plasma_op.so \
-    ${NDEBUG} \
-    `pkg-config --cflags --libs plasma arrow arrow-python` \
-    -fPIC \
-    ${TF_CFLAGS[@]} \
-    ${TF_LFLAGS[@]} \
-    -O2