You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2019/02/21 15:33:53 UTC
[arrow] branch master updated: ARROW-4372: [C++] Embed precompiled
bitcode in the gandiva library
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new e8cc48b ARROW-4372: [C++] Embed precompiled bitcode in the gandiva library
e8cc48b is described below
commit e8cc48b455251f4a8b6a8174d019669fe4a92d20
Author: Krisztián Szűcs <sz...@gmail.com>
AuthorDate: Thu Feb 21 16:33:26 2019 +0100
ARROW-4372: [C++] Embed precompiled bitcode in the gandiva library
We were not running the pyarrow tests after installing the manylinux wheels, which can lead to uncaught issues, like: https://travis-ci.org/kszucs/crossbow/builds/484284104
Author: Krisztián Szűcs <sz...@gmail.com>
Closes #3484 from kszucs/manylinux_tests and squashes the following commits:
3b1da30e <Krisztián Szűcs> use sudo
c573a564 <Krisztián Szűcs> use env variables insude the container
fd5e3fea <Krisztián Szűcs> use latest docker image tag
d5531d95 <Krisztián Szűcs> test imports inside the wheel container
1aa19f18 <Krisztián Szűcs> reenable travis builds
b3994962 <Krisztián Szűcs> test py27mu and py36m wheels
71233c7d <Krisztián Szűcs> test 2.7,16 wheel
2372f3d7 <Krisztián Szűcs> fix requirements path; disable other CI tests
3e4ec2a2 <Krisztián Szűcs> unterminated llvm:MemoryBuffer; fix check_import.py path
7c88d61b <Krisztián Szűcs> only build python 3.6 wheel
18c54883 <Krisztián Szűcs> install wheel from dist dir
0bb07a7e <Krisztián Szűcs> better bash split
54fc653b <Krisztián Szűcs> don't export
d3cb058b <Krisztián Szűcs> fix wheel building script
0d29b311 <Krisztián Szűcs> remove not existing resources from gandiva's pom
5d75adb4 <Krisztián Szűcs> initialize jni loader
09d829a4 <Krisztián Szűcs> build wheels for a single python distribution at a time; adjust travis and crossbow scripts
79abc0e8 <Krisztián Szűcs> mark .cc file as generated
af78be26 <Krisztián Szűcs> don't bundle irhelpers in the jar
a88cd370 <Krisztián Szűcs> cmake format
7deb3593 <Krisztián Szűcs> fix REGEX; remove byteCodeFilePath from java configuration object
fa195294 <Krisztián Szűcs> properly construct llvm:StringRef
5841dcdd <Krisztián Szűcs> remove commented code
42391b13 <Krisztián Szűcs> don't pass precompiled bitcode all around the constructors
d480c83c <Krisztián Szűcs> use const string ref for now
b0b11175 <Krisztián Szűcs> conda llvmdev
169f43ac <Krisztián Szűcs> build gandiva in cpp docker image
cb696251 <Krisztián Szűcs> silent maven download msgs
19200c30 <Krisztián Szűcs> don't run wheel tests twice; cmake format
f2205d01 <Krisztián Szűcs> gandiva jni
dbf5b1cf <Krisztián Szűcs> embed precompiled bitcode as char array; load precompiled IR from string
00d98e0d <Krisztián Szűcs> try to bundle bytecode files
97fe94b2 <Krisztián Szűcs> fix requirements-test.txt path
86e7e5bc <Krisztián Szűcs> run pyarrow tests in manylinux CI build
---
.travis.yml | 2 +
ci/docker_build_cpp.sh | 2 +
ci/travis_script_gandiva_java.sh | 2 +
ci/travis_script_manylinux.sh | 57 +++++--
cpp/Dockerfile | 1 +
cpp/src/gandiva/CMakeLists.txt | 19 +--
cpp/src/gandiva/configuration.cc | 5 +-
cpp/src/gandiva/configuration.h | 24 +--
cpp/src/gandiva/engine.cc | 19 ++-
cpp/src/gandiva/engine.h | 5 +-
cpp/src/gandiva/jni/config_builder.cc | 8 -
cpp/src/gandiva/jni/env_helper.h | 3 -
cpp/src/gandiva/jni/jni_common.cc | 6 -
cpp/src/gandiva/precompiled/CMakeLists.txt | 26 ++-
...c_file_path.cc.in => precompiled_bitcode.cc.in} | 7 +-
cpp/src/gandiva/tests/projector_test.cc | 15 --
cpp/src/gandiva/tests/test_util.h | 3 +-
dev/tasks/python-wheels/linux-test.sh | 4 +-
dev/tasks/python-wheels/travis.linux.yml | 6 +-
java/gandiva/pom.xml | 14 +-
.../gandiva/evaluator/ConfigurationBuilder.java | 12 +-
.../apache/arrow/gandiva/evaluator/JniLoader.java | 21 +--
python/manylinux1/build_arrow.sh | 181 ++++++++++-----------
23 files changed, 207 insertions(+), 235 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 60c4446..7458ab6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -225,6 +225,8 @@ matrix:
- $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6
- name: "[manylinux1] Python"
language: cpp
+ env:
+ - PYTHON_VERSIONS="2.7,32 3.6,16"
before_script:
- if [ $ARROW_CI_PYTHON_AFFECTED == "1" ]; then docker pull quay.io/xhochy/arrow_manylinux1_x86_64_base:llvm-7-manylinux1; fi
script:
diff --git a/ci/docker_build_cpp.sh b/ci/docker_build_cpp.sh
index 450dc87..5bf6094 100755
--- a/ci/docker_build_cpp.sh
+++ b/ci/docker_build_cpp.sh
@@ -34,6 +34,8 @@ cmake -GNinja \
-DARROW_PARQUET=${ARROW_PARQUET:-ON} \
-DARROW_HDFS=${ARROW_HDFS:-OFF} \
-DARROW_PYTHON=${ARROW_PYTHON:-OFF} \
+ -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \
+ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA:-OFF} \
-DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \
-DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
-DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
diff --git a/ci/travis_script_gandiva_java.sh b/ci/travis_script_gandiva_java.sh
index 387be9a..9d5871f 100755
--- a/ci/travis_script_gandiva_java.sh
+++ b/ci/travis_script_gandiva_java.sh
@@ -22,6 +22,8 @@ set -e
source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
JAVA_DIR=${TRAVIS_BUILD_DIR}/java
+export MAVEN_OPTS="-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+
pushd $JAVA_DIR
# build with gandiva profile
diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh
index 8e8086a..fa02c82 100755
--- a/ci/travis_script_manylinux.sh
+++ b/ci/travis_script_manylinux.sh
@@ -20,23 +20,58 @@
set -ex
-pushd python/manylinux1
-docker run --shm-size=2g --rm -e PYARROW_PARALLEL=3 -v $PWD:/io -v $PWD/../../:/arrow quay.io/xhochy/arrow_manylinux1_x86_64_base:llvm-7-manylinux1 /io/build_arrow.sh
-
# Testing for https://issues.apache.org/jira/browse/ARROW-2657
# These tests cannot be run inside of the docker container, since TensorFlow
# does not run on manylinux1
source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
-
source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
-PYTHON_VERSION=3.6
-CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION
+pushd python/manylinux1
+
+cat << EOF > check_imports.py
+import sys
+import pyarrow
+import pyarrow.orc
+import pyarrow.parquet
+import pyarrow.plasma
+import tensorflow
+
+if sys.version_info.major > 2:
+ import pyarrow.gandiva
+EOF
+
+for PYTHON_TUPLE in ${PYTHON_VERSIONS}; do
+ IFS="," read PYTHON_VERSION UNICODE_WIDTH <<< $PYTHON_TUPLE
+
+ # cleanup the artifact directory, docker writes it as root
+ sudo rm -rf dist
+
+ # build the wheels
+ docker run --shm-size=2g --rm \
+ -e PYARROW_PARALLEL=3 \
+ -e PYTHON_VERSION=$PYTHON_VERSION \
+ -e UNICODE_WIDTH=$UNICODE_WIDTH \
+ -v $PWD:/io \
+ -v $PWD/../../:/arrow \
+ quay.io/xhochy/arrow_manylinux1_x86_64_base:latest \
+ /io/build_arrow.sh
+
+ # create a testing conda environment
+ CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION
+ conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION
+ conda activate $CONDA_ENV_DIR
+
+ # install the produced wheels
+ pip install tensorflow
+ pip install dist/*.whl
+
+ # Test optional dependencies and the presence of tensorflow
+ python check_imports.py
-conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION
-conda activate $CONDA_ENV_DIR
+ # Install test dependencies and run pyarrow tests
+ pip install -r ../requirements-test.txt
+ pytest --pyargs pyarrow
+done
-pip install -q tensorflow
-pip install "dist/`ls dist/ | grep cp36`"
-python -c "import pyarrow; import tensorflow"
+popd
diff --git a/cpp/Dockerfile b/cpp/Dockerfile
index 17d332d..b570bf6 100644
--- a/cpp/Dockerfile
+++ b/cpp/Dockerfile
@@ -48,6 +48,7 @@ RUN arrow/ci/docker_install_conda.sh && \
ENV CC=gcc \
CXX=g++ \
+ ARROW_GANDIVA=OFF \
ARROW_BUILD_TESTS=ON \
ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX \
ARROW_HOME=$CONDA_PREFIX \
diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 068a21e..9d03ef1 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -27,17 +27,14 @@ add_dependencies(gandiva-all gandiva gandiva-tests gandiva-benchmarks)
find_package(LLVM)
-# Set the path where the byte-code files will be installed.
-set(GANDIVA_BC_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/gandiva)
+# Set the path where the bitcode file generated, see precompiled/CMakeLists.txt
+set(GANDIVA_PRECOMPILED_BC_PATH "${CMAKE_CURRENT_BINARY_DIR}/irhelpers.bc")
+set(GANDIVA_PRECOMPILED_CC_PATH "${CMAKE_CURRENT_BINARY_DIR}/precompiled_bitcode.cc")
+set(GANDIVA_PRECOMPILED_CC_IN_PATH
+ "${CMAKE_CURRENT_SOURCE_DIR}/precompiled_bitcode.cc.in")
-set(GANDIVA_BC_FILE_NAME irhelpers.bc)
-set(GANDIVA_BC_INSTALL_PATH "${GANDIVA_BC_INSTALL_DIR}/${GANDIVA_BC_FILE_NAME}")
-set(GANDIVA_BC_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${GANDIVA_BC_FILE_NAME}")
-install(FILES ${GANDIVA_BC_OUTPUT_PATH} DESTINATION ${GANDIVA_BC_INSTALL_DIR})
-
-set(BC_FILE_PATH_CC "${CMAKE_CURRENT_BINARY_DIR}/bc_file_path.cc")
-configure_file(bc_file_path.cc.in ${BC_FILE_PATH_CC})
-add_definitions(-DGANDIVA_BYTE_COMPILE_FILE_PATH="${GANDIVA_BC_OUTPUT_PATH}")
+# add_arrow_lib will look for this not yet existing file, so flag as generated
+set_source_files_properties(${GANDIVA_PRECOMPILED_CC_PATH} PROPERTIES GENERATED TRUE)
set(SRC_FILES
annotator.cc
@@ -73,7 +70,7 @@ set(SRC_FILES
selection_vector.cc
tree_expr_builder.cc
to_date_holder.cc
- ${BC_FILE_PATH_CC})
+ ${GANDIVA_PRECOMPILED_CC_PATH})
set(GANDIVA_SHARED_PRIVATE_LINK_LIBS arrow_shared LLVM::LLVM_INTERFACE ${RE2_LIBRARY})
diff --git a/cpp/src/gandiva/configuration.cc b/cpp/src/gandiva/configuration.cc
index df1a13d..0c6a33f 100644
--- a/cpp/src/gandiva/configuration.cc
+++ b/cpp/src/gandiva/configuration.cc
@@ -25,12 +25,11 @@ const std::shared_ptr<Configuration> ConfigurationBuilder::default_configuration
InitDefaultConfig();
std::size_t Configuration::Hash() const {
- boost::hash<std::string> string_hash;
- return string_hash(byte_code_file_path_);
+ return 0; // dummy for now, no configuration properties yet
}
bool Configuration::operator==(const Configuration& other) const {
- return other.byte_code_file_path() == byte_code_file_path();
+ return true; // always true, no configuration properties yet
}
bool Configuration::operator!=(const Configuration& other) const {
diff --git a/cpp/src/gandiva/configuration.h b/cpp/src/gandiva/configuration.h
index 480a95e..7fb0d63 100644
--- a/cpp/src/gandiva/configuration.h
+++ b/cpp/src/gandiva/configuration.h
@@ -26,9 +26,6 @@
namespace gandiva {
-GANDIVA_EXPORT
-extern const char kByteCodeFilePath[];
-
class ConfigurationBuilder;
/// \brief runtime config for gandiva
///
@@ -38,17 +35,9 @@ class GANDIVA_EXPORT Configuration {
public:
friend class ConfigurationBuilder;
- const std::string& byte_code_file_path() const { return byte_code_file_path_; }
-
std::size_t Hash() const;
bool operator==(const Configuration& other) const;
bool operator!=(const Configuration& other) const;
-
- private:
- explicit Configuration(const std::string& byte_code_file_path)
- : byte_code_file_path_(byte_code_file_path) {}
-
- const std::string byte_code_file_path_;
};
/// \brief configuration builder for gandiva
@@ -57,15 +46,8 @@ class GANDIVA_EXPORT Configuration {
/// to override specific values and build a custom instance
class GANDIVA_EXPORT ConfigurationBuilder {
public:
- ConfigurationBuilder() : byte_code_file_path_(kByteCodeFilePath) {}
-
- ConfigurationBuilder& set_byte_code_file_path(const std::string& byte_code_file_path) {
- byte_code_file_path_ = byte_code_file_path;
- return *this;
- }
-
std::shared_ptr<Configuration> build() {
- std::shared_ptr<Configuration> configuration(new Configuration(byte_code_file_path_));
+ std::shared_ptr<Configuration> configuration(new Configuration());
return configuration;
}
@@ -74,10 +56,8 @@ class GANDIVA_EXPORT ConfigurationBuilder {
}
private:
- std::string byte_code_file_path_;
-
static std::shared_ptr<Configuration> InitDefaultConfig() {
- std::shared_ptr<Configuration> configuration(new Configuration(kByteCodeFilePath));
+ std::shared_ptr<Configuration> configuration(new Configuration());
return configuration;
}
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index c816ec7..22805bb 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -66,6 +66,9 @@
namespace gandiva {
+extern const char kPrecompiledBitcode[];
+extern const size_t kPrecompiledBitcodeSize;
+
std::once_flag init_once_flag;
bool Engine::init_once_done_ = false;
@@ -114,7 +117,7 @@ Status Engine::Make(std::shared_ptr<Configuration> config,
// Add mappings for functions that can be accessed from LLVM/IR module.
engine_obj->AddGlobalMappings();
- auto status = engine_obj->LoadPreCompiledIRFiles(config->byte_code_file_path());
+ auto status = engine_obj->LoadPreCompiledIR();
ARROW_RETURN_NOT_OK(status);
// Add decimal functions
@@ -126,14 +129,16 @@ Status Engine::Make(std::shared_ptr<Configuration> config,
}
// Handling for pre-compiled IR libraries.
-Status Engine::LoadPreCompiledIRFiles(const std::string& byte_code_file_path) {
+Status Engine::LoadPreCompiledIR() {
+ auto bitcode = llvm::StringRef(kPrecompiledBitcode, kPrecompiledBitcodeSize);
+
/// Read from file into memory buffer.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer_or_error =
- llvm::MemoryBuffer::getFile(byte_code_file_path);
- ARROW_RETURN_IF(
- !buffer_or_error,
- Status::CodeGenError("Could not load module from IR ", byte_code_file_path, ": ",
- buffer_or_error.getError().message()));
+ llvm::MemoryBuffer::getMemBuffer(bitcode, "precompiled", false);
+
+ ARROW_RETURN_IF(!buffer_or_error,
+ Status::CodeGenError("Could not load module from IR: ",
+ buffer_or_error.getError().message()));
std::unique_ptr<llvm::MemoryBuffer> buffer = move(buffer_or_error.get());
diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h
index 7a976d5..3b46f62 100644
--- a/cpp/src/gandiva/engine.h
+++ b/cpp/src/gandiva/engine.h
@@ -79,8 +79,9 @@ class GANDIVA_EXPORT Engine {
llvm::ExecutionEngine& execution_engine() { return *execution_engine_.get(); }
- /// load pre-compiled IR modules and merge them into the main module.
- Status LoadPreCompiledIRFiles(const std::string& byte_code_file_path);
+ /// load pre-compiled IR modules from precompiled_bitcode.cc and merge them into
+ /// the main module.
+ Status LoadPreCompiledIR();
// Create and add mappings for cpp functions that can be accessed from LLVM.
void AddGlobalMappings();
diff --git a/cpp/src/gandiva/jni/config_builder.cc b/cpp/src/gandiva/jni/config_builder.cc
index b224279..f2694bb 100644
--- a/cpp/src/gandiva/jni/config_builder.cc
+++ b/cpp/src/gandiva/jni/config_builder.cc
@@ -34,16 +34,8 @@ using gandiva::ConfigurationBuilder;
JNIEXPORT jlong JNICALL
Java_org_apache_arrow_gandiva_evaluator_ConfigurationBuilder_buildConfigInstance(
JNIEnv* env, jobject configuration) {
- jstring byte_code_file_path =
- (jstring)env->CallObjectMethod(configuration, byte_code_accessor_method_id_, 0);
ConfigurationBuilder configuration_builder;
- if (byte_code_file_path != nullptr) {
- const char* byte_code_file_path_cpp = env->GetStringUTFChars(byte_code_file_path, 0);
- configuration_builder.set_byte_code_file_path(byte_code_file_path_cpp);
- env->ReleaseStringUTFChars(byte_code_file_path, byte_code_file_path_cpp);
- }
std::shared_ptr<Configuration> config = configuration_builder.build();
- env->DeleteLocalRef(byte_code_file_path);
return ConfigHolder::MapInsert(config);
}
diff --git a/cpp/src/gandiva/jni/env_helper.h b/cpp/src/gandiva/jni/env_helper.h
index c1bfbaa..1b56304 100644
--- a/cpp/src/gandiva/jni/env_helper.h
+++ b/cpp/src/gandiva/jni/env_helper.h
@@ -23,7 +23,4 @@
// class references
extern jclass configuration_builder_class_;
-// method references
-extern jmethodID byte_code_accessor_method_id_;
-
#endif // ENV_HELPER_H
diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc
index 339b0cd..1ba3aad 100644
--- a/cpp/src/gandiva/jni/jni_common.cc
+++ b/cpp/src/gandiva/jni/jni_common.cc
@@ -68,7 +68,6 @@ static jint JNI_VERSION = JNI_VERSION_1_6;
// extern refs - initialized for other modules.
jclass configuration_builder_class_;
-jmethodID byte_code_accessor_method_id_;
// refs for self.
static jclass gandiva_exception_;
@@ -93,11 +92,6 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) {
gandiva_exception_ = (jclass)env->NewGlobalRef(localExceptionClass);
env->DeleteLocalRef(localExceptionClass);
- const char method_name[] = "getByteCodeFilePath";
- const char return_type[] = "()Ljava/lang/String;";
- byte_code_accessor_method_id_ =
- env->GetMethodID(configuration_builder_class_, method_name, return_type);
-
env->ExceptionDescribe();
return JNI_VERSION;
diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt
index 8e2f498..1249000 100644
--- a/cpp/src/gandiva/precompiled/CMakeLists.txt
+++ b/cpp/src/gandiva/precompiled/CMakeLists.txt
@@ -67,12 +67,32 @@ foreach(SRC_FILE ${PRECOMPILED_SRCS})
endforeach()
# link all of the bitcode files into a single bitcode file.
-add_custom_command(OUTPUT ${GANDIVA_BC_OUTPUT_PATH}
- COMMAND ${LLVM_LINK_EXECUTABLE} -o ${GANDIVA_BC_OUTPUT_PATH}
+add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_BC_PATH}
+ COMMAND ${LLVM_LINK_EXECUTABLE} -o ${GANDIVA_PRECOMPILED_BC_PATH}
${BC_FILES}
DEPENDS ${BC_FILES})
-add_custom_target(precompiled ALL DEPENDS ${GANDIVA_BC_OUTPUT_PATH})
+# write a cmake script to replace precompiled bitcode file's content into a .cc file
+file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/WritePrecompiledCC.cmake" "\
+ file(READ \"${GANDIVA_PRECOMPILED_BC_PATH}\" HEXCONTENT HEX)
+ string(REGEX REPLACE
+ \"([0-9a-f][0-9a-f])\"
+ \"'\\\\\\\\x\\\\1',\"
+ GANDIVA_PRECOMPILED_BITCODE_CHARS
+ \"\${HEXCONTENT}\")
+ configure_file(${GANDIVA_PRECOMPILED_CC_IN_PATH}
+ ${GANDIVA_PRECOMPILED_CC_PATH})
+")
+
+# add the previous command to the execution chain
+add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_CC_PATH}
+ COMMAND ${CMAKE_COMMAND} -P
+ "${CMAKE_CURRENT_BINARY_DIR}/WritePrecompiledCC.cmake"
+ DEPENDS ${GANDIVA_PRECOMPILED_CC_IN_PATH}
+ ${GANDIVA_PRECOMPILED_BC_PATH})
+
+add_custom_target(precompiled ALL
+ DEPENDS ${GANDIVA_PRECOMPILED_BC_PATH} ${GANDIVA_PRECOMPILED_CC_PATH})
function(add_precompiled_unit_test REL_TEST_NAME)
get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE)
diff --git a/cpp/src/gandiva/bc_file_path.cc.in b/cpp/src/gandiva/precompiled_bitcode.cc.in
similarity index 79%
rename from cpp/src/gandiva/bc_file_path.cc.in
rename to cpp/src/gandiva/precompiled_bitcode.cc.in
index 54e81ca..1d5985a 100644
--- a/cpp/src/gandiva/bc_file_path.cc.in
+++ b/cpp/src/gandiva/precompiled_bitcode.cc.in
@@ -15,9 +15,12 @@
// specific language governing permissions and limitations
// under the License.
+#include <string>
+
namespace gandiva {
-// Path to the byte-code file.
-extern const char kByteCodeFilePath[] = "${GANDIVA_BC_INSTALL_PATH}";
+// Content of precompiled bitcode file.
+extern const char kPrecompiledBitcode[] = { ${GANDIVA_PRECOMPILED_BITCODE_CHARS} };
+extern const size_t kPrecompiledBitcodeSize = sizeof(kPrecompiledBitcode);
} // namespace gandiva
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index ba0e632..8c0b9dd 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -84,21 +84,6 @@ TEST_F(TestProjector, TestProjectCache) {
&cached_projector);
ASSERT_OK(status);
EXPECT_EQ(cached_projector, projector);
-
- // if configuration is different, should return a new projector.
-
- // build a new path by replacing the first '/' with '//'
- std::string alt_path(GANDIVA_BYTE_COMPILE_FILE_PATH);
- auto pos = alt_path.find('/', 0);
- EXPECT_NE(pos, std::string::npos);
- alt_path.replace(pos, 1, "//");
- auto other_configuration =
- ConfigurationBuilder().set_byte_code_file_path(alt_path).build();
- std::shared_ptr<Projector> should_be_new_projector2;
- status = Projector::Make(schema, {sum_expr, sub_expr}, other_configuration,
- &should_be_new_projector2);
- ASSERT_OK(status);
- EXPECT_NE(projector, should_be_new_projector2);
}
TEST_F(TestProjector, TestProjectCacheFieldNames) {
diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h
index af16412..e841e6a 100644
--- a/cpp/src/gandiva/tests/test_util.h
+++ b/cpp/src/gandiva/tests/test_util.h
@@ -89,8 +89,7 @@ static ArrayPtr MakeArrowTypeArray(const std::shared_ptr<arrow::DataType>& type,
std::shared_ptr<Configuration> TestConfiguration() {
auto builder = ConfigurationBuilder();
- builder.set_byte_code_file_path(GANDIVA_BYTE_COMPILE_FILE_PATH);
- return builder.build();
+ return builder.DefaultConfiguration();
}
} // namespace gandiva
diff --git a/dev/tasks/python-wheels/linux-test.sh b/dev/tasks/python-wheels/linux-test.sh
index 45efdb0..dcd77eb 100755
--- a/dev/tasks/python-wheels/linux-test.sh
+++ b/dev/tasks/python-wheels/linux-test.sh
@@ -24,17 +24,17 @@ pip install /arrow/python/manylinux1/dist/*.whl
python --version
# Test optional dependencies
-command="
+python -c "
import sys
import pyarrow
import pyarrow.orc
import pyarrow.parquet
import pyarrow.plasma
+import tensorflow
if sys.version_info.major > 2:
import pyarrow.gandiva
"
-python -c "$command"
# Run pyarrow tests
pip install -r /arrow/python/requirements-test.txt
diff --git a/dev/tasks/python-wheels/travis.linux.yml b/dev/tasks/python-wheels/travis.linux.yml
index b5cbc65..240c523 100644
--- a/dev/tasks/python-wheels/travis.linux.yml
+++ b/dev/tasks/python-wheels/travis.linux.yml
@@ -41,10 +41,12 @@ script:
- pushd arrow/python/manylinux1
- docker run --shm-size=2g
-e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }}
- -e PYTHON_VERSIONS="{{ python_version }},{{ unicode_width }}"
+ -e PYTHON_VERSION="{{ python_version }}"
+ -e UNICODE_WIDTH="{{ unicode_width }}"
-v $PWD:/io
-v $PWD/../../:/arrow
- quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /io/build_arrow.sh
+ quay.io/xhochy/arrow_manylinux1_x86_64_base:llvm-7-manylinux1
+ /io/build_arrow.sh
- popd
# test on multiple distributions
diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml
index a090153..3469c9f 100644
--- a/java/gandiva/pom.xml
+++ b/java/gandiva/pom.xml
@@ -29,7 +29,7 @@
<protobuf.version>2.5.0</protobuf.version>
<dep.guava.version>18.0</dep.guava.version>
<checkstyle.failOnViolation>true</checkstyle.failOnViolation>
- <gandiva.cpp.build.dir>../../cpp/debug/debug</gandiva.cpp.build.dir>
+ <gandiva.cpp.build.dir>../../cpp/build/debug</gandiva.cpp.build.dir>
</properties>
<dependencies>
<dependency>
@@ -126,18 +126,6 @@
<include>**/libgandiva_jni.*</include>
</includes>
</resource>
- <resource>
- <directory>${gandiva.cpp.build.dir}</directory>
- <includes>
- <include>**/libgandiva_helpers.*</include>
- </includes>
- </resource>
- <resource>
- <directory>${gandiva.cpp.build.dir}/../src/gandiva</directory>
- <includes>
- <include>irhelpers.bc</include>
- </includes>
- </resource>
</resources>
<extensions>
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
index 46deee9..f0e8c0d 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
@@ -22,18 +22,8 @@ package org.apache.arrow.gandiva.evaluator;
*/
public class ConfigurationBuilder {
- private String byteCodeFilePath = "";
-
- public ConfigurationBuilder withByteCodeFilePath(final String byteCodeFilePath) {
- this.byteCodeFilePath = byteCodeFilePath;
- return this;
- }
-
- public String getByteCodeFilePath() {
- return byteCodeFilePath;
- }
-
public native long buildConfigInstance();
public native void releaseConfigInstance(long configId);
+
}
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java
index ccb5307..b6174d7 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java
@@ -32,16 +32,13 @@ import org.apache.arrow.gandiva.exceptions.GandivaException;
*/
class JniLoader {
private static final String LIBRARY_NAME = "gandiva_jni";
- private static final String IRHELPERS_BC = "irhelpers.bc";
private static volatile JniLoader INSTANCE;
private static volatile long defaultConfiguration = 0L;
- private final String byteCodeFilePath;
private final JniWrapper wrapper;
- private JniLoader(String byteCodeFilePath) {
- this.byteCodeFilePath = byteCodeFilePath;
+ private JniLoader() {
this.wrapper = new JniWrapper();
}
@@ -60,8 +57,7 @@ class JniLoader {
try {
String tempDir = System.getProperty("java.io.tmpdir");
loadGandivaLibraryFromJar(tempDir);
- File byteCodeFile = moveFileFromJarToTemp(tempDir, IRHELPERS_BC);
- return new JniLoader(byteCodeFile.getAbsolutePath());
+ return new JniLoader();
} catch (IOException ioException) {
throw new GandivaException("unable to create native instance", ioException);
}
@@ -108,13 +104,6 @@ class JniLoader {
}
/**
- * Returns the byte code file path extracted from jar.
- */
- public String getByteCodeFilePath() {
- return byteCodeFilePath;
- }
-
- /**
* Returns the jni wrapper.
*/
JniWrapper getWrapper() throws GandivaException {
@@ -130,11 +119,9 @@ class JniLoader {
if (defaultConfiguration == 0L) {
synchronized (ConfigurationBuilder.class) {
if (defaultConfiguration == 0L) {
- String defaultByteCodeFilePath = JniLoader.getInstance().getByteCodeFilePath();
-
+ JniLoader.getInstance(); // setup
defaultConfiguration = new ConfigurationBuilder()
- .withByteCodeFilePath(defaultByteCodeFilePath)
- .buildConfigInstance();
+ .buildConfigInstance();
}
}
}
diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh
index 75537a6..e0475bb 100755
--- a/python/manylinux1/build_arrow.sh
+++ b/python/manylinux1/build_arrow.sh
@@ -23,9 +23,6 @@
# Build upon the scripts in https://github.com/matthew-brett/manylinux-builds
# * Copyright (c) 2013-2016, Matt Terry and Matthew Brett (BSD 2-clause)
-# Build different python versions with various unicode widths
-PYTHON_VERSIONS="${PYTHON_VERSIONS:-2.7,16 2.7,32 3.5,16 3.6,16 3.7,16}"
-
source /multibuild/manylinux_utils.sh
# Quit on failure
@@ -48,95 +45,89 @@ export PYARROW_CMAKE_OPTIONS='-DTHRIFT_HOME=/usr -DBoost_NAMESPACE=arrow_boost -
# Ensure the target directory exists
mkdir -p /io/dist
-for PYTHON_TUPLE in ${PYTHON_VERSIONS}; do
- IFS=","
- set -- $PYTHON_TUPLE;
- PYTHON=$1
- U_WIDTH=$2
- CPYTHON_PATH="$(cpython_path $PYTHON ${U_WIDTH})"
- PYTHON_INTERPRETER="${CPYTHON_PATH}/bin/python"
- PIP="${CPYTHON_PATH}/bin/pip"
- PATH="$PATH:${CPYTHON_PATH}"
-
- if [ $PYTHON != "2.7" ]; then
- # Gandiva is not supported on Python 2.7
- export PYARROW_WITH_GANDIVA=1
- export BUILD_ARROW_GANDIVA=ON
- else
- export PYARROW_WITH_GANDIVA=0
- export BUILD_ARROW_GANDIVA=OFF
- fi
-
- echo "=== (${PYTHON}) Building Arrow C++ libraries ==="
- ARROW_BUILD_DIR=/tmp/build-PY${PYTHON}-${U_WIDTH}
- mkdir -p "${ARROW_BUILD_DIR}"
- pushd "${ARROW_BUILD_DIR}"
- PATH="${CPYTHON_PATH}/bin:$PATH" cmake -DCMAKE_BUILD_TYPE=Release \
- -DCMAKE_INSTALL_PREFIX=/arrow-dist \
- -DCMAKE_INSTALL_LIBDIR=lib \
- -DARROW_BUILD_TESTS=OFF \
- -DARROW_BUILD_SHARED=ON \
- -DARROW_BOOST_USE_SHARED=ON \
- -DARROW_GANDIVA_PC_CXX_FLAGS="-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2;-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2/x86_64-CentOS-linux/" \
- -DARROW_JEMALLOC=ON \
- -DARROW_RPATH_ORIGIN=ON \
- -DARROW_PYTHON=ON \
- -DARROW_PARQUET=ON \
- -DPythonInterp_FIND_VERSION=${PYTHON} \
- -DARROW_PLASMA=ON \
- -DARROW_TENSORFLOW=ON \
- -DARROW_ORC=ON \
- -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \
- -DARROW_GANDIVA_JAVA=OFF \
- -DBoost_NAMESPACE=arrow_boost \
- -DBOOST_ROOT=/arrow_boost_dist \
- -GNinja /arrow/cpp
- ninja install
- popd
-
- # Check that we don't expose any unwanted symbols
- /io/scripts/check_arrow_visibility.sh
-
- echo "=== (${PYTHON}) Install the wheel build dependencies ==="
- $PIP install -r requirements-wheel.txt
-
- # Clear output directory
- rm -rf dist/
- echo "=== (${PYTHON}) Building wheel ==="
- # Remove build directory to ensure CMake gets a clean run
- rm -rf build/
- PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py build_ext \
- --inplace \
- --bundle-arrow-cpp \
- --bundle-boost \
- --boost-namespace=arrow_boost
- PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py bdist_wheel
- PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py sdist
-
- echo "=== (${PYTHON}) Tag the wheel with manylinux1 ==="
- mkdir -p repaired_wheels/
- auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/
-
- echo "=== (${PYTHON}) Testing manylinux1 wheel ==="
- source /venv-test-${PYTHON}-${U_WIDTH}/bin/activate
- pip install repaired_wheels/*.whl
-
- if [ $PYTHON != "2.7" ]; then
- PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.gandiva"
- fi
- PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.orc"
- PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.parquet"
- PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.plasma"
-
- echo "=== (${PYTHON}) Install modules required for testing ==="
- pip install -r requirements-test.txt
-
- # The TensorFlow test will be skipped here, since TensorFlow is not
- # manylinux1 compatible; however, the wheels will support TensorFlow on
- # a TensorFlow compatible system
- py.test -v -r sxX --durations=15 --parquet ${VIRTUAL_ENV}/lib/*/site-packages/pyarrow
- deactivate
-
- mv repaired_wheels/*.whl /io/dist
- mv dist/*.tar.gz /io/dist
-done
+# Must pass PYTHON_VERSION and UNICODE_WIDTH env variables
+# possible values are: 2.7,16 2.7,32 3.5,16 3.6,16 3.7,16
+
+CPYTHON_PATH="$(cpython_path ${PYTHON_VERSION} ${UNICODE_WIDTH})"
+PYTHON_INTERPRETER="${CPYTHON_PATH}/bin/python"
+PIP="${CPYTHON_PATH}/bin/pip"
+PATH="${PATH}:${CPYTHON_PATH}"
+
+if [ ${PYTHON_VERSION} != "2.7" ]; then
+ # Gandiva is not supported on Python 2.7
+ export PYARROW_WITH_GANDIVA=1
+ export BUILD_ARROW_GANDIVA=ON
+else
+ export PYARROW_WITH_GANDIVA=0
+ export BUILD_ARROW_GANDIVA=OFF
+fi
+
+echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
+ARROW_BUILD_DIR=/tmp/build-PY${PYTHON_VERSION}-${UNICODE_WIDTH}
+mkdir -p "${ARROW_BUILD_DIR}"
+pushd "${ARROW_BUILD_DIR}"
+PATH="${CPYTHON_PATH}/bin:${PATH}" cmake -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_INSTALL_PREFIX=/arrow-dist \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DARROW_BUILD_TESTS=OFF \
+ -DARROW_BUILD_SHARED=ON \
+ -DARROW_BOOST_USE_SHARED=ON \
+ -DARROW_GANDIVA_PC_CXX_FLAGS="-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2;-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2/x86_64-CentOS-linux/" \
+ -DARROW_JEMALLOC=ON \
+ -DARROW_RPATH_ORIGIN=ON \
+ -DARROW_PYTHON=ON \
+ -DARROW_PARQUET=ON \
+ -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \
+ -DARROW_PLASMA=ON \
+ -DARROW_TENSORFLOW=ON \
+ -DARROW_ORC=ON \
+ -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \
+ -DARROW_GANDIVA_JAVA=OFF \
+ -DBoost_NAMESPACE=arrow_boost \
+ -DBOOST_ROOT=/arrow_boost_dist \
+ -GNinja /arrow/cpp
+ninja install
+popd
+
+# Check that we don't expose any unwanted symbols
+/io/scripts/check_arrow_visibility.sh
+
+echo "=== (${PYTHON_VERSION}) Install the wheel build dependencies ==="
+$PIP install -r requirements-wheel.txt
+
+# Clear output directory
+rm -rf dist/
+echo "=== (${PYTHON_VERSION}) Building wheel ==="
+# Remove build directory to ensure CMake gets a clean run
+rm -rf build/
+PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py build_ext \
+ --inplace \
+ --bundle-arrow-cpp \
+ --bundle-boost \
+ --boost-namespace=arrow_boost
+PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py bdist_wheel
+PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py sdist
+
+echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux1 ==="
+mkdir -p repaired_wheels/
+auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/
+
+# Install the built wheels
+$PIP install repaired_wheels/*.whl
+
+# Test that the modules are importable
+$PYTHON_INTERPRETER -c "
+import sys
+import pyarrow
+import pyarrow.orc
+import pyarrow.parquet
+import pyarrow.plasma
+
+if sys.version_info.major > 2:
+ import pyarrow.gandiva
+"
+
+# More thorough testing happens outsite of the build to prevent
+# packaging issues like ARROW-4372
+mv dist/*.tar.gz /io/dist
+mv repaired_wheels/*.whl /io/dist