You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/19 16:07:20 UTC

[arrow] branch master updated: ARROW-3803: [C++/Python] Merge C++ builds and tests, run Python tests in separate CI entries

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new d089643  ARROW-3803: [C++/Python] Merge C++ builds and tests, run Python tests in separate CI entries
d089643 is described below

commit d08964334082e87010b37933623f021c98e8733d
Author: Wes McKinney <we...@apache.org>
AuthorDate: Wed Dec 19 10:07:13 2018 -0600

    ARROW-3803: [C++/Python] Merge C++ builds and tests, run Python tests in separate CI entries
    
    I found unfortunately that the conda-forge boost-cpp package is not fully compatible with Xcode 8.3, see https://issues.apache.org/jira/browse/ARROW-4056
    
    We might have to build a vendored Boost in this CI entry to work around the problem (this is what the Ray project did when they also hit this issue)
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #3208 from wesm/ARROW-3803 and squashes the following commits:
    
    7c47776a9 <Wes McKinney> Remove now unneeded travis_script_gandiva_cpp.sh
    9c8d6aa27 <Wes McKinney> * Combine C++ CI jobs, split Python CI jobs into separate build entries * Use gcc 4.8 * Pin boost-cpp 1.68.0 due to crashes caused by 1.69.0
---
 .travis.yml                                        | 103 ++++++++++-----------
 ci/conda_env_cpp.yml                               |   4 +-
 ci/travis_before_script_cpp.sh                     |  19 ++--
 ci/travis_env_common.sh                            |   2 +
 ci/travis_script_gandiva_cpp.sh                    |  30 ------
 ci/travis_script_python.sh                         |   6 +-
 cpp/cmake_modules/ThirdpartyToolchain.cmake        |   2 +
 cpp/src/plasma/CMakeLists.txt                      |   2 +
 .../gandiva/evaluator/MicroBenchmarkTest.java      |   2 +
 python/CMakeLists.txt                              |   1 +
 10 files changed, 77 insertions(+), 94 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 6440812..f7094fc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -62,74 +62,67 @@ matrix:
     - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh
     script:
     - $TRAVIS_BUILD_DIR/ci/travis_lint.sh
-  - name: "C++ & Python w/ gcc 4.9"
+  - name: "C++ unit tests, code coverage with gcc 4.8"
     compiler: gcc
     language: cpp
     os: linux
     jdk: openjdk8
     env:
-    - ARROW_TRAVIS_USE_TOOLCHAIN=1
     - ARROW_TRAVIS_VALGRIND=1
+    - ARROW_TRAVIS_USE_TOOLCHAIN=1
     - ARROW_TRAVIS_PLASMA=1
     - ARROW_TRAVIS_ORC=1
     - ARROW_TRAVIS_COVERAGE=1
     - ARROW_TRAVIS_PARQUET=1
-    - ARROW_TRAVIS_PYTHON_DOCS=1
+    - ARROW_TRAVIS_GANDIVA=1
+    - ARROW_TRAVIS_GANDIVA_JAVA=1
     - ARROW_BUILD_WARNING_LEVEL=CHECKIN
-    - ARROW_TRAVIS_PYTHON_JVM=1
-    - ARROW_TRAVIS_JAVA_BUILD_ONLY=1
-    - ARROW_TRAVIS_PYTHON_GANDIVA=1
-    # ARROW-2999 Benchmarks are disabled in Travis CI for the time being
-    # - ARROW_TRAVIS_PYTHON_BENCHMARKS=1
-    - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
     before_script:
-    # (ARROW_CI_CPP_AFFECTED implies ARROW_CI_PYTHON_AFFECTED)
-    - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi
+    - if [ $ARROW_CI_CPP_AFFECTED != "1" ] && [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi
     - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh
     - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh
     # If either C++ or Python changed, we must install the C++ libraries
     - git submodule update --init
     - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
     script:
-    # All test steps are required for accurate C++ coverage info
-    - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
-    # Build Arrow Java to test the pyarrow<->JVM in-process bridge
-    - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh
-    # Only run Plasma tests with valgrind in one of the Python builds because
-    # they are slow
-    - export PLASMA_VALGRIND=0
-    - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7
-    - export PLASMA_VALGRIND=1
-    - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6
-    - $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh
-  - name: "Gandiva C++ w/ gcc 4.9 and Java"
+    - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh || travis_terminate 1
+    - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh || travis_terminate 1
+    - $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh || travis_terminate 1
+  - name: "Python 2.7 and 3.6 unit tests, coverage with gcc 4.8"
     compiler: gcc
     language: cpp
     os: linux
     jdk: openjdk8
     env:
-    - ARROW_TRAVIS_GANDIVA=1
-    - ARROW_TRAVIS_GANDIVA_JAVA=1
-    - ARROW_TRAVIS_GANDIVA_TESTS=1
-    - ARROW_TRAVIS_OPTIONAL_INSTALL=1
-    - ARROW_CPP_BUILD_TARGETS="gandiva-all"
-    - ARROW_TRAVIS_USE_TOOLCHAIN=1
+    # Valgrind is needed for the Plasma store tests
     - ARROW_TRAVIS_VALGRIND=1
+    - ARROW_TRAVIS_USE_TOOLCHAIN=1
+    - ARROW_TRAVIS_COVERAGE=1
+    - ARROW_TRAVIS_PYTHON_DOCS=1
     - ARROW_BUILD_WARNING_LEVEL=CHECKIN
-    - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
+    - ARROW_TRAVIS_PYTHON_JVM=1
+    - ARROW_TRAVIS_PYTHON_GANDIVA=1
+    - ARROW_TRAVIS_OPTIONAL_INSTALL=1
+    # TODO(wesm): Run the benchmarks outside of Travis
+    # - ARROW_TRAVIS_PYTHON_BENCHMARKS=1
     before_script:
-    # Run if something changed in CPP or Java.
-    - if [ $ARROW_CI_CPP_AFFECTED != "1" ] && [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi
+    - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi
     - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh
     - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh
-    - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
+    - $TRAVIS_BUILD_DIR/ci/travis_install_toolchain.sh
     script:
-    - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_cpp.sh
-    - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh
-  - name: "[OS X] C++ & Python w/ XCode 6.4"
+    - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh
+    # Only run Plasma tests with valgrind in one of the Python builds because
+    # they are slow
+    - export PLASMA_VALGRIND=0
+    - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7
+    - export PLASMA_VALGRIND=1
+    - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6
+    - $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh
+  - name: "[OS X] C++ w/ XCode 8.3"
     compiler: clang
     language: cpp
-    osx_image: xcode6.4
+    osx_image: xcode8.3
     os: osx
     cache:
     addons:
@@ -138,39 +131,41 @@ matrix:
     - ARROW_TRAVIS_PLASMA=1
     - ARROW_TRAVIS_ORC=1
     - ARROW_TRAVIS_PARQUET=1
+    - ARROW_TRAVIS_GANDIVA=1
+    - ARROW_TRAVIS_GANDIVA_JAVA=1
+    - ARROW_TRAVIS_OPTIONAL_INSTALL=1
     - ARROW_BUILD_WARNING_LEVEL=CHECKIN
+    # ARROW-3803: The Xcode 8.3 image has Boost libraries in /usr/local/lib
+    # which can get loaded before the toolchain Boost libraries. These seem to
+    # get loaded even though we are modifying LD_LIBRARY_PATH. We build our own
+    # Boost and statically link to get around the issue until this can be
+    # investigated further
+    - ARROW_TRAVIS_VENDORED_BOOST=1
     before_script:
-    - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi
+    - if [ $ARROW_CI_CPP_AFFECTED != "1" ] && [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi
     # If either C++ or Python changed, we must install the C++ libraries
     - git submodule update --init
     - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
     script:
-    - if [ $ARROW_CI_CPP_AFFECTED == "1" ]; then $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh; fi
-    - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7
-    - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6
-  - name: "[OS X] Gandiva C++ w/ XCode 8.3 & Java"
+    - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
+    - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh
+  - name: "[OS X] Python w/ XCode 6.4"
     compiler: clang
     language: cpp
-    # xcode 7.3 has a bug in strptime.
-    osx_image: xcode8.3
+    osx_image: xcode6.4
     os: osx
     cache:
     addons:
     env:
-    - ARROW_TRAVIS_GANDIVA=1
-    - ARROW_TRAVIS_GANDIVA_JAVA=1
-    - ARROW_TRAVIS_GANDIVA_TESTS=1
-    - ARROW_TRAVIS_OPTIONAL_INSTALL=1
-    - ARROW_CPP_BUILD_TARGETS="gandiva-all"
     - ARROW_TRAVIS_USE_TOOLCHAIN=1
     - ARROW_BUILD_WARNING_LEVEL=CHECKIN
+    - ARROW_TRAVIS_OPTIONAL_INSTALL=1
     before_script:
-    # Run if something changed in CPP or Java.
-    - if [ $ARROW_CI_CPP_AFFECTED != "1" ] && [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi
-    - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh --only-library
     script:
-    - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_cpp.sh
-    - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh
+    - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi
+    - $TRAVIS_BUILD_DIR/ci/travis_install_toolchain.sh
+    - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7
+    - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6
   - name: "[manylinux1] Python"
     language: cpp
     before_script:
diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.yml
index 1e22e90..87523b3 100644
--- a/ci/conda_env_cpp.yml
+++ b/ci/conda_env_cpp.yml
@@ -15,7 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-boost-cpp
+# ARROW-4056: The conda-forge boost 1.69.0 seems to break the Parquet unit
+# tests with Xcode 8.3. Root cause not yet determined
+boost-cpp=1.68.0
 brotli
 bzip2
 cmake
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index aa5b2a6..8ddc986 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -40,6 +40,14 @@ if [ "$only_library_mode" == "no" ]; then
   source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
 fi
 
+if [ "$ARROW_TRAVIS_USE_TOOLCHAIN" == "1" ]; then
+  # Set up C++ toolchain from conda-forge packages for faster builds
+  source $TRAVIS_BUILD_DIR/ci/travis_install_toolchain.sh
+fi
+
+mkdir -p $ARROW_CPP_BUILD_DIR
+pushd $ARROW_CPP_BUILD_DIR
+
 CMAKE_COMMON_FLAGS="\
 -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL \
 -DARROW_NO_DEPRECATED_API=ON \
@@ -48,15 +56,10 @@ CMAKE_LINUX_FLAGS=""
 CMAKE_OSX_FLAGS=""
 
 if [ "$ARROW_TRAVIS_USE_TOOLCHAIN" == "1" ]; then
-  # Set up C++ toolchain from conda-forge packages for faster builds
-  source $TRAVIS_BUILD_DIR/ci/travis_install_toolchain.sh
   CMAKE_COMMON_FLAGS="${CMAKE_COMMON_FLAGS} -DARROW_JEMALLOC=ON"
   CMAKE_COMMON_FLAGS="${CMAKE_COMMON_FLAGS} -DARROW_WITH_BZ2=ON"
 fi
 
-mkdir -p $ARROW_CPP_BUILD_DIR
-pushd $ARROW_CPP_BUILD_DIR
-
 if [ $only_library_mode == "yes" ]; then
   CMAKE_COMMON_FLAGS="\
 $CMAKE_COMMON_FLAGS \
@@ -115,10 +118,14 @@ if [ $ARROW_TRAVIS_VERBOSE == "1" ]; then
   CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
 fi
 
-if [ $ARROW_TRAVIS_USE_VENDORED_BOOST == "1" ]; then
+if [ $ARROW_TRAVIS_VENDORED_BOOST == "1" ]; then
   CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_BOOST_VENDORED=ON"
 fi
 
+if [ $ARROW_TRAVIS_STATIC_BOOST == "1" ]; then
+  CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_BOOST_USE_SHARED=OFF"
+fi
+
 if [ $ARROW_TRAVIS_OPTIONAL_INSTALL == "1" ]; then
   CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_OPTIONAL_INSTALL=ON"
 fi
diff --git a/ci/travis_env_common.sh b/ci/travis_env_common.sh
index f5748b2..636a25f 100755
--- a/ci/travis_env_common.sh
+++ b/ci/travis_env_common.sh
@@ -33,6 +33,8 @@ export ARROW_RUBY_DIR=$TRAVIS_BUILD_DIR/ruby
 export ARROW_RUST_DIR=${TRAVIS_BUILD_DIR}/rust
 export ARROW_R_DIR=${TRAVIS_BUILD_DIR}/r
 
+export ARROW_TRAVIS_COVERAGE=${ARROW_TRAVIS_COVERAGE:=0}
+
 if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then
     export ARROW_CPP_COVERAGE_FILE=${TRAVIS_BUILD_DIR}/coverage.info
     export ARROW_PYTHON_COVERAGE_FILE=${TRAVIS_BUILD_DIR}/.coverage
diff --git a/ci/travis_script_gandiva_cpp.sh b/ci/travis_script_gandiva_cpp.sh
deleted file mode 100755
index bc4a7a9..0000000
--- a/ci/travis_script_gandiva_cpp.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-
-source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
-
-pushd $CPP_BUILD_DIR
-
-PATH=$ARROW_BUILD_TYPE:$PATH ctest -j2 --output-on-failure -L gandiva-tests
-
-popd
-
-# TODO : Capture C++ coverage info
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 20ec57e..69e115a 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -87,7 +87,7 @@ rm -rf *
 # XXX Can we simply reuse CMAKE_COMMON_FLAGS from travis_before_script_cpp.sh?
 CMAKE_COMMON_FLAGS="-DARROW_EXTRA_ERROR_CONTEXT=ON"
 
-PYTHON_CPP_BUILD_TARGETS="arrow_python-all plasma"
+PYTHON_CPP_BUILD_TARGETS="arrow_python-all plasma parquet"
 
 if [ $ARROW_TRAVIS_COVERAGE == "1" ]; then
   CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GENERATE_COVERAGE=ON"
@@ -103,6 +103,7 @@ cmake -GNinja \
       -DARROW_BUILD_TESTS=ON \
       -DARROW_BUILD_UTILITIES=OFF \
       -DARROW_OPTIONAL_INSTALL=ON \
+      -DARROW_PARQUET=on \
       -DARROW_PLASMA=on \
       -DARROW_TENSORFLOW=on \
       -DARROW_PYTHON=on \
@@ -176,12 +177,11 @@ if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then
     coverage report -i --include="*/_parquet.pyx"
     # Generate XML file for CodeCov
     coverage xml -i -o $TRAVIS_BUILD_DIR/coverage.xml
-    # Capture C++ coverage info and combine with previous coverage file
+    # Capture C++ coverage info
     pushd $TRAVIS_BUILD_DIR
     lcov --quiet --directory . --capture --no-external --output-file coverage-python-tests.info \
         2>&1 | grep -v "WARNING: no data found for /usr/include"
     lcov --add-tracefile coverage-python-tests.info \
-        --add-tracefile $ARROW_CPP_COVERAGE_FILE \
         --output-file $ARROW_CPP_COVERAGE_FILE
     rm coverage-python-tests.info
     popd   # $TRAVIS_BUILD_DIR
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index db0b69b..3381b5c 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -346,6 +346,8 @@ if (MSVC AND ARROW_USE_STATIC_CRT)
   set(Boost_USE_STATIC_RUNTIME ON)
 endif()
 set(Boost_ADDITIONAL_VERSIONS
+  "1.70.0" "1.70"
+  "1.69.0" "1.69"
   "1.68.0" "1.68"
   "1.67.0" "1.67"
   "1.66.0" "1.66"
diff --git a/cpp/src/plasma/CMakeLists.txt b/cpp/src/plasma/CMakeLists.txt
index d9c7dca..a71acf8 100644
--- a/cpp/src/plasma/CMakeLists.txt
+++ b/cpp/src/plasma/CMakeLists.txt
@@ -96,6 +96,8 @@ ADD_ARROW_LIB(plasma
   SHARED_LINK_LIBS ${FLATBUFFERS_STATIC_LIB} ${CMAKE_THREAD_LIBS_INIT} ${PLASMA_LINK_LIBS}
   STATIC_LINK_LIBS ${FLATBUFFERS_STATIC_LIB} ${CMAKE_THREAD_LIBS_INIT} ${PLASMA_STATIC_LINK_LIBS})
 
+add_dependencies(plasma ${PLASMA_LIBRARIES})
+
 foreach(LIB_TARGET ${PLASMA_LIBRARIES})
   target_compile_definitions(${LIB_TARGET}
     PRIVATE ARROW_EXPORTING)
diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java
index c4d6bd9..6934c3f 100644
--- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java
+++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java
@@ -26,10 +26,12 @@ import org.apache.arrow.gandiva.expression.TreeNode;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.Schema;
 import org.junit.Assert;
+import org.junit.Ignore;
 import org.junit.Test;
 
 import com.google.common.collect.Lists;
 
+@Ignore
 public class MicroBenchmarkTest extends BaseEvaluatorTest {
 
   private double toleranceRatio = 4.0;
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 1a87454..a6e4123 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -138,6 +138,7 @@ if ("${COMPILER_FAMILY}" STREQUAL "clang")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constant-logical-operand")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-declarations")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sometimes-uninitialized")
 
   # We have public Cython APIs which return C++ types, which are in an extern
   # "C" blog (no symbol mangling) and clang doesn't like this