You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/12/17 21:30:43 UTC
[arrow] branch master updated: ARROW-17692: [R] Add support for building with system AWS SDK C++ (#14235)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new dca8c07de6 ARROW-17692: [R] Add support for building with system AWS SDK C++ (#14235)
dca8c07de6 is described below
commit dca8c07de6089bde7a9c2e10b42b72541a616cb6
Author: Nic Crane <th...@gmail.com>
AuthorDate: Sat Dec 17 21:30:37 2022 +0000
ARROW-17692: [R] Add support for building with system AWS SDK C++ (#14235)
This PR uses "pkg-config --static ... arrow" to collect build flags. "pkg-config --static ... arrow" reports the build flags that match the build options and the libraries Apache Arrow C++ was built with. This also works with the system AWS SDK C++.
Lead-authored-by: Sutou Kouhei <ko...@clear-code.com>
Co-authored-by: Nic Crane <th...@gmail.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
ci/docker/linux-r.dockerfile | 5 +-
ci/docker/ubuntu-18.04-cpp.dockerfile | 8 ++
ci/docker/ubuntu-20.04-cpp.dockerfile | 8 ++
ci/docker/ubuntu-22.04-cpp.dockerfile | 10 +-
ci/scripts/r_docker_configure.sh | 18 +---
ci/scripts/r_install_system_dependencies.sh | 141 ++++++++++++++++++++++++++++
cpp/cmake_modules/BuildUtils.cmake | 6 +-
cpp/cmake_modules/FindzstdAlt.cmake | 30 +++++-
cpp/cmake_modules/ThirdpartyToolchain.cmake | 12 ++-
dev/tasks/conda-recipes/r-arrow/build.sh | 2 +
dev/tasks/r/github.linux.offline.build.yml | 14 ++-
dev/tasks/r/github.macos-linux.local.yml | 16 ++--
dev/tasks/r/github.packages.yml | 21 +++--
r/configure | 130 +++++++++++++------------
r/inst/build_arrow_static.sh | 2 +-
r/tests/testthat/helper-skip.R | 14 +++
r/tests/testthat/test-python.R | 2 +
17 files changed, 329 insertions(+), 110 deletions(-)
diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile
index db5e5f656d..d368a6629c 100644
--- a/ci/docker/linux-r.dockerfile
+++ b/ci/docker/linux-r.dockerfile
@@ -45,6 +45,7 @@ ENV PATH "${RPREFIX}/bin:${PATH}"
# Patch up some of the docker images
COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/
COPY ci/etc/rprofile /arrow/ci/etc/
+COPY ci/scripts/r_install_system_dependencies.sh /arrow/ci/scripts/
COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/r_docker_configure.sh
@@ -53,10 +54,6 @@ RUN /arrow/ci/scripts/r_docker_configure.sh
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
-# Set up Python 3 and its dependencies
-RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
- ln -s /usr/bin/pip3 /usr/local/bin/pip
-
COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
COPY r/DESCRIPTION /arrow/r/
RUN /arrow/ci/scripts/r_deps.sh /arrow
diff --git a/ci/docker/ubuntu-18.04-cpp.dockerfile b/ci/docker/ubuntu-18.04-cpp.dockerfile
index 5d469e0ac6..715cc3424f 100644
--- a/ci/docker/ubuntu-18.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-18.04-cpp.dockerfile
@@ -88,11 +88,19 @@ RUN apt-get update -y -q && \
libcurl4-openssl-dev \
libgflags-dev \
libgoogle-glog-dev \
+ libidn2-dev \
+ libkrb5-dev \
+ libldap-dev \
liblz4-dev \
+ libnghttp2-dev \
libprotobuf-dev \
libprotoc-dev \
+ libpsl-dev \
libre2-dev \
+ librtmp-dev \
libsnappy-dev \
+ libssh-dev \
+ libssh2-1-dev \
libssl-dev \
ninja-build \
pkg-config \
diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile
index 1cd0581aa4..6cf48c56aa 100644
--- a/ci/docker/ubuntu-20.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -82,12 +82,20 @@ RUN apt-get update -y -q && \
libcurl4-openssl-dev \
libgflags-dev \
libgoogle-glog-dev \
+ libidn2-dev \
+ libkrb5-dev \
+ libldap-dev \
liblz4-dev \
+ libnghttp2-dev \
libprotobuf-dev \
libprotoc-dev \
+ libpsl-dev \
libradospp-dev \
libre2-dev \
+ librtmp-dev \
libsnappy-dev \
+ libssh-dev \
+ libssh2-1-dev \
libssl-dev \
libthrift-dev \
libutf8proc-dev \
diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile
index 4bbb5c2b31..d47614ed2c 100644
--- a/ci/docker/ubuntu-22.04-cpp.dockerfile
+++ b/ci/docker/ubuntu-22.04-cpp.dockerfile
@@ -81,13 +81,21 @@ RUN apt-get update -y -q && \
libgflags-dev \
libgoogle-glog-dev \
libgrpc++-dev \
+ libidn2-dev \
+ libkrb5-dev \
+ libldap-dev \
liblz4-dev \
+ libnghttp2-dev \
libprotobuf-dev \
libprotoc-dev \
+ libpsl-dev \
libre2-dev \
+ librtmp-dev \
libsnappy-dev \
- libssl-dev \
libsqlite3-dev \
+ libssh-dev \
+ libssh2-1-dev \
+ libssl-dev \
libthrift-dev \
libutf8proc-dev \
libzstd-dev \
diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh
index c671271e22..1cbd5f0b5e 100755
--- a/ci/scripts/r_docker_configure.sh
+++ b/ci/scripts/r_docker_configure.sh
@@ -87,22 +87,8 @@ if [[ -n "$DEVTOOLSET_VERSION" ]]; then
fi
fi
-if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then
- # Install curl and openssl for S3/GCS support
- if [ "$PACKAGE_MANAGER" = "apt-get" ]; then
- apt-get install -y libcurl4-openssl-dev libssl-dev
- else
- $PACKAGE_MANAGER install -y libcurl-devel openssl-devel
- fi
-
- # The Dockerfile should have put this file here
- if [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then
- ${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh latest /usr/local
- fi
-
- if [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_gcs_testbench.sh" ] && [ "`which pip`" ]; then
- ${ARROW_SOURCE_HOME}/ci/scripts/install_gcs_testbench.sh default
- fi
+if [ -f "${ARROW_SOURCE_HOME}/ci/scripts/r_install_system_dependencies.sh" ]; then
+ "${ARROW_SOURCE_HOME}/ci/scripts/r_install_system_dependencies.sh"
fi
# Install rsync for bundling cpp source and curl to make sure it is installed on all images
diff --git a/ci/scripts/r_install_system_dependencies.sh b/ci/scripts/r_install_system_dependencies.sh
new file mode 100755
index 0000000000..f8f7367590
--- /dev/null
+++ b/ci/scripts/r_install_system_dependencies.sh
@@ -0,0 +1,141 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: ${ARROW_SOURCE_HOME:=/arrow}
+
+if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then
+ # Figure out what package manager we have
+ if [ "`which dnf`" ]; then
+ PACKAGE_MANAGER=dnf
+ elif [ "`which yum`" ]; then
+ PACKAGE_MANAGER=yum
+ elif [ "`which zypper`" ]; then
+ PACKAGE_MANAGER=zypper
+ else
+ PACKAGE_MANAGER=apt-get
+ apt-get update
+ fi
+
+ # Install curl and OpenSSL for S3/GCS support
+ #
+ # We need to install all dependencies explicitly to use the result
+ # of "pkg-config --static --libs libcurl", because
+ # libcurl-dev/libcurl-devel don't depend on packages that are only
+ # needed for "pkg-config --static".
+ case "$PACKAGE_MANAGER" in
+ apt-get)
+ # "pkg-config --static --libs libcurl" has
+ # * -lnghttp2
+ # * -lidn2
+ # * -lrtmp
+ # * -lssh or -lssh2
+ # * -lpsl
+ # * -lssl
+ # * -lcrypto
+ # * -lgssapi_krb5
+ # * -lkrb5
+ # * -lk5crypto
+ # * -lcom_err
+ # * -lldap
+ # * -llber
+ # * -lzstd
+ # * -lbrotlidec
+ # * -lz
+ apt-get install -y \
+ libbrotli-dev \
+ libcurl4-openssl-dev \
+ libidn2-dev \
+ libkrb5-dev \
+ libldap-dev \
+ libnghttp2-dev \
+ libpsl-dev \
+ librtmp-dev \
+ libssh-dev \
+ libssh2-1-dev \
+ libssl-dev \
+ libzstd-dev
+ ;;
+ dnf|yum)
+ # "pkg-config --static --libs libcurl" has -lidn, -lssh2 and -lldap
+ $PACKAGE_MANAGER install -y \
+ libcurl-devel \
+ libidn-devel \
+ libssh2-devel \
+ openldap-devel \
+ openssl-devel
+ ;;
+ zypper)
+ # "pkg-config --static --libs libcurl" has
+ # * -lnghttp2
+ # * -lidn2
+ # * -lssh
+ # * -lpsl
+ # * -lssl
+ # * -lcrypto
+ # * -lgssapi_krb5
+ # * -lkrb5
+ # * -lk5crypto
+ # * -lcom_err
+ # * -lldap
+ # * -llber
+ # * -lzstd
+ # * -lbrotlidec
+ # * -lz
+ $PACKAGE_MANAGER install -y \
+ krb5-devel \
+ libbrotli-devel \
+ libcurl-devel \
+ libidn2-devel \
+ libnghttp2-devel \
+ libpsl-devel \
+ libssh-devel \
+ libzstd-devel \
+ openldap2-devel \
+ openssl-devel
+ ;;
+ *)
+ $PACKAGE_MANAGER install -y libcurl-devel openssl-devel
+ ;;
+ esac
+
+ # The Dockerfile should have put this file here
+ if [ "$ARROW_S3" == "ON" ] && [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then
+ "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" latest /usr/local
+ fi
+
+ if [ "$ARROW_GCS" == "ON" ] && [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_gcs_testbench.sh" ]; then
+ case "$PACKAGE_MANAGER" in
+ zypper)
+ # python3 is Python 3.6 on OpenSUSE 15.3.
+ # PyArrow supports Python 3.7 or later.
+ $PACKAGE_MANAGER install -y python39-pip
+ ln -s /usr/bin/python3.9 /usr/local/bin/python
+ ln -s /usr/bin/pip3.9 /usr/local/bin/pip
+ ;;
+ *)
+ $PACKAGE_MANAGER install -y python3-pip
+ ln -s /usr/bin/python3 /usr/local/bin/python
+ ln -s /usr/bin/pip3 /usr/local/bin/pip
+ ;;
+ esac
+ "${ARROW_SOURCE_HOME}/ci/scripts/install_gcs_testbench.sh" default
+ fi
+fi
diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake
index 71eec565be..1da1e1de10 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -926,7 +926,11 @@ function(ARROW_INSTALL_ALL_HEADERS PATH)
endfunction()
function(ARROW_ADD_PKG_CONFIG MODULE)
- configure_file(${MODULE}.pc.in "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc" @ONLY)
+ configure_file(${MODULE}.pc.in "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc.generate.in"
+ @ONLY)
+ file(GENERATE
+ OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc"
+ INPUT "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc.generate.in")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/")
endfunction()
diff --git a/cpp/cmake_modules/FindzstdAlt.cmake b/cpp/cmake_modules/FindzstdAlt.cmake
index c05cdd36a4..980cf26552 100644
--- a/cpp/cmake_modules/FindzstdAlt.cmake
+++ b/cpp/cmake_modules/FindzstdAlt.cmake
@@ -77,6 +77,7 @@ else()
find_package(PkgConfig QUIET)
pkg_check_modules(ZSTD_PC libzstd)
if(ZSTD_PC_FOUND)
+ set(zstdAlt_VERSION "${ZSTD_PC_VERSION}")
set(ZSTD_INCLUDE_DIR "${ZSTD_PC_INCLUDEDIR}")
list(APPEND ZSTD_PC_LIBRARY_DIRS "${ZSTD_PC_LIBDIR}")
@@ -96,7 +97,34 @@ else()
endif()
endif()
-find_package_handle_standard_args(zstdAlt REQUIRED_VARS ZSTD_LIB ZSTD_INCLUDE_DIR)
+if("${zstdAlt_VERSION}" STREQUAL "" AND ZSTD_INCLUDE_DIR)
+ file(READ "${ZSTD_INCLUDE_DIR}/zstd.h" ZSTD_H_CONTENT)
+ string(REGEX MATCH "#define ZSTD_VERSION_MAJOR +([0-9]+)" ZSTD_VERSION_MAJOR_DEFINITION
+ "${ZSTD_H_CONTENT}")
+ string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" ZSTD_VERSION_MAJOR
+ "${ZSTD_VERSION_MAJOR_DEFINITION}")
+ string(REGEX MATCH "#define ZSTD_VERSION_MINOR +([0-9]+)" ZSTD_VERSION_MINOR_DEFINITION
+ "${ZSTD_H_CONTENT}")
+ string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" ZSTD_VERSION_MINOR
+ "${ZSTD_VERSION_MINOR_DEFINITION}")
+ string(REGEX MATCH "#define ZSTD_VERSION_RELEASE +([0-9]+)"
+ ZSTD_VERSION_RELEASE_DEFINITION "${ZSTD_H_CONTENT}")
+ string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" ZSTD_VERSION_RELEASE
+ "${ZSTD_VERSION_RELEASE_DEFINITION}")
+ if("${ZSTD_VERSION_MAJOR}" STREQUAL ""
+ OR "${ZSTD_VERSION_MINOR}" STREQUAL ""
+ OR "${ZSTD_VERSION_RELEASE}" STREQUAL "")
+ set(zstdAlt_VERSION "0.0.0")
+ else()
+ set(zstdAlt_VERSION
+ "${ZSTD_VERSION_MAJOR}.${ZSTD_VERSION_MINOR}.${ZSTD_VERSION_RELEASE}")
+ endif()
+endif()
+
+find_package_handle_standard_args(
+ zstdAlt
+ REQUIRED_VARS ZSTD_LIB ZSTD_INCLUDE_DIR
+ VERSION_VAR zstdAlt_VERSION)
if(zstdAlt_FOUND)
if(ARROW_ZSTD_USE_SHARED)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 3f3487063f..60fcbc22ef 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -4225,7 +4225,7 @@ macro(build_google_cloud_cpp_storage)
endmacro()
if(ARROW_WITH_GOOGLE_CLOUD_CPP)
- resolve_dependency(google_cloud_cpp_storage)
+ resolve_dependency(google_cloud_cpp_storage PC_PACKAGE_NAMES google_cloud_cpp_storage)
get_target_property(google_cloud_cpp_storage_INCLUDE_DIR google-cloud-cpp::storage
INTERFACE_INCLUDE_DIRECTORIES)
message(STATUS "Found google-cloud-cpp::storage headers: ${google_cloud_cpp_storage_INCLUDE_DIR}"
@@ -4767,6 +4767,16 @@ if(ARROW_S3)
message(STATUS "Found AWS SDK headers: ${AWSSDK_INCLUDE_DIR}")
message(STATUS "Found AWS SDK libraries: ${AWSSDK_LINK_LIBRARIES}")
+ if(${AWSSDK_SOURCE} STREQUAL "SYSTEM")
+ foreach(AWSSDK_LINK_LIBRARY ${AWSSDK_LINK_LIBRARIES})
+ string(APPEND ARROW_PC_LIBS_PRIVATE " $<TARGET_FILE:${AWSSDK_LINK_LIBRARY}>")
+ endforeach()
+ endif()
+ if(UNIX)
+ string(APPEND ARROW_PC_REQUIRES_PRIVATE " libcurl")
+ endif()
+ string(APPEND ARROW_PC_REQUIRES_PRIVATE " openssl")
+
if(APPLE)
# CoreFoundation's path is hardcoded in the CMake files provided by
# aws-sdk-cpp to use the MacOSX SDK provided by XCode which makes
diff --git a/dev/tasks/conda-recipes/r-arrow/build.sh b/dev/tasks/conda-recipes/r-arrow/build.sh
old mode 100644
new mode 100755
index 8479d9db5c..efb9dfcd51
--- a/dev/tasks/conda-recipes/r-arrow/build.sh
+++ b/dev/tasks/conda-recipes/r-arrow/build.sh
@@ -11,5 +11,7 @@ if [[ "${target_platform}" == osx-* ]]; then
export ARROW_R_CXXFLAGS="${ARROW_R_CXXFLAGS} -D_LIBCPP_DISABLE_AVAILABILITY"
fi
+export PKG_CONFIG_PATH="${PREFIX}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
+
# ${R_ARGS} necessary to support cross-compilation
${R} CMD INSTALL --build r/. ${R_ARGS}
diff --git a/dev/tasks/r/github.linux.offline.build.yml b/dev/tasks/r/github.linux.offline.build.yml
index bc97e1bfd0..b116accda8 100644
--- a/dev/tasks/r/github.linux.offline.build.yml
+++ b/dev/tasks/r/github.linux.offline.build.yml
@@ -66,14 +66,12 @@ jobs:
path: arrow/r/
- name: Install system dependencies
run: |
- sudo apt-get update
- sudo apt install libcurl4-openssl-dev libssl-dev
+ sudo arrow/ci/scripts/r_install_system_dependencies.sh
arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
- - name: Prepare PKG_CONFIG_PATH for Homebrew
- run: |
- # zstd is installed by Homebrew on GitHub Actions.
- echo "PKG_CONFIG_PATH=$(brew --prefix)/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" \
- >> "$GITHUB_ENV"
+ env:
+ ARROW_GCS: "ON"
+ ARROW_S3: "ON"
+ ARROW_SOURCE_HOME: arrow
- name: Install dependencies
run: |
install.packages(c("remotes", "glue", "sys"))
@@ -83,7 +81,7 @@ jobs:
env:
TEST_OFFLINE_BUILD: true
LIBARROW_MINIMAL: false
- {{ macros.github_set_sccache_envvars()|indent(8)}}
+ {{ macros.github_set_sccache_envvars()|indent(8)}}
run: |
cd arrow/r
R CMD INSTALL --install-tests --no-test-load --no-docs --no-help --no-byte-compile arrow_with_deps.tar.gz
diff --git a/dev/tasks/r/github.macos-linux.local.yml b/dev/tasks/r/github.macos-linux.local.yml
index 24ed712caf..520965e8ad 100644
--- a/dev/tasks/r/github.macos-linux.local.yml
+++ b/dev/tasks/r/github.macos-linux.local.yml
@@ -38,19 +38,19 @@ jobs:
{{ macros.github_checkout_arrow()|indent }}
- name: Configure non-autobrew dependencies (macos)
+ if: contains(matrix.os, 'macOS')
run: |
brew install openssl
brew install sccache
- if: contains(matrix.os, 'macOS')
- name: Configure non-autobrew dependencies (linux)
+ if: contains(matrix.os, 'ubuntu')
run: |
- sudo apt-get update
- sudo apt install libcurl4-openssl-dev libssl-dev
+ sudo env \
+ ARROW_GCS=ON \
+ ARROW_S3=ON \
+ ARROW_SOURCE_HOME=arrow \
+ arrow/ci/scripts/r_install_system_dependencies.sh
arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
- # zstd is installed by Homebrew on GitHub Actions.
- echo "PKG_CONFIG_PATH=$(brew --prefix)/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" \
- >> "$GITHUB_ENV"
- if: contains(matrix.os, 'ubuntu')
- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true
@@ -69,7 +69,7 @@ jobs:
FORCE_BUNDLED_BUILD: true
LIBARROW_MINIMAL: false
ARROW_R_DEV: TRUE
- {{ macros.github_set_sccache_envvars()|indent(8)}}
+ {{ macros.github_set_sccache_envvars()|indent(8)}}
run: |
sccache --start-server
cd arrow/r
diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml
index 2242729928..b02fc0ade5 100644
--- a/dev/tasks/r/github.packages.yml
+++ b/dev/tasks/r/github.packages.yml
@@ -318,20 +318,23 @@ jobs:
with:
install-r: false
{{ macros.github_setup_local_r_repo(false, false)|indent }}
+ {{ macros.github_checkout_arrow()|indent }}
- name: Install sccache
shell: bash
run: |
- curl -s \
- https://raw.githubusercontent.com/{{ arrow.github_repo }}/{{ arrow.head }}/ci/scripts/install_sccache.sh | \
- bash -s unknown-linux-musl /usr/local/bin
- - name: Prepare PKG_CONFIG_PATH for Homebrew
- run: |
- # zstd is installed by Homebrew on GitHub Actions.
- echo "PKG_CONFIG_PATH=$(brew --prefix)/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" \
- >> "$GITHUB_ENV"
+ arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
- name: Install R package system dependencies
run: |
- sudo apt update && sudo apt install libcurl4-openssl-dev
+ sudo arrow/ci/scripts/r_install_system_dependencies.sh
+ env:
+ ARROW_GCS: "ON"
+ ARROW_S3: "ON"
+ ARROW_SOURCE_HOME: arrow
+ - name: Remove arrow/
+ run: |
+ rm -rf arrow/
+ - name: Enable parallel build
+ run: |
echo "MAKEFLAGS=-j$(nproc)" >> $GITHUB_ENV
- name: Install arrow from nightly repo
env:
diff --git a/r/configure b/r/configure
index eae33be57a..3f5f743fc0 100755
--- a/r/configure
+++ b/r/configure
@@ -31,7 +31,6 @@ PKG_DEB_NAME="(unsuppored)"
PKG_RPM_NAME="(unsuppored)"
PKG_BREW_NAME="apache-arrow"
PKG_TEST_HEADER="<arrow/api.h>"
-PKG_LIBS="-larrow"
# Make some env vars case-insensitive
ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'`
@@ -76,32 +75,41 @@ if [ "$FORCE_AUTOBREW" = "true" ] || [ "$FORCE_BUNDLED_BUILD" = "true" ]; then
ARROW_USE_PKG_CONFIG="false"
fi
+S3_LIBS=""
+GCS_LIBS=""
# Note that cflags may be empty in case of success
if [ "$ARROW_HOME" ] && [ "$FORCE_BUNDLED_BUILD" != "true" ]; then
echo "*** Using ARROW_HOME as the source of libarrow"
PKG_CFLAGS="-I$ARROW_HOME/include $PKG_CFLAGS"
- PKG_DIRS="-L$ARROW_HOME/lib"
+ PKG_LIBS="-larrow"
+ LIB_DIR="$ARROW_HOME/lib"
+ PKG_DIRS="-L$LIB_DIR"
elif [ "$INCLUDE_DIR" ] && [ "$LIB_DIR" ]; then
echo "*** Using INCLUDE_DIR/LIB_DIR as the source of libarrow"
PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS"
+ PKG_LIBS="-larrow"
PKG_DIRS="-L$LIB_DIR"
else
# Use pkg-config to find libarrow if available and allowed
pkg-config --version >/dev/null 2>&1
- if [ $? -eq 0 ] && [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then
+ if [ $? -eq 0 ]; then
+ PKG_CONFIG_AVAILABLE=true
+ else
+ PKG_CONFIG_AVAILABLE=false
+ fi
+ if [ "$PKG_CONFIG_AVAILABLE" = "true" ] && [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then
# Set the search paths and compile flags
PKGCONFIG_CFLAGS=`pkg-config --cflags --silence-errors ${PKG_CONFIG_NAME}`
- PKGCONFIG_LIBS=`pkg-config --libs-only-l --silence-errors ${PKG_CONFIG_NAME}`
- PKGCONFIG_LIBS="${PKGCONFIG_LIBS} `pkg-config --libs-only-other --silence-errors ${PKG_CONFIG_NAME}`"
+ PKGCONFIG_LIBS=`pkg-config --libs-only-l --libs-only-other --silence-errors ${PKG_CONFIG_NAME}`
PKGCONFIG_DIRS=`pkg-config --libs-only-L --silence-errors ${PKG_CONFIG_NAME}`
fi
if [ "$PKGCONFIG_CFLAGS" ] && [ "$PKGCONFIG_LIBS" ]; then
- FOUND_LIB_DIR=`echo $PKG_DIRS | sed -e 's/^-L//'`
+ FOUND_LIB_DIR=`echo $PKGCONFIG_DIRS | sed -e 's/^-L//'`
echo "*** Arrow C++ libraries found via pkg-config at $FOUND_LIB_DIR"
- PKG_CFLAGS="$PKGCONFIG_CFLAGS"
- PKG_LIBS=${PKGCONFIG_LIBS}
- PKG_DIRS=${PKGCONFIG_DIRS}
+ PKG_CFLAGS="$PKGCONFIG_CFLAGS $PKG_CFLAGS"
+ PKG_LIBS="${PKGCONFIG_LIBS}"
+ PKG_DIRS="${PKGCONFIG_DIRS}"
# Check for version mismatch
PC_LIB_VERSION=`pkg-config --modversion arrow`
@@ -118,9 +126,9 @@ else
if [ "$FORCE_AUTOBREW" != "true" ] && [ "`command -v brew`" ] && [ "`brew ls --versions ${PKG_BREW_NAME}`" != "" ]; then
echo "*** Using Homebrew ${PKG_BREW_NAME}"
BREWDIR=`brew --prefix`
- PKG_LIBS="$PKG_LIBS -larrow_bundled_dependencies"
+ PKG_LIBS="-larrow -larrow_bundled_dependencies"
PKG_DIRS="-L$BREWDIR/opt/$PKG_BREW_NAME/lib $PKG_DIRS"
- PKG_CFLAGS="-I$BREWDIR/opt/$PKG_BREW_NAME/include"
+ PKG_CFLAGS="-I$BREWDIR/opt/$PKG_BREW_NAME/include $PKG_CFLAGS"
else
echo "*** Downloading ${PKG_BREW_NAME}"
if [ -f "autobrew" ]; then
@@ -141,10 +149,10 @@ else
if [ "${NOT_CRAN}" = "true" ]; then
# Set some default values
if [ "${LIBARROW_BINARY}" = "" ]; then
- LIBARROW_BINARY=true; export LIBARROW_BINARY
+ export LIBARROW_BINARY=true
fi
if [ "${LIBARROW_MINIMAL}" = "" ]; then
- LIBARROW_MINIMAL=false; export LIBARROW_MINIMAL
+ export LIBARROW_MINIMAL=false
fi
fi
@@ -157,52 +165,61 @@ else
if [ "$UNAME" = "Darwin" ] && [ "${OPENSSL_ROOT_DIR}" = "" ]; then
brew --prefix openssl >/dev/null 2>&1
if [ $? -eq 0 ]; then
- OPENSSL_ROOT_DIR="`brew --prefix openssl`"; export OPENSSL_ROOT_DIR
+ export OPENSSL_ROOT_DIR="`brew --prefix openssl`"
+ export PKG_CONFIG_PATH="`brew --prefix openssl`/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
fi
fi
if [ "${ARROW_DEPENDENCY_SOURCE}" = "" ]; then
- ARROW_DEPENDENCY_SOURCE=AUTO; export ARROW_DEPENDENCY_SOURCE
+ export ARROW_DEPENDENCY_SOURCE=AUTO
fi
- if [ "${ARROW_DEPENDENCY_SOURCE}" = "AUTO" ]; then
- pkg-config --version >/dev/null 2>&1
- if [ $? -ne 0 ]; then
- export ARROW_DEPENDENCY_SOURCE=BUNDLED
- echo "**** pkg-config not installed, setting ARROW_DEPENDENCY_SOURCE=BUNDLED"
- fi
+ if [ "${ARROW_DEPENDENCY_SOURCE}" = "AUTO" ] && \
+ [ "${PKG_CONFIG_AVAILABLE}" = "false" ]; then
+ export ARROW_DEPENDENCY_SOURCE=BUNDLED
+ echo "**** pkg-config not installed, setting ARROW_DEPENDENCY_SOURCE=BUNDLED"
fi
${R_HOME}/bin/Rscript tools/nixlibs.R $VERSION
- PKG_CFLAGS="-I`pwd`/libarrow/arrow-${VERSION}/include $PKG_CFLAGS"
- LIB_DIR="libarrow/arrow-${VERSION}/lib"
+ LIB_DIR="`pwd`/libarrow/arrow-${VERSION}/lib"
if [ -d "$LIB_DIR" ]; then
- # Enumerate the static libs, put their -l flags in BUNDLED_LIBS,
- # and put their -L location in PKG_DIRS
- #
- # If tools/nixlibs.R fails to produce libs, this dir won't exist
- # so don't try (the error message from `ls` would be misleading)
- # Assume nixlibs.R has handled and messaged about its failure already
- #
- # TODO: what about non-bundled deps?
- # Set CDPATH locally to prevent interference from global CDPATH (if set)
- BUNDLED_LIBS=`CDPATH=''; cd $LIB_DIR && ls *.a`
- BUNDLED_LIBS=`echo "$BUNDLED_LIBS" | sed -e "s/\\.a lib/ -l/g" | sed -e "s/\\.a$//" | sed -e "s/^lib/-l/" | tr '\n' ' ' | sed -e "s/ $//"`
- PKG_DIRS="-L`pwd`/$LIB_DIR"
-
- # Use pkg-config to do static linking of libarrow's dependencies
- if [ "$ARROW_DEPENDENCY_SOURCE" = "AUTO" ] || [ "$ARROW_DEPENDENCY_SOURCE" = "SYSTEM" ]; then
- export PKG_CONFIG_PATH=${LIB_DIR}/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}
- PKG_LIBS="`pkg-config --libs --static --silence-errors ${PKG_CONFIG_NAME}`"
- fi
-
- # When using brew's openssl it is not bundled and it is not on the system
- # search path and so we must add the lib path to BUNDLED_LIBS if we are
- # using it. Note the order is important, this must be after the arrow
- # lib path + the pkg and bundled libs above so this is why we're
- # appending to BUNDLED_LIBS and not PKG_DIRS
- if [ "$OPENSSL_ROOT_DIR" != "" ]; then
- BUNDLED_LIBS="$BUNDLED_LIBS -L$OPENSSL_ROOT_DIR/lib"
+ if [ "${PKG_CONFIG_AVAILABLE}" = "true" ]; then
+ # Use pkg-config to do static linking of libarrow's dependencies
+ export PKG_CONFIG_PATH="${LIB_DIR}/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
+ PKG_CONFIG="pkg-config"
+ # pkg-config on CentOS 7 doesn't have --define-prefix option.
+ if ${PKG_CONFIG} --help | grep -- --define-prefix >/dev/null 2>&1; then
+ # --define-prefix is for binary packages. Binary packages
+ # use "/arrow/r/libarrow/dist" as prefix, which doesn't
+ # match the extracted path. With --define-prefix, pkg-config
+ # derives the prefix from the location of arrow.pc instead
+ # of using "/arrow/r/libarrow/dist".
+ PKG_CONFIG="${PKG_CONFIG} --define-prefix"
+ else
+ # Rewrite prefix= in arrow.pc on CentOS 7.
+ sed \
+ -i.bak \
+ -e "s,prefix=/arrow/r/libarrow/dist,prefix=${LIB_DIR}/..,g" \
+ ${LIB_DIR}/pkgconfig/*.pc
+ rm -f ${LIB_DIR}/pkgconfig/*.pc.bak
+ fi
+ PKG_CONFIG="${PKG_CONFIG} --static --silence-errors"
+ PKG_CFLAGS="`${PKG_CONFIG} --cflags ${PKG_CONFIG_NAME}` $PKG_CFLAGS"
+ PKG_DIRS="`${PKG_CONFIG} --libs-only-L ${PKG_CONFIG_NAME}`"
+ PKG_LIBS="`${PKG_CONFIG} --libs-only-l --libs-only-other ${PKG_CONFIG_NAME}`"
+ else
+ # This case must be ARROW_DEPENDENCY_SOURCE=BUNDLED.
+ PKG_CFLAGS="-I${LIB_DIR}/../include $PKG_CFLAGS"
+ PKG_DIRS="-L${LIB_DIR}"
+ if [ "${OPENSSL_ROOT_DIR}" != "" ]; then
+ PKG_DIRS="${PKG_DIRS} -L${OPENSSL_ROOT_DIR}/lib"
+ fi
+ PKG_LIBS="-larrow"
+ if [ -n "$(find "$LIB_DIR" -name 'libarrow_bundled_dependencies.*')" ]; then
+ PKG_LIBS="$PKG_LIBS -larrow_bundled_dependencies"
+ fi
+ S3_LIBS="-lcurl -lssl -lcrypto"
+ GCS_LIBS="-lcurl -lssl -lcrypto"
fi
fi
fi
@@ -231,7 +248,6 @@ echo "#include $PKG_TEST_HEADER" | ${TEST_CMD} >/dev/null 2>&1
if [ $? -eq 0 ]; then
# Check for features
- LIB_DIR=`echo $PKG_DIRS | sed -e 's/^-L//'`
ARROW_OPTS_CMAKE="$LIB_DIR/cmake/Arrow/ArrowOptions.cmake"
arrow_built_with() {
@@ -262,21 +278,15 @@ if [ $? -eq 0 ]; then
fi
if arrow_built_with ARROW_S3; then
PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_S3"
- if [ "$BUNDLED_LIBS" != "" ]; then
- # We're depending on openssl/curl from the system, so they're not in the bundled deps
- BUNDLED_LIBS="$BUNDLED_LIBS -lssl -lcrypto -lcurl"
- fi
+ PKG_LIBS="$PKG_LIBS $S3_LIBS"
fi
if arrow_built_with ARROW_GCS; then
PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_GCS"
- if [ "$BUNDLED_LIBS" != "" ]; then
- # GCS also requires openssl and curl
- BUNDLED_LIBS="$BUNDLED_LIBS -lssl -lcrypto -lcurl"
- fi
+ PKG_LIBS="$PKG_LIBS $GCS_LIBS"
fi
- # prepend PKG_DIRS and append BUNDLED_LIBS to PKG_LIBS
- PKG_LIBS="$PKG_DIRS $PKG_LIBS $BUNDLED_LIBS"
+ # prepend PKG_DIRS to PKG_LIBS
+ PKG_LIBS="$PKG_DIRS $PKG_LIBS"
echo "PKG_CFLAGS=$PKG_CFLAGS"
echo "PKG_LIBS=$PKG_LIBS"
else
diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh
index 9c0ee21e46..800b9cde3f 100755
--- a/r/inst/build_arrow_static.sh
+++ b/r/inst/build_arrow_static.sh
@@ -86,7 +86,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
-Dxsimd_SOURCE=${xsimd_SOURCE:-} \
-Dzstd_SOURCE=${zstd_SOURCE:-} \
${EXTRA_CMAKE_FLAGS} \
- -G ${CMAKE_GENERATOR:-"Unix Makefiles"} \
+ -G "${CMAKE_GENERATOR:-Unix Makefiles}" \
${SOURCE_DIR}
${CMAKE} --build . --target install -- -j $N_JOBS
diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R
index 7279e245f2..1f10712c33 100644
--- a/r/tests/testthat/helper-skip.R
+++ b/r/tests/testthat/helper-skip.R
@@ -102,6 +102,20 @@ skip_on_r_older_than <- function(r_version) {
}
}
+skip_on_python_older_than <- function(python_version) {
+ if (force_tests()) {
+ return()
+ }
+
+ if (!reticulate::py_available(initialize = TRUE)) {
+ skip("Python isn't available")
+ }
+
+ if (reticulate::py_version() < python_version) {
+ skip(paste("Python version:", reticulate::py_version()))
+ }
+}
+
process_is_running <- function(x) {
if (force_tests()) {
# Return TRUE as this is used as a condition in an if statement
diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R
index 1a83ebd85f..cf10579e2d 100644
--- a/r/tests/testthat/test-python.R
+++ b/r/tests/testthat/test-python.R
@@ -22,6 +22,8 @@ test_that("install_pyarrow", {
# Windows CI machine doesn't pick up the right python or something
skip_on_os("windows")
skip_if_not_installed("reticulate")
+ # PyArrow doesn't support Python 3.6 or earlier
+ skip_on_python_older_than("3.7")
venv <- try(reticulate::virtualenv_create("arrow-test"))
# Bail out if virtualenv isn't available