You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/11/27 21:52:48 UTC
[arrow] branch master updated: ARROW-3862: [C++] Improve
third-party dependencies download script
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 57b55de ARROW-3862: [C++] Improve third-party dependencies download script
57b55de is described below
commit 57b55de6cafd3dbeaec16973f029618bfb2c6905
Author: François Saint-Jacques <fs...@gmail.com>
AuthorDate: Tue Nov 27 16:52:41 2018 -0500
ARROW-3862: [C++] Improve third-party dependencies download script
The following changes were made:
- Refactored definitions such that adding a new dependency should not
involve modifying the script but only the `versions.txt` file (excluding cmake).
- Safer bash mode with `-u`
- Added the version to the tarball file name at the cost of disk space, but this
is safer in case the content-length happens to match for some unlucky reason.
Author: François Saint-Jacques <fs...@gmail.com>
Closes #3021 from fsaintjacques/ARROW-3862-download-script and squashes the following commits:
381bb7547 <François Saint-Jacques> ARROW-3862: Improve third-party dependencies download script
---
cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +-
cpp/thirdparty/README.md | 11 ++++
cpp/thirdparty/download_dependencies.sh | 83 ++++++++++-------------------
cpp/thirdparty/versions.txt | 34 +++++++++++-
4 files changed, 72 insertions(+), 58 deletions(-)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 6bae283..9829a4d 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -152,7 +152,7 @@ endif()
file(STRINGS "${THIRDPARTY_DIR}/versions.txt" TOOLCHAIN_VERSIONS_TXT)
foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
# Exclude comments
- if(_VERSION_ENTRY MATCHES "#.*")
+ if(NOT _VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=")
continue()
endif()
diff --git a/cpp/thirdparty/README.md b/cpp/thirdparty/README.md
index f4f89f5..bd1cb28 100644
--- a/cpp/thirdparty/README.md
+++ b/cpp/thirdparty/README.md
@@ -68,6 +68,7 @@ bash-style environment variable statements at the end to use for your build
script:
```shell
+# Download tarballs into `$HOME/arrow-thirdparty-deps`
$ ./thirdparty/download_dependencies.sh $HOME/arrow-thirdparty-deps
# some output omitted
@@ -87,3 +88,13 @@ export ARROW_PROTOBUF_URL=$HOME/arrow-thirdparty-deps/protobuf.tar.gz
export ARROW_GRPC_URL=$HOME/arrow-thirdparty-deps/grpc.tar.gz
export ARROW_ORC_URL=$HOME/arrow-thirdparty-deps/orc.tar.gz
```
+
+This can be automated by using inline source/eval:
+
+```shell
+$ source <(./thirdparty/download_dependencies.sh $HOME/arrow-thirdparty-deps)
+```
+
+You can then invoke CMake to create the build directory and it will use the
+declared environment variables pointing to the downloaded archives instead of
+downloading them again (once for each build dir!).
diff --git a/cpp/thirdparty/download_dependencies.sh b/cpp/thirdparty/download_dependencies.sh
index 4be715f..ea63a8a 100755
--- a/cpp/thirdparty/download_dependencies.sh
+++ b/cpp/thirdparty/download_dependencies.sh
@@ -20,74 +20,45 @@
# This script downloads all the thirdparty dependencies as a series of tarballs
# that can be used for offline builds, etc.
-set -e
+set -eu
SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
if [ "$#" -ne 1 ]; then
- echo "Usage: $0 <destination-directory>"
- exit
+ DESTDIR=$(pwd)
+else
+ DESTDIR=$1
fi
-_DST=`realpath $1`
+DESTDIR=$(realpath "${DESTDIR}")
-# To change toolchain versions, edit versions.txt
-source $SOURCE_DIR/versions.txt
+download_dependency() {
+ local url=$1
+ local out=$2
-mkdir -p $_DST
+ # --show-progress will not output to stdout, it is safe to pipe the result of
+ # the script into eval.
+ wget --quiet --show-progress --continue --output-document="${out}" "${url}"
+}
-BOOST_UNDERSCORE_VERSION=`echo $BOOST_VERSION | sed 's/\./_/g'`
-wget -c -O $_DST/boost.tar.gz https://dl.bintray.com/boostorg/release/$BOOST_VERSION/source/boost_$BOOST_UNDERSCORE_VERSION.tar.gz
+main() {
+ mkdir -p "${DESTDIR}"
-wget -c -O $_DST/gtest.tar.gz https://github.com/google/googletest/archive/release-$GTEST_VERSION.tar.gz
+ # Load `DEPENDENCIES` variable.
+ source ${SOURCE_DIR}/versions.txt
-wget -c -O $_DST/gflags.tar.gz https://github.com/gflags/gflags/archive/$GFLAGS_VERSION.tar.gz
+ echo "# Environment variables for offline Arrow build"
+ for ((i = 0; i < ${#DEPENDENCIES[@]}; i++)); do
+ local dep_packed=${DEPENDENCIES[$i]}
-wget -c -O $_DST/gbenchmark.tar.gz https://github.com/google/benchmark/archive/$GBENCHMARK_VERSION.tar.gz
+ # Unpack each entry of the form "$dep_url_var $dep_tar_name $dep_url"
+ IFS=" " read -r dep_url_var dep_tar_name dep_url <<< "${dep_packed}"
-wget -c -O $_DST/flatbuffers.tar.gz https://github.com/google/flatbuffers/archive/$FLATBUFFERS_VERSION.tar.gz
+ local out=${DESTDIR}/${dep_tar_name}
+ download_dependency "${dep_url}" "${out}"
-wget -c -O $_DST/rapidjson.tar.gz https://github.com/miloyip/rapidjson/archive/$RAPIDJSON_VERSION.tar.gz
+ echo "export ${dep_url_var}=${out}"
+ done
+}
-wget -c -O $_DST/snappy.tar.gz https://github.com/google/snappy/releases/download/$SNAPPY_VERSION/snappy-$SNAPPY_VERSION.tar.gz
-
-wget -c -O $_DST/brotli.tar.gz https://github.com/google/brotli/archive/$BROTLI_VERSION.tar.gz
-
-wget -c -O $_DST/lz4.tar.gz https://github.com/lz4/lz4/archive/$LZ4_VERSION.tar.gz
-
-wget -c -O $_DST/zlib.tar.gz http://zlib.net/fossils/zlib-$ZLIB_VERSION.tar.gz
-
-wget -c -O $_DST/zstd.tar.gz https://github.com/facebook/zstd/archive/$ZSTD_VERSION.tar.gz
-
-wget -c -O $_DST/protobuf.tar.gz https://github.com/google/protobuf/releases/download/$PROTOBUF_VERSION/protobuf-all-${PROTOBUF_VERSION:1}.tar.gz
-
-wget -c -O $_DST/grpc.tar.gz https://github.com/grpc/grpc/archive/$GRPC_VERSION.tar.gz
-
-wget -c -O $_DST/orc.tar.gz https://github.com/apache/orc/archive/rel/release-$ORC_VERSION.tar.gz
-
-wget -c -O $_DST/thrift.tar.gz http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz
-
-wget -c -O $_DST/glog.tar.gz https://github.com/google/glog/archive/${GLOG_VERSION}.tar.gz
-
-wget -c -O $_DST/double-conversion.tar.gz https://github.com/google/double-conversion/archive/${DOUBLE_CONVERSION_VERSION}.tar.gz
-
-echo "
-# Environment variables for offline Arrow build
-export ARROW_BOOST_URL=$_DST/boost.tar.gz
-export ARROW_GTEST_URL=$_DST/gtest.tar.gz
-export ARROW_GFLAGS_URL=$_DST/gflags.tar.gz
-export ARROW_GBENCHMARK_URL=$_DST/gbenchmark.tar.gz
-export ARROW_FLATBUFFERS_URL=$_DST/flatbuffers.tar.gz
-export ARROW_RAPIDJSON_URL=$_DST/rapidjson.tar.gz
-export ARROW_SNAPPY_URL=$_DST/snappy.tar.gz
-export ARROW_BROTLI_URL=$_DST/brotli.tar.gz
-export ARROW_LZ4_URL=$_DST/lz4.tar.gz
-export ARROW_ZLIB_URL=$_DST/zlib.tar.gz
-export ARROW_ZSTD_URL=$_DST/zstd.tar.gz
-export ARROW_PROTOBUF_URL=$_DST/protobuf.tar.gz
-export ARROW_GRPC_URL=$_DST/grpc.tar.gz
-export ARROW_ORC_URL=$_DST/orc.tar.gz
-export ARROW_THRIFT_URL=$_DST/thrift.tar.gz
-export ARROW_GLOG_URL=$_DST/glog.tar.gz
-export ARROW_DOUBLE_CONVERSION_URL=$_DST/double-conversion.tar.gz
-"
+main
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 51a0c5c..705f56c 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -16,6 +16,12 @@
# under the License.
# Toolchain library versions
+#
+# This file is used by `download_dependencies.sh` and cmake to figure out which
+# version of a dependency to fetch. In order to add a new dependency, add a
+# version variable, e.g. MY_DEP_VERSION and append an entry in the
+# `DEPENDENCIES` array (see the comment on top of the declaration for the
+# format).
BOOST_VERSION=1.67.0
BROTLI_VERSION=v0.6.0
@@ -31,8 +37,34 @@ LZ4_VERSION=v1.7.5
ORC_VERSION=1.5.1
PROTOBUF_VERSION=v3.6.1
RAPIDJSON_VERSION=v1.1.0
+RE2_VERSION=2018-10-01
SNAPPY_VERSION=1.1.3
THRIFT_VERSION=0.11.0
ZLIB_VERSION=1.2.8
ZSTD_VERSION=v1.3.7
-RE2_VERSION=2018-10-01
+
+# The first field is the name of the environment variable expected by cmake.
+# This _must_ match what is defined. The second field is the name of the
+# generated archive file. The third field is the url of the project for the
+# given version.
+DEPENDENCIES=(
+ "ARROW_BOOST_URL boost-${BOOST_VERSION}.tar.gz https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION//./_}.tar.gz"
+ "ARROW_BROTLI_URL brotli-${BROTLI_VERSION}.tar.gz https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz"
+ "ARROW_DOUBLE_CONVERSION_URL double-conversion-${DOUBLE_CONVERSION_VERSION}.tar.gz https://github.com/google/double-conversion/archive/${DOUBLE_CONVERSION_VERSION}.tar.gz"
+ "ARROW_FLATBUFFERS_URL flatbuffers-${FLATBUFFERS_VERSION}.tar.gz https://github.com/google/flatbuffers/archive/${FLATBUFFERS_VERSION}.tar.gz"
+ "ARROW_GBENCHMARK_URL gbenchmark-${GBENCHMARK_VERSION}.tar.gz https://github.com/google/benchmark/archive/${GBENCHMARK_VERSION}.tar.gz"
+ "ARROW_GFLAGS_URL gflags-${GFLAGS_VERSION}.tar.gz https://github.com/gflags/gflags/archive/${GFLAGS_VERSION}.tar.gz"
+ "ARROW_GLOG_URL glog-${GLOG_VERSION}.tar.gz https://github.com/google/glog/archive/${GLOG_VERSION}.tar.gz"
+ "ARROW_GRPC_URL grpc-${GRPC_VERSION}.tar.gz https://github.com/grpc/grpc/archive/${GRPC_VERSION}.tar.gz"
+ "ARROW_GTEST_URL gtest-${GTEST_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
+ "ARROW_LZ4_URL lz4-${LZ4_VERSION}.tar.gz https://github.com/lz4/lz4/archive/${LZ4_VERSION}.tar.gz"
+ "ARROW_ORC_URL orc-${ORC_VERSION}.tar.gz https://github.com/apache/orc/archive/rel/release-${ORC_VERSION}.tar.gz"
+ "ARROW_PROTOBUF_URL protobuf-${PROTOBUF_VERSION}.tar.gz https://github.com/google/protobuf/releases/download/${PROTOBUF_VERSION}/protobuf-all-${PROTOBUF_VERSION:1}.tar.gz"
+ "ARROW_RAPIDJSON_URL rapidjson-${RAPIDJSON_VERSION}.tar.gz https://github.com/miloyip/rapidjson/archive/${RAPIDJSON_VERSION}.tar.gz"
+ "ARROW_SNAPPY_URL snappy-${SNAPPY_VERSION}.tar.gz https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz"
+ "ARROW_THRIFT_URL thrift-${THRIFT_VERSION}.tar.gz http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz"
+ "ARROW_ZLIB_URL zlib-${ZLIB_VERSION}.tar.gz http://zlib.net/fossils/zlib-${ZLIB_VERSION}.tar.gz"
+ "ARROW_ZSTD_URL zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz"
+)
+
+# vim: set filetype=sh: