You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/11/27 21:52:48 UTC

[arrow] branch master updated: ARROW-3862: [C++] Improve third-party dependencies download script

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 57b55de  ARROW-3862: [C++] Improve third-party dependencies download script
57b55de is described below

commit 57b55de6cafd3dbeaec16973f029618bfb2c6905
Author: François Saint-Jacques <fs...@gmail.com>
AuthorDate: Tue Nov 27 16:52:41 2018 -0500

    ARROW-3862: [C++] Improve third-party dependencies download script
    
    The following changes were made:
    
    - Refactored definitions such that adding a new dependency should not
      involve modifying the script but only the `versions.txt` file (excluding cmake).
    - Safer bash mode with `-u`
    - Added the version to the tarball file name. This uses more disk space,
      but is safer if the content lengths of two versions happen to match.
    
    Author: François Saint-Jacques <fs...@gmail.com>
    
    Closes #3021 from fsaintjacques/ARROW-3862-download-script and squashes the following commits:
    
    381bb7547 <François Saint-Jacques> ARROW-3862:  Improve third-party dependencies download script
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake |  2 +-
 cpp/thirdparty/README.md                    | 11 ++++
 cpp/thirdparty/download_dependencies.sh     | 83 ++++++++++-------------------
 cpp/thirdparty/versions.txt                 | 34 +++++++++++-
 4 files changed, 72 insertions(+), 58 deletions(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 6bae283..9829a4d 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -152,7 +152,7 @@ endif()
 file(STRINGS "${THIRDPARTY_DIR}/versions.txt" TOOLCHAIN_VERSIONS_TXT)
 foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT})
   # Exclude comments
-  if(_VERSION_ENTRY MATCHES "#.*")
+  if(NOT _VERSION_ENTRY MATCHES "^[^#][A-Za-z0-9-_]+_VERSION=")
     continue()
   endif()
 
diff --git a/cpp/thirdparty/README.md b/cpp/thirdparty/README.md
index f4f89f5..bd1cb28 100644
--- a/cpp/thirdparty/README.md
+++ b/cpp/thirdparty/README.md
@@ -68,6 +68,7 @@ bash-style environment variable statements at the end to use for your build
 script:
 
 ```shell
+# Download tarballs into `$HOME/arrow-thirdparty-deps`
 $ ./thirdparty/download_dependencies $HOME/arrow-thirdparty-deps
 # some output omitted
 
@@ -87,3 +88,13 @@ export ARROW_PROTOBUF_URL=$HOME/arrow-thirdparty-deps/protobuf.tar.gz
 export ARROW_GRPC_URL=$HOME/arrow-thirdparty-deps/grpc.tar.gz
 export ARROW_ORC_URL=$HOME/arrow-thirdparty-deps/orc.tar.gz
 ```
+
+This can be automated by using inline source/eval:
+
+```shell
+$ source <(./thirdparty/download_dependencies.sh $HOME/arrow-thirdparty-deps)
+```
+
+You can then invoke CMake to create the build directory, and it will use the
+declared environment variables pointing to the downloaded archives instead of
+downloading them again (once for each build dir!).
diff --git a/cpp/thirdparty/download_dependencies.sh b/cpp/thirdparty/download_dependencies.sh
index 4be715f..ea63a8a 100755
--- a/cpp/thirdparty/download_dependencies.sh
+++ b/cpp/thirdparty/download_dependencies.sh
@@ -20,74 +20,45 @@
 # This script downloads all the thirdparty dependencies as a series of tarballs
 # that can be used for offline builds, etc.
 
-set -e
+set -eu
 
 SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
 if [ "$#" -ne 1 ]; then
-  echo "Usage: $0 <destination-directory>"
-  exit
+  DESTDIR=$(pwd)
+else
+  DESTDIR=$1
 fi
 
-_DST=`realpath $1`
+DESTDIR=$(realpath "${DESTDIR}")
 
-# To change toolchain versions, edit versions.txt
-source $SOURCE_DIR/versions.txt
+download_dependency() {
+  local url=$1
+  local out=$2
 
-mkdir -p $_DST
+  # --show-progress does not write to stdout, so it is safe to pipe the output
+  # of the script into eval.
+  wget --quiet --show-progress --continue --output-document="${out}" "${url}"
+}
 
-BOOST_UNDERSCORE_VERSION=`echo $BOOST_VERSION | sed 's/\./_/g'`
-wget -c -O $_DST/boost.tar.gz https://dl.bintray.com/boostorg/release/$BOOST_VERSION/source/boost_$BOOST_UNDERSCORE_VERSION.tar.gz
+main() {
+  mkdir -p "${DESTDIR}"
 
-wget -c -O $_DST/gtest.tar.gz https://github.com/google/googletest/archive/release-$GTEST_VERSION.tar.gz
+  # Load `DEPENDENCIES` variable.
+  source ${SOURCE_DIR}/versions.txt
 
-wget -c -O $_DST/gflags.tar.gz https://github.com/gflags/gflags/archive/$GFLAGS_VERSION.tar.gz
+  echo "# Environment variables for offline Arrow build"
+  for ((i = 0; i < ${#DEPENDENCIES[@]}; i++)); do
+    local dep_packed=${DEPENDENCIES[$i]}
 
-wget -c -O $_DST/gbenchmark.tar.gz https://github.com/google/benchmark/archive/$GBENCHMARK_VERSION.tar.gz
+    # Unpack each entry of the form "$dep_url_var $dep_tar_name $dep_url"
+    IFS=" " read -r dep_url_var dep_tar_name dep_url <<< "${dep_packed}"
 
-wget -c -O $_DST/flatbuffers.tar.gz https://github.com/google/flatbuffers/archive/$FLATBUFFERS_VERSION.tar.gz
+    local out=${DESTDIR}/${dep_tar_name}
+    download_dependency "${dep_url}" "${out}"
 
-wget -c -O $_DST/rapidjson.tar.gz https://github.com/miloyip/rapidjson/archive/$RAPIDJSON_VERSION.tar.gz
+    echo "export ${dep_url_var}=${out}"
+  done
+}
 
-wget -c -O $_DST/snappy.tar.gz https://github.com/google/snappy/releases/download/$SNAPPY_VERSION/snappy-$SNAPPY_VERSION.tar.gz
-
-wget -c -O $_DST/brotli.tar.gz https://github.com/google/brotli/archive/$BROTLI_VERSION.tar.gz
-
-wget -c -O $_DST/lz4.tar.gz https://github.com/lz4/lz4/archive/$LZ4_VERSION.tar.gz
-
-wget -c -O $_DST/zlib.tar.gz http://zlib.net/fossils/zlib-$ZLIB_VERSION.tar.gz
-
-wget -c -O $_DST/zstd.tar.gz https://github.com/facebook/zstd/archive/$ZSTD_VERSION.tar.gz
-
-wget -c -O $_DST/protobuf.tar.gz https://github.com/google/protobuf/releases/download/$PROTOBUF_VERSION/protobuf-all-${PROTOBUF_VERSION:1}.tar.gz
-
-wget -c -O $_DST/grpc.tar.gz https://github.com/grpc/grpc/archive/$GRPC_VERSION.tar.gz
-
-wget -c -O $_DST/orc.tar.gz https://github.com/apache/orc/archive/rel/release-$ORC_VERSION.tar.gz
-
-wget -c -O $_DST/thrift.tar.gz http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz
-
-wget -c -O $_DST/glog.tar.gz https://github.com/google/glog/archive/${GLOG_VERSION}.tar.gz
-
-wget -c -O $_DST/double-conversion.tar.gz  https://github.com/google/double-conversion/archive/${DOUBLE_CONVERSION_VERSION}.tar.gz
-
-echo "
-# Environment variables for offline Arrow build
-export ARROW_BOOST_URL=$_DST/boost.tar.gz
-export ARROW_GTEST_URL=$_DST/gtest.tar.gz
-export ARROW_GFLAGS_URL=$_DST/gflags.tar.gz
-export ARROW_GBENCHMARK_URL=$_DST/gbenchmark.tar.gz
-export ARROW_FLATBUFFERS_URL=$_DST/flatbuffers.tar.gz
-export ARROW_RAPIDJSON_URL=$_DST/rapidjson.tar.gz
-export ARROW_SNAPPY_URL=$_DST/snappy.tar.gz
-export ARROW_BROTLI_URL=$_DST/brotli.tar.gz
-export ARROW_LZ4_URL=$_DST/lz4.tar.gz
-export ARROW_ZLIB_URL=$_DST/zlib.tar.gz
-export ARROW_ZSTD_URL=$_DST/zstd.tar.gz
-export ARROW_PROTOBUF_URL=$_DST/protobuf.tar.gz
-export ARROW_GRPC_URL=$_DST/grpc.tar.gz
-export ARROW_ORC_URL=$_DST/orc.tar.gz
-export ARROW_THRIFT_URL=$_DST/thrift.tar.gz
-export ARROW_GLOG_URL=$_DST/glog.tar.gz
-export ARROW_DOUBLE_CONVERSION_URL=$_DST/double-conversion.tar.gz
-"
+main
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 51a0c5c..705f56c 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -16,6 +16,12 @@
 # under the License.
 
 # Toolchain library versions
+#
+# This file is used by `download_dependencies.sh` and cmake to figure out which
+# version of a dependency to fetch. In order to add a new dependency, add a
+# version variable, e.g. MY_DEP_VERSION and append an entry in the
+# `DEPENDENCIES` array (see the comment on top of the declaration for the
+# format).
 
 BOOST_VERSION=1.67.0
 BROTLI_VERSION=v0.6.0
@@ -31,8 +37,34 @@ LZ4_VERSION=v1.7.5
 ORC_VERSION=1.5.1
 PROTOBUF_VERSION=v3.6.1
 RAPIDJSON_VERSION=v1.1.0
+RE2_VERSION=2018-10-01
 SNAPPY_VERSION=1.1.3
 THRIFT_VERSION=0.11.0
 ZLIB_VERSION=1.2.8
 ZSTD_VERSION=v1.3.7
-RE2_VERSION=2018-10-01
+
+# Each entry holds three space-separated fields. The first is the name of the
+# environment variable expected by cmake; it _must_ match the name used there.
+# The second is the file name of the downloaded archive. The third is the
+# download URL of the project for the given version.
+DEPENDENCIES=(
+  "ARROW_BOOST_URL boost-${BOOST_VERSION}.tar.gz https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION//./_}.tar.gz"
+  "ARROW_BROTLI_URL brotli-${BROTLI_VERSION}.tar.gz https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz"
+  "ARROW_DOUBLE_CONVERSION_URL double-conversion-${DOUBLE_CONVERSION_VERSION}.tar.gz https://github.com/google/double-conversion/archive/${DOUBLE_CONVERSION_VERSION}.tar.gz"
+  "ARROW_FLATBUFFERS_URL flatbuffers-${FLATBUFFERS_VERSION}.tar.gz https://github.com/google/flatbuffers/archive/${FLATBUFFERS_VERSION}.tar.gz"
+  "ARROW_GBENCHMARK_URL gbenchmark-${GBENCHMARK_VERSION}.tar.gz https://github.com/google/benchmark/archive/${GBENCHMARK_VERSION}.tar.gz"
+  "ARROW_GFLAGS_URL gflags-${GFLAGS_VERSION}.tar.gz https://github.com/gflags/gflags/archive/${GFLAGS_VERSION}.tar.gz"
+  "ARROW_GLOG_URL glog-${GLOG_VERSION}.tar.gz https://github.com/google/glog/archive/${GLOG_VERSION}.tar.gz"
+  "ARROW_GRPC_URL grpc-${GRPC_VERSION}.tar.gz https://github.com/grpc/grpc/archive/${GRPC_VERSION}.tar.gz"
+  "ARROW_GTEST_URL gtest-${GTEST_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
+  "ARROW_LZ4_URL lz4-${LZ4_VERSION}.tar.gz https://github.com/lz4/lz4/archive/${LZ4_VERSION}.tar.gz"
+  "ARROW_ORC_URL orc-${ORC_VERSION}.tar.gz https://github.com/apache/orc/archive/rel/release-${ORC_VERSION}.tar.gz"
+  "ARROW_PROTOBUF_URL protobuf-${PROTOBUF_VERSION}.tar.gz https://github.com/google/protobuf/releases/download/${PROTOBUF_VERSION}/protobuf-all-${PROTOBUF_VERSION:1}.tar.gz"
+  "ARROW_RAPIDJSON_URL rapidjson-${RAPIDJSON_VERSION}.tar.gz https://github.com/miloyip/rapidjson/archive/${RAPIDJSON_VERSION}.tar.gz"
+  "ARROW_SNAPPY_URL snappy-${SNAPPY_VERSION}.tar.gz https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz"
+  "ARROW_THRIFT_URL thrift-${THRIFT_VERSION}.tar.gz http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz"
+  "ARROW_ZLIB_URL zlib-${ZLIB_VERSION}.tar.gz http://zlib.net/fossils/zlib-${ZLIB_VERSION}.tar.gz"
+  "ARROW_ZSTD_URL zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz"
+)
+
+# vim: set filetype=sh: