You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/08/14 03:30:24 UTC

[arrow] branch master updated: ARROW-2952: [C++] Dockerized include-what-you-use

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new affd6d7  ARROW-2952: [C++] Dockerized include-what-you-use
affd6d7 is described below

commit affd6d7be3d0d0684ced5471e144cb53d946cfbd
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Aug 13 23:30:19 2018 -0400

    ARROW-2952: [C++] Dockerized include-what-you-use
    
    I'm trying to get this working but installing the LLVM tools takes a long time (I ran into https://issues.apache.org/jira/browse/ARROW-3033).
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #2416 from wesm/ARROW-2952 and squashes the following commits:
    
    22bdc9db <Wes McKinney> Restore all C++ files prior to IWYU edits so we can deal with issues in some other PR
    70a64fc1 <Wes McKinney> Restore forward declaration to try to fix msvc
    6da7cd74 <Wes McKinney> lint
    0f5fc6fc <Wes McKinney> Add note to cpp/README.md
    873d6459 <Wes McKinney> Fix up Dockerfiles to work again
    a487fc2e <Wes McKinney> Try to modularize installing conda environment. Why does this not work?
    611cd2ed <Wes McKinney> Clean up IWYU script more. Need to install Python toolchain in Dockerfile
    8501562c <Wes McKinney> Set CC/CXX compiler to clang-6
    5c4ade8a <Wes McKinney> First cut at IWYU docker setup
---
 cpp/README.md                                  | 16 +++---
 cpp/build-support/iwyu/iwyu.sh                 | 10 ++--
 cpp/build-support/iwyu/mappings/arrow-misc.imp | 26 ++++++++--
 dev/README.md                                  |  6 +++
 dev/dask_integration/Dockerfile                | 70 +-------------------------
 dev/dask_integration/dask_integration.sh       | 49 ++++++++++++++++++
 dev/docker-compose.yml                         |  6 +++
 dev/gen_apidocs/Dockerfile                     | 41 ++++-----------
 dev/hdfs_integration/Dockerfile                | 13 +++--
 dev/iwyu/Dockerfile                            | 23 +++++++++
 dev/iwyu/run_iwyu.sh                           | 66 ++++++++++++++++++++++++
 dev/spark_integration/Dockerfile               | 58 +++++++++++----------
 12 files changed, 232 insertions(+), 152 deletions(-)

diff --git a/cpp/README.md b/cpp/README.md
index 7d3823a..9371be7 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -288,14 +288,16 @@ CC="clang-4.0" CXX="clang++-4.0" cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ..
 
 This presumes that `include-what-you-use` and `iwyu_tool.py` are in your
 `$PATH`. If you compiled IWYU using a different version of clang, then
-substitute the version number above accordingly. The results of this script are
-logged to a temporary file, whose location can be found by examining the shell
-output:
+substitute the version number above accordingly.
 
-```
-...
-Logging IWYU to /tmp/arrow-cpp-iwyu.gT7XXV
-...
+We have provided a Docker-based IWYU to make it easier to run these
+checks. This can be run using the docker-compose setup in the `dev/` directory:
+
+```shell
+# If you have not built the base image already
+docker build -t arrow_integration_xenial_base -f dev/docker_common/Dockerfile.xenial.base .
+
+dev/run_docker_compose.sh iwyu
 ```
 
 ### Linting
diff --git a/cpp/build-support/iwyu/iwyu.sh b/cpp/build-support/iwyu/iwyu.sh
index f3fadd6..865ac8e 100755
--- a/cpp/build-support/iwyu/iwyu.sh
+++ b/cpp/build-support/iwyu/iwyu.sh
@@ -27,21 +27,19 @@ trap "rm -f $IWYU_LOG" EXIT
 echo "Logging IWYU to $IWYU_LOG"
 
 IWYU_MAPPINGS_PATH="$ROOT/cpp/build-support/iwyu/mappings"
-IWYU_ARGS="\
-    --mapping_file=$IWYU_MAPPINGS_PATH/boost-all.imp \
+IWYU_ARGS="--mapping_file=$IWYU_MAPPINGS_PATH/boost-all.imp \
     --mapping_file=$IWYU_MAPPINGS_PATH/boost-all-private.imp \
     --mapping_file=$IWYU_MAPPINGS_PATH/boost-extra.imp \
     --mapping_file=$IWYU_MAPPINGS_PATH/gflags.imp \
     --mapping_file=$IWYU_MAPPINGS_PATH/glog.imp \
     --mapping_file=$IWYU_MAPPINGS_PATH/gtest.imp \
-    --mapping_file=$IWYU_MAPPINGS_PATH/arrow-misc.imp"
+     --mapping_file=$IWYU_MAPPINGS_PATH/arrow-misc.imp"
 
 set -e
 
 if [ "$1" == "all" ]; then
-  python $ROOT/cpp/build-support/iwyu/iwyu_tool.py -p . -- \
-       $IWYU_ARGS | awk -f $ROOT/cpp/build-support/iwyu/iwyu-filter.awk | \
-       tee $IWYU_LOG
+    python $ROOT/cpp/build-support/iwyu/iwyu_tool.py -p . \
+        -- $IWYU_ARGS | awk -f $ROOT/cpp/build-support/iwyu/iwyu-filter.awk
 else
   # Build the list of updated files which are of IWYU interest.
   file_list_tmp=$(git diff --name-only \
diff --git a/cpp/build-support/iwyu/mappings/arrow-misc.imp b/cpp/build-support/iwyu/mappings/arrow-misc.imp
index f39650d..cbe5bd1 100644
--- a/cpp/build-support/iwyu/mappings/arrow-misc.imp
+++ b/cpp/build-support/iwyu/mappings/arrow-misc.imp
@@ -15,19 +15,37 @@
 # specific language governing permissions and limitations
 # under the License.
 [
+  { include: ["<ext/alloc_traits.h>", private, "<memory>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<condition_variable>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<deque>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<forward_list>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<future>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<map>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<set>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<string>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<unordered_map>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<unordered_set>", public ] },
+  { include: ["<ext/alloc_traits.h>", private, "<vector>", public ] },
+  { include: ["<bits/stdint-intn.h>", private, "<cstdint>", public ] },
+  { include: ["<bits/stdint-uintn.h>", private, "<cstdint>", public ] },
+  { include: ["<bits/shared_ptr.h>", private, "<memory>", public ] },
   { symbol: ["bool", private, "<cstdint>", public ] },
   { symbol: ["false", private, "<cstdint>", public ] },
   { symbol: ["true", private, "<cstdint>", public ] },
-  { symbol: ["int64_t", private, "<cstdint>", public ] },
+  { symbol: ["int8_t", private, "<cstdint>", public ] },
   { symbol: ["int16_t", private, "<cstdint>", public ] },
   { symbol: ["int32_t", private, "<cstdint>", public ] },
+  { symbol: ["int64_t", private, "<cstdint>", public ] },
   { symbol: ["uint8_t", private, "<cstdint>", public ] },
+  { symbol: ["uint16_t", private, "<cstdint>", public ] },
+  { symbol: ["uint32_t", private, "<cstdint>", public ] },
+  { symbol: ["uint64_t", private, "<cstdint>", public ] },
+  { symbol: ["make_shared", private, "<memory>", public ] },
+  { symbol: ["shared_ptr", private, "<memory>", public ] },
   { symbol: ["_Node_const_iterator", private, "<flatbuffers/flatbuffers.h>", public ] },
   { symbol: ["unordered_map<>::mapped_type", private, "<flatbuffers/flatbuffers.h>", public ] },
   { symbol: ["move", private, "<utility>", public ] },
   { symbol: ["pair", private, "<utility>", public ] },
   { symbol: ["errno", private, "<cerrno>", public ] },
-  { symbol: ["posix_memalign", private, "<cstdlib>", public ] },
-  { include: ["<ext/alloc_traits.h>", private, "<vector>", public ] },
-  { include: ["<string.h>", private, "<cstring>", public ] }
+  { symbol: ["posix_memalign", private, "<cstdlib>", public ] }
 ]
diff --git a/dev/README.md b/dev/README.md
index 276d75f..98aeef6 100644
--- a/dev/README.md
+++ b/dev/README.md
@@ -123,6 +123,12 @@ bash dev/release/js-verify-release-candidate.sh 0.7.0 0
 ```
 # Integration testing
 
+Build the following base image used by multiple tests:
+
+```shell
+docker build -t arrow_integration_xenial_base -f docker_common/Dockerfile.xenial.base .
+```
+
 ## HDFS C++ / Python support
 
 ```shell
diff --git a/dev/dask_integration/Dockerfile b/dev/dask_integration/Dockerfile
index f72ef8c..f0c1f03 100644
--- a/dev/dask_integration/Dockerfile
+++ b/dev/dask_integration/Dockerfile
@@ -14,75 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-FROM ubuntu:14.04
+FROM arrow_integration_xenial_base
+
 ADD . /apache-arrow
 WORKDIR /apache-arrow
-# Basic OS utilities
-RUN apt-get update && apt-get install -y \
-        wget \
-        git \
-        gcc \
-        g++
-# This will install conda in /home/ubuntu/miniconda
-RUN wget -O /tmp/miniconda.sh \
-    https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    bash /tmp/miniconda.sh -b -p /home/ubuntu/miniconda && \
-    rm /tmp/miniconda.sh
-# Create Conda environment
-ENV PATH="/home/ubuntu/miniconda/bin:${PATH}"
-RUN conda create -y -q -n test-environment \
-    python=3.6
-# Install dependencies
-RUN conda install -c conda-forge \
-    numpy \
-    pandas \
-    bcolz \
-    blosc \
-    bokeh \
-    boto3 \
-    chest \
-    cloudpickle \
-    coverage \
-    cytoolz \
-    distributed \
-    graphviz \
-    h5py \
-    ipython \
-    partd \
-    psutil \
-    "pytest<=3.1.1" \
-    scikit-image \
-    scikit-learn \
-    scipy \
-    sqlalchemy \
-    toolz
-# install pytables from defaults for now
-RUN conda install pytables
-
-RUN pip install -q git+https://github.com/dask/partd --upgrade --no-deps
-RUN pip install -q git+https://github.com/dask/zict --upgrade --no-deps
-RUN pip install -q git+https://github.com/dask/distributed --upgrade --no-deps
-RUN pip install -q git+https://github.com/mrocklin/sparse --upgrade --no-deps
-RUN pip install -q git+https://github.com/dask/s3fs --upgrade --no-deps
-
-RUN conda install -q -c conda-forge numba cython
-RUN pip install -q git+https://github.com/dask/fastparquet
-
-RUN pip install -q \
-    cachey \
-    graphviz \
-    moto \
-    pyarrow \
-    --upgrade --no-deps
-
-RUN pip install -q \
-    cityhash \
-    flake8 \
-    mmh3 \
-    pandas_datareader \
-    pytest-xdist \
-    xxhash \
-    pycodestyle
 
 CMD arrow/dev/dask_integration/dask_integration.sh
-
diff --git a/dev/dask_integration/dask_integration.sh b/dev/dask_integration/dask_integration.sh
index f5a24e4..fb519c8 100755
--- a/dev/dask_integration/dask_integration.sh
+++ b/dev/dask_integration/dask_integration.sh
@@ -19,6 +19,55 @@
 # Set up environment and working directory
 cd /apache-arrow
 
+source activate pyarrow-dev
+
+# install pytables from defaults for now
+conda install -y pytables
+
+pip install -q git+https://github.com/dask/partd --upgrade --no-deps
+pip install -q git+https://github.com/dask/zict --upgrade --no-deps
+pip install -q git+https://github.com/dask/distributed --upgrade --no-deps
+pip install -q git+https://github.com/mrocklin/sparse --upgrade --no-deps
+pip install -q git+https://github.com/dask/s3fs --upgrade --no-deps
+
+conda install -y -q -c conda-forge numba cython \
+    bcolz \
+    blosc \
+    bokeh \
+    boto3 \
+    chest \
+    cloudpickle \
+    coverage \
+    cytoolz \
+    distributed \
+    graphviz \
+    h5py \
+    partd \
+    psutil \
+    "pytest<=3.1.1" \
+    scikit-image \
+    scikit-learn \
+    sqlalchemy \
+    toolz
+
+pip install -q git+https://github.com/dask/fastparquet
+
+pip install -q \
+    cachey \
+    graphviz \
+    moto \
+    pyarrow \
+    --upgrade --no-deps
+
+pip install -q \
+    cityhash \
+    flake8 \
+    mmh3 \
+    pandas_datareader \
+    pytest-xdist \
+    xxhash \
+    pycodestyle
+
 export ARROW_BUILD_TYPE=release
 export ARROW_HOME=$(pwd)/dist
 export PARQUET_HOME=$(pwd)/dist
diff --git a/dev/docker-compose.yml b/dev/docker-compose.yml
index c450098..8f20dde 100644
--- a/dev/docker-compose.yml
+++ b/dev/docker-compose.yml
@@ -67,6 +67,12 @@ services:
     volumes:
      - ../..:/apache-arrow
 
+  iwyu:
+    build:
+      context: iwyu
+    volumes:
+     - ../..:/apache-arrow
+
   run_site:
     build:
       context: run_site
diff --git a/dev/gen_apidocs/Dockerfile b/dev/gen_apidocs/Dockerfile
index 13a6ff4..606ae2e 100644
--- a/dev/gen_apidocs/Dockerfile
+++ b/dev/gen_apidocs/Dockerfile
@@ -39,47 +39,24 @@ RUN apt-get update && \
         autoconf-archive \
         npm
 
-# This will install conda in /home/ubuntu/miniconda
-RUN wget -O /tmp/miniconda.sh \
-    https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    bash /tmp/miniconda.sh -b -p /home/ubuntu/miniconda && \
-    rm /tmp/miniconda.sh
+
+ENV CC=gcc-8
+ENV CXX=g++-8
+
+ADD . /apache-arrow
+WORKDIR /apache-arrow
+
+RUN arrow/dev/docker_common/install_pyarrow_conda_toolchain.sh 3.6
+ENV PATH="/opt/conda/bin:${PATH}"
 
 # Python dependencies
 # Create Conda environment
 RUN /home/ubuntu/miniconda/bin/conda create -y -q -n pyarrow-dev \
-        # Python
-        python=3.6 \
-        numpy \
-        pandas \
-        pytest \
-        cython \
-        ipython \
-        matplotlib \
         numpydoc \
         sphinx \
         sphinx_bootstrap_theme \
-        six \
-        setuptools \
-        # C++
-        boost-cpp \
-        cmake \
-        flatbuffers \
-        rapidjson \
-        thrift-cpp \
-        snappy \
-        zlib \
-        brotli \
-        jemalloc \
-        lz4-c \
-        zstd \
         doxygen \
         maven \
         -c conda-forge
 
-ENV CC=gcc-8
-ENV CXX=g++-8
-
-ADD . /apache-arrow
-WORKDIR /apache-arrow
 CMD arrow/dev/gen_apidocs/create_documents.sh
diff --git a/dev/hdfs_integration/Dockerfile b/dev/hdfs_integration/Dockerfile
index 71dcbe3..1279efc 100644
--- a/dev/hdfs_integration/Dockerfile
+++ b/dev/hdfs_integration/Dockerfile
@@ -18,8 +18,7 @@
 FROM gelog/hadoop
 
 ENV CC=gcc \
-    CXX=g++ \
-    PATH=/opt/conda/bin:$PATH
+    CXX=g++
 
 RUN apt-get update -y \
  && apt-get install -y \
@@ -30,14 +29,14 @@ RUN apt-get update -y \
 	  pkg-config \
 	  ninja-build
 
-# Miniconda - Python 3.6, 64-bit, x86, latest
 RUN wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O conda.sh \
  && /bin/bash conda.sh -b -p /opt/conda \
  && rm conda.sh
 
-# create conda env with the required dependences
 RUN conda create -y -q -c conda-forge -n pyarrow-dev \
-      python=3.6 \
+      python=$1 \
+      ipython \
+      matplotlib \
       nomkl \
       numpy \
       six \
@@ -57,8 +56,12 @@ RUN conda create -y -q -c conda-forge -n pyarrow-dev \
       jemalloc \
       lz4-c \
       zstd \
+      setuptools \
+      setuptools_scm \
  && conda clean --all
 
+ENV PATH="/opt/conda/bin:${PATH}"
+
 # installing in the previous step boost=1.60 and boost-cpp=1.67 gets installed,
 # cmake finds 1.60 and parquet fails to compile
 # installing it in a separate step, boost=1.60 and boost-cpp=1.64 gets
diff --git a/dev/iwyu/Dockerfile b/dev/iwyu/Dockerfile
new file mode 100644
index 0000000..3d93f90
--- /dev/null
+++ b/dev/iwyu/Dockerfile
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM arrow_integration_xenial_base
+
+ADD . /apache-arrow
+WORKDIR /apache-arrow
+
+CMD arrow/dev/iwyu/run_iwyu.sh
diff --git a/dev/iwyu/run_iwyu.sh b/dev/iwyu/run_iwyu.sh
new file mode 100755
index 0000000..e33b3f7
--- /dev/null
+++ b/dev/iwyu/run_iwyu.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set up environment and working directory
+CLANG_VERSION=6.0
+IWYU_BUILD_DIR=`pwd`/arrow/cpp/docker-iwyu
+IWYU_SH=`pwd`/arrow/cpp/build-support/iwyu/iwyu.sh
+IWYU_URL=https://github.com/include-what-you-use/include-what-you-use/archive/clang_$CLANG_VERSION.tar.gz
+
+rm -rf $IWYU_BUILD_DIR
+mkdir -p $IWYU_BUILD_DIR
+pushd $IWYU_BUILD_DIR
+
+function cleanup {
+    popd
+    rm -rf $IWYU_BUILD_DIR
+}
+
+trap cleanup EXIT
+
+# Build IWYU
+wget -O iwyu.tar.gz $IWYU_URL
+tar xzf iwyu.tar.gz
+rm -f iwyu.tar.gz
+
+IWYU_SRC=`pwd`/include-what-you-use-clang_$CLANG_VERSION
+
+export CC=clang-$CLANG_VERSION
+export CXX=clang++-$CLANG_VERSION
+
+mkdir -p iwyu-build
+pushd iwyu-build
+
+# iwyu needs this
+apt-get install -y zlib1g-dev
+
+cmake -G "Unix Makefiles" -DIWYU_LLVM_ROOT_PATH=/usr/lib/llvm-$CLANG_VERSION $IWYU_SRC
+make -j4
+popd
+
+# Add iwyu and iwyu_tool.py to path
+export PATH=$IWYU_BUILD_DIR/iwyu-build:$PATH
+
+source activate pyarrow-dev
+
+cmake -GNinja -DARROW_PYTHON=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ..
+
+# Run the build so that the vendored bits are built
+ninja
+
+$IWYU_SH all
diff --git a/dev/spark_integration/Dockerfile b/dev/spark_integration/Dockerfile
index f96ee35..84b93c9 100644
--- a/dev/spark_integration/Dockerfile
+++ b/dev/spark_integration/Dockerfile
@@ -27,41 +27,39 @@ RUN apt-get update && \
     # install conda in /home/ubuntu/miniconda
     wget -O /tmp/miniconda.sh \
     https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    bash /tmp/miniconda.sh -b -p /home/ubuntu/miniconda && \
+    bash /tmp/miniconda.sh -b -p /opt/conda && \
     rm /tmp/miniconda.sh && \
     # Python dependencies
     apt-get install -y \
         pkg-config && \
-    # Create Conda environment
-    /home/ubuntu/miniconda/bin/conda create -y -q -n pyarrow-dev \
-        # Python
-        python=2.7 \
-        numpy \
-        pandas \
-        pytest \
-        cython \
-        ipython \
-        matplotlib \
-        six \
-        setuptools \
-        setuptools_scm \
-        # C++
-        boost-cpp \
-        cmake \
-        flatbuffers \
-        rapidjson \
-        thrift-cpp \
-        snappy \
-        zlib \
-        gflags \
-        brotli \
-        jemalloc \
-        lz4-c \
-        zstd \
-        -c conda-forge && \
-    /home/ubuntu/miniconda/bin/conda clean --all
 
-ENV PATH="/home/ubuntu/miniconda/bin:${PATH}"
+ENV PATH="/opt/conda/bin:${PATH}"
+
+RUN conda create -y -q -c conda-forge -n pyarrow-dev \
+      python=2.7 \
+      ipython \
+      nomkl \
+      numpy \
+      six \
+      setuptools \
+      cython \
+      pandas \
+      pytest \
+      cmake \
+      flatbuffers \
+      rapidjson \
+      boost-cpp \
+      thrift-cpp \
+      snappy \
+      zlib \
+      gflags \
+      brotli \
+      jemalloc \
+      lz4-c \
+      zstd \
+      setuptools \
+      setuptools_scm \
+ && conda clean --all
 
 ADD . /apache-arrow
 WORKDIR /apache-arrow