You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/08/11 15:57:40 UTC

[arrow] 11/22: ARROW-9575: [R] gcc-UBSAN failure on CRAN

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch maint-1.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 127ada2c9ed777d542f0a89e452abbe4b40ee0dd
Author: Neal Richardson <ne...@gmail.com>
AuthorDate: Wed Jul 29 16:30:21 2020 -0700

    ARROW-9575: [R] gcc-UBSAN failure on CRAN
    
    The cause of the failure itself turns out to be in cpp/src/parquet, not the R bindings.
    
    This patch reworks the existing r-sanitizer nightly job to (1) build the bundled C++ build like what happens on CRAN, and (2) actually fail the build if there is a UBSAN error. Previously no UBSAN error was reported because the Arrow C++ library was not built with sanitizers, and even if it were, the build was not set up to detect them and fail.
    
    Closes #7858 from nealrichardson/r-ubsan
    
    Authored-by: Neal Richardson <ne...@gmail.com>
    Signed-off-by: Neal Richardson <ne...@gmail.com>
---
 ci/docker/linux-r.dockerfile                  |   8 +--
 ci/docker/ubuntu-18.04-r-sanitizer.dockerfile | 100 --------------------------
 ci/scripts/r_docker_configure.sh              |  13 +++-
 ci/scripts/r_sanitize.sh                      |   6 +-
 cpp/src/arrow/dataset/file_parquet.h          |   2 +-
 cpp/src/parquet/arrow/path_internal.cc        |   3 +-
 docker-compose.yml                            |   7 +-
 7 files changed, 25 insertions(+), 114 deletions(-)

diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile
index 2ec42fb..1d963a2 100644
--- a/ci/docker/linux-r.dockerfile
+++ b/ci/docker/linux-r.dockerfile
@@ -21,15 +21,15 @@
 ARG base
 FROM ${base}
 
+ARG r_bin=R
+ENV R_BIN=${r_bin}
+
 # Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile)
 ENV PATH "${RPREFIX}/bin:${PATH}"
-# Ensure parallel R package installation, set CRAN repo mirror,
-# and use pre-built binaries where possible
-COPY ci/etc/rprofile /arrow/ci/etc/
-RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site
 
 # Patch up some of the docker images
 COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/
+COPY ci/etc/rprofile /arrow/ci/etc/
 RUN /arrow/ci/scripts/r_docker_configure.sh
 
 COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
diff --git a/ci/docker/ubuntu-18.04-r-sanitizer.dockerfile b/ci/docker/ubuntu-18.04-r-sanitizer.dockerfile
deleted file mode 100644
index 4bd0236..0000000
--- a/ci/docker/ubuntu-18.04-r-sanitizer.dockerfile
+++ /dev/null
@@ -1,100 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM wch1/r-debug:latest
-
-# Installs C++ toolchain and dependencies
-RUN apt-get update -y -q && \
-    apt-get install -y -q --no-install-recommends \
-      autoconf \
-      ca-certificates \
-      ccache \
-      cmake \
-      g++ \
-      gcc \
-      git \
-      libbenchmark-dev \
-      libboost-filesystem-dev \
-      libboost-regex-dev \
-      libboost-system-dev \
-      libbrotli-dev \
-      libbz2-dev \
-      libgflags-dev \
-      libgoogle-glog-dev \
-      liblz4-dev \
-      liblzma-dev \
-      libre2-dev \
-      libsnappy-dev \
-      libssl-dev \
-      libzstd-dev \
-      locales \
-      ninja-build \
-      pkg-config \
-      rapidjson-dev \
-      tzdata && \
-      locale-gen en_US.UTF-8 && \
-      apt-get clean && rm -rf /var/lib/apt/lists*
-
-# Ensure parallel R package installation, set CRAN repo mirror,
-# and use pre-built binaries where possible
-RUN printf "\
-    options(Ncpus = parallel::detectCores(), \
-            repos = 'https://packagemanager.rstudio.com/cran/__linux__/bionic/latest', \
-            HTTPUserAgent = sprintf(\
-                'R/%%s R (%%s)', getRversion(), \
-                paste(getRversion(), R.version\$platform, R.version\$arch, R.version\$os)))\n" \
-    >> /usr/local/RDsan/lib/R/etc/Rprofile.site
-
-# Also ensure parallel compilation of C/C++ code
-RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> /usr/local/RDsan/lib/R/etc/Makeconf
-
-# Prioritize system packages and local installation
-# The following dependencies will be downloaded due to missing/invalid packages
-# provided by the distribution:
-# - libc-ares-dev does not install CMake config files
-# - flatbuffer is not packaged
-# - libgtest-dev only provide sources
-# - libprotobuf-dev only provide sources
-# - thrift is too old
-ENV ARROW_DEPENDENCY_SOURCE=SYSTEM \
-    ARROW_FLIGHT=OFF \
-    ARROW_GANDIVA=OFF \
-    ARROW_HOME=/usr/local \
-    ARROW_HDFS=OFF \
-    ARROW_INSTALL_NAME_RPATH=OFF \
-    ARROW_NO_DEPRECATED_API=ON \
-    ARROW_ORC=OFF \
-    ARROW_PARQUET=ON \
-    ARROW_PLASMA=OFF \
-    ARROW_USE_ASAN=OFF \
-    ARROW_USE_CCACHE=ON \
-    ARROW_USE_UBSAN=OFF \
-    ARROW_WITH_BZ2=OFF \
-    ARROW_WITH_UTF8PROC=OFF \
-    ARROW_WITH_ZSTD=OFF \
-    cares_SOURCE=BUNDLED \
-    gRPC_SOURCE=BUNDLED \
-    GTest_SOURCE=BUNDLED \
-    LC_ALL=en_US.UTF-8 \
-    PATH=/usr/lib/ccache/:$PATH \
-    Protobuf_SOURCE=BUNDLED \
-    R_BIN=RDsan \
-    Thrift_SOURCE=BUNDLED
-
-COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
-COPY r/DESCRIPTION /arrow/r/
-RUN /arrow/ci/scripts/r_deps.sh /arrow
diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh
index 9b131f2..1d7e8de 100755
--- a/ci/scripts/r_docker_configure.sh
+++ b/ci/scripts/r_docker_configure.sh
@@ -20,16 +20,23 @@ set -ex
 
 : ${R_BIN:=R}
 
+# The Dockerfile should have put this file here
+if [ -f "/arrow/ci/etc/rprofile" ]; then
+  # Ensure parallel R package installation, set CRAN repo mirror,
+  # and use pre-built binaries where possible
+  cat /arrow/ci/etc/rprofile >> $(${R_BIN} RHOME)/etc/Rprofile.site
+fi
+
 # Ensure parallel compilation of C/C++ code
-echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Makeconf
+echo "MAKEFLAGS=-j$(${R_BIN} -s -e 'cat(parallel::detectCores())')" >> $(${R_BIN} RHOME)/etc/Makeconf
 
 # Special hacking to try to reproduce quirks on fedora-clang-devel on CRAN
 # which uses a bespoke clang compiled to use libc++
 # https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang
 if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then
   dnf install -y libcxx-devel
-  sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(R RHOME)/etc/Makeconf
-  rm -rf $(R RHOME)/etc/Makeconf.bak
+  sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf
+  rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak
 fi
 
 # Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786
diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh
index de75e93..eacee5f 100755
--- a/ci/scripts/r_sanitize.sh
+++ b/ci/scripts/r_sanitize.sh
@@ -27,6 +27,10 @@ pushd ${source_dir}/tests
 
 export TEST_R_WITH_ARROW=TRUE
 export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"
-${R_BIN} < testthat.R
+${R_BIN} < testthat.R > testthat.out 2>&1
 
+cat testthat.out
+if grep -q "runtime error" testthat.out; then
+  exit 1
+fi
 popd
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index c156bfb..13f1090 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -39,7 +39,7 @@ class ReaderProperties;
 class ArrowReaderProperties;
 namespace arrow {
 class FileReader;
-};  // namespace arrow
+}  // namespace arrow
 }  // namespace parquet
 
 namespace arrow {
diff --git a/cpp/src/parquet/arrow/path_internal.cc b/cpp/src/parquet/arrow/path_internal.cc
index 8d2790a..e2079b7 100644
--- a/cpp/src/parquet/arrow/path_internal.cc
+++ b/cpp/src/parquet/arrow/path_internal.cc
@@ -106,7 +106,6 @@
 #include "arrow/util/make_unique.h"
 #include "arrow/util/variant.h"
 #include "arrow/visitor_inline.h"
-
 #include "parquet/properties.h"
 
 namespace parquet {
@@ -525,7 +524,7 @@ struct PathInfo {
   std::shared_ptr<Array> primitive_array;
   int16_t max_def_level = 0;
   int16_t max_rep_level = 0;
-  bool has_dictionary;
+  bool has_dictionary = false;
 };
 
 /// Contains logic for writing a single leaf node to parquet.
diff --git a/docker-compose.yml b/docker-compose.yml
index fafbeae..1828adf 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -911,16 +911,17 @@ services:
       - SYS_PTRACE
     build:
       context: .
-      dockerfile: ci/docker/ubuntu-18.04-r-sanitizer.dockerfile
+      dockerfile: ci/docker/linux-r.dockerfile
       cache_from:
         - ${REPO}:amd64-ubuntu-18.04-r-sanitizer
+      args:
+        base: wch1/r-debug:latest
+        r_bin: RDsan
     environment:
       <<: *ccache
-      LIBARROW_BUILD: 'false'
     volumes: *ubuntu-volumes
     command: >
       /bin/bash -c "
-        /arrow/ci/scripts/cpp_build.sh /arrow /build &&
         /arrow/ci/scripts/r_sanitize.sh /arrow"
 
   ################################ Rust #######################################