You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/08/11 15:57:40 UTC
[arrow] 11/22: ARROW-9575: [R] gcc-UBSAN failure on CRAN
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch maint-1.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 127ada2c9ed777d542f0a89e452abbe4b40ee0dd
Author: Neal Richardson <ne...@gmail.com>
AuthorDate: Wed Jul 29 16:30:21 2020 -0700
ARROW-9575: [R] gcc-UBSAN failure on CRAN
The cause of the failure itself turns out to be in cpp/src/parquet, not the R bindings.
This patch reworks the existing r-sanitizer nightly job to (1) build the bundled C++ build like what happens on CRAN, and (2) actually fail the build if there is a UBSAN error. Previously no UBSAN error was reported because the Arrow C++ library was not built with sanitizers, and even if it were, the build was not set up to detect them and fail.
Closes #7858 from nealrichardson/r-ubsan
Authored-by: Neal Richardson <ne...@gmail.com>
Signed-off-by: Neal Richardson <ne...@gmail.com>
---
ci/docker/linux-r.dockerfile | 8 +--
ci/docker/ubuntu-18.04-r-sanitizer.dockerfile | 100 --------------------------
ci/scripts/r_docker_configure.sh | 13 +++-
ci/scripts/r_sanitize.sh | 6 +-
cpp/src/arrow/dataset/file_parquet.h | 2 +-
cpp/src/parquet/arrow/path_internal.cc | 3 +-
docker-compose.yml | 7 +-
7 files changed, 25 insertions(+), 114 deletions(-)
diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile
index 2ec42fb..1d963a2 100644
--- a/ci/docker/linux-r.dockerfile
+++ b/ci/docker/linux-r.dockerfile
@@ -21,15 +21,15 @@
ARG base
FROM ${base}
+ARG r_bin=R
+ENV R_BIN=${r_bin}
+
# Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile)
ENV PATH "${RPREFIX}/bin:${PATH}"
-# Ensure parallel R package installation, set CRAN repo mirror,
-# and use pre-built binaries where possible
-COPY ci/etc/rprofile /arrow/ci/etc/
-RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site
# Patch up some of the docker images
COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/
+COPY ci/etc/rprofile /arrow/ci/etc/
RUN /arrow/ci/scripts/r_docker_configure.sh
COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
diff --git a/ci/docker/ubuntu-18.04-r-sanitizer.dockerfile b/ci/docker/ubuntu-18.04-r-sanitizer.dockerfile
deleted file mode 100644
index 4bd0236..0000000
--- a/ci/docker/ubuntu-18.04-r-sanitizer.dockerfile
+++ /dev/null
@@ -1,100 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-FROM wch1/r-debug:latest
-
-# Installs C++ toolchain and dependencies
-RUN apt-get update -y -q && \
- apt-get install -y -q --no-install-recommends \
- autoconf \
- ca-certificates \
- ccache \
- cmake \
- g++ \
- gcc \
- git \
- libbenchmark-dev \
- libboost-filesystem-dev \
- libboost-regex-dev \
- libboost-system-dev \
- libbrotli-dev \
- libbz2-dev \
- libgflags-dev \
- libgoogle-glog-dev \
- liblz4-dev \
- liblzma-dev \
- libre2-dev \
- libsnappy-dev \
- libssl-dev \
- libzstd-dev \
- locales \
- ninja-build \
- pkg-config \
- rapidjson-dev \
- tzdata && \
- locale-gen en_US.UTF-8 && \
- apt-get clean && rm -rf /var/lib/apt/lists*
-
-# Ensure parallel R package installation, set CRAN repo mirror,
-# and use pre-built binaries where possible
-RUN printf "\
- options(Ncpus = parallel::detectCores(), \
- repos = 'https://packagemanager.rstudio.com/cran/__linux__/bionic/latest', \
- HTTPUserAgent = sprintf(\
- 'R/%%s R (%%s)', getRversion(), \
- paste(getRversion(), R.version\$platform, R.version\$arch, R.version\$os)))\n" \
- >> /usr/local/RDsan/lib/R/etc/Rprofile.site
-
-# Also ensure parallel compilation of C/C++ code
-RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> /usr/local/RDsan/lib/R/etc/Makeconf
-
-# Prioritize system packages and local installation
-# The following dependencies will be downloaded due to missing/invalid packages
-# provided by the distribution:
-# - libc-ares-dev does not install CMake config files
-# - flatbuffer is not packaged
-# - libgtest-dev only provide sources
-# - libprotobuf-dev only provide sources
-# - thrift is too old
-ENV ARROW_DEPENDENCY_SOURCE=SYSTEM \
- ARROW_FLIGHT=OFF \
- ARROW_GANDIVA=OFF \
- ARROW_HOME=/usr/local \
- ARROW_HDFS=OFF \
- ARROW_INSTALL_NAME_RPATH=OFF \
- ARROW_NO_DEPRECATED_API=ON \
- ARROW_ORC=OFF \
- ARROW_PARQUET=ON \
- ARROW_PLASMA=OFF \
- ARROW_USE_ASAN=OFF \
- ARROW_USE_CCACHE=ON \
- ARROW_USE_UBSAN=OFF \
- ARROW_WITH_BZ2=OFF \
- ARROW_WITH_UTF8PROC=OFF \
- ARROW_WITH_ZSTD=OFF \
- cares_SOURCE=BUNDLED \
- gRPC_SOURCE=BUNDLED \
- GTest_SOURCE=BUNDLED \
- LC_ALL=en_US.UTF-8 \
- PATH=/usr/lib/ccache/:$PATH \
- Protobuf_SOURCE=BUNDLED \
- R_BIN=RDsan \
- Thrift_SOURCE=BUNDLED
-
-COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
-COPY r/DESCRIPTION /arrow/r/
-RUN /arrow/ci/scripts/r_deps.sh /arrow
diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh
index 9b131f2..1d7e8de 100755
--- a/ci/scripts/r_docker_configure.sh
+++ b/ci/scripts/r_docker_configure.sh
@@ -20,16 +20,23 @@ set -ex
: ${R_BIN:=R}
+# The Dockerfile should have put this file here
+if [ -f "/arrow/ci/etc/rprofile" ]; then
+ # Ensure parallel R package installation, set CRAN repo mirror,
+ # and use pre-built binaries where possible
+ cat /arrow/ci/etc/rprofile >> $(${R_BIN} RHOME)/etc/Rprofile.site
+fi
+
# Ensure parallel compilation of C/C++ code
-echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Makeconf
+echo "MAKEFLAGS=-j$(${R_BIN} -s -e 'cat(parallel::detectCores())')" >> $(${R_BIN} RHOME)/etc/Makeconf
# Special hacking to try to reproduce quirks on fedora-clang-devel on CRAN
# which uses a bespoke clang compiled to use libc++
# https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang
if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then
dnf install -y libcxx-devel
- sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(R RHOME)/etc/Makeconf
- rm -rf $(R RHOME)/etc/Makeconf.bak
+ sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf
+ rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak
fi
# Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786
diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh
index de75e93..eacee5f 100755
--- a/ci/scripts/r_sanitize.sh
+++ b/ci/scripts/r_sanitize.sh
@@ -27,6 +27,10 @@ pushd ${source_dir}/tests
export TEST_R_WITH_ARROW=TRUE
export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"
-${R_BIN} < testthat.R
+${R_BIN} < testthat.R > testthat.out 2>&1
+cat testthat.out
+if grep -q "runtime error" testthat.out; then
+ exit 1
+fi
popd
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index c156bfb..13f1090 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -39,7 +39,7 @@ class ReaderProperties;
class ArrowReaderProperties;
namespace arrow {
class FileReader;
-}; // namespace arrow
+} // namespace arrow
} // namespace parquet
namespace arrow {
diff --git a/cpp/src/parquet/arrow/path_internal.cc b/cpp/src/parquet/arrow/path_internal.cc
index 8d2790a..e2079b7 100644
--- a/cpp/src/parquet/arrow/path_internal.cc
+++ b/cpp/src/parquet/arrow/path_internal.cc
@@ -106,7 +106,6 @@
#include "arrow/util/make_unique.h"
#include "arrow/util/variant.h"
#include "arrow/visitor_inline.h"
-
#include "parquet/properties.h"
namespace parquet {
@@ -525,7 +524,7 @@ struct PathInfo {
std::shared_ptr<Array> primitive_array;
int16_t max_def_level = 0;
int16_t max_rep_level = 0;
- bool has_dictionary;
+ bool has_dictionary = false;
};
/// Contains logic for writing a single leaf node to parquet.
diff --git a/docker-compose.yml b/docker-compose.yml
index fafbeae..1828adf 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -911,16 +911,17 @@ services:
- SYS_PTRACE
build:
context: .
- dockerfile: ci/docker/ubuntu-18.04-r-sanitizer.dockerfile
+ dockerfile: ci/docker/linux-r.dockerfile
cache_from:
- ${REPO}:amd64-ubuntu-18.04-r-sanitizer
+ args:
+ base: wch1/r-debug:latest
+ r_bin: RDsan
environment:
<<: *ccache
- LIBARROW_BUILD: 'false'
volumes: *ubuntu-volumes
command: >
/bin/bash -c "
- /arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/r_sanitize.sh /arrow"
################################ Rust #######################################