You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/11/07 12:08:22 UTC

[arrow] branch master updated: ARROW-3707: [C++] Fix test regression with zstd 1.3.7

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 394b334  ARROW-3707: [C++] Fix test regression with zstd 1.3.7
394b334 is described below

commit 394b334bba1199bd2d98a158736a6652efce629f
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Wed Nov 7 13:08:14 2018 +0100

    ARROW-3707: [C++] Fix test regression with zstd 1.3.7
    
    Upstream issue: https://github.com/facebook/zstd/issues/1385
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #2909 from pitrou/ARROW-3707-zstd-null-pointer and squashes the following commits:
    
    9fb06761 <Antoine Pitrou> Use cmake to build zstd
    8a2488df <Antoine Pitrou> ARROW-3707:  Fix test regression with zstd 1.3.7
---
 ci/appveyor-cpp-build.bat                   |  3 ++
 ci/cpp-msvc-build-main.bat                  |  2 +-
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 48 ++++++++++++-------------
 cpp/src/arrow/util/compression_zstd.cc      | 56 ++++++++++++++++-------------
 cpp/thirdparty/versions.txt                 |  2 +-
 5 files changed, 60 insertions(+), 51 deletions(-)

diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat
index bce722f..195941e 100644
--- a/ci/appveyor-cpp-build.bat
+++ b/ci/appveyor-cpp-build.bat
@@ -30,6 +30,7 @@ if "%JOB%" == "Static_Crt_Build" (
   pushd cpp\build-debug
 
   cmake -G "%GENERATOR%" ^
+        -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^
         -DARROW_USE_STATIC_CRT=ON ^
         -DARROW_BOOST_USE_SHARED=OFF ^
         -DARROW_BUILD_SHARED=OFF ^
@@ -45,6 +46,7 @@ if "%JOB%" == "Static_Crt_Build" (
   pushd cpp\build-release
 
   cmake -G "%GENERATOR%" ^
+        -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^
         -DARROW_USE_STATIC_CRT=ON ^
         -DARROW_BOOST_USE_SHARED=OFF ^
         -DARROW_BUILD_SHARED=OFF ^
@@ -70,6 +72,7 @@ if "%JOB%" == "Build_Debug" (
   pushd cpp\build-debug
 
   cmake -G "%GENERATOR%" ^
+        -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^
         -DARROW_BOOST_USE_SHARED=OFF ^
         -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
         -DARROW_BUILD_STATIC=OFF ^
diff --git a/ci/cpp-msvc-build-main.bat b/ci/cpp-msvc-build-main.bat
index e5eef1e..ef961b2 100644
--- a/ci/cpp-msvc-build-main.bat
+++ b/ci/cpp-msvc-build-main.bat
@@ -19,7 +19,7 @@
 @rem (i.e. for usual configurations)
 
 set ARROW_HOME=%CONDA_PREFIX%\Library
-set CMAKE_ARGS=
+set CMAKE_ARGS=-DARROW_VERBOSE_THIRDPARTY_BUILD=OFF
 
 if "%JOB%" == "Toolchain" (
     set CMAKE_ARGS=%CMAKE_ARGS% -DARROW_WITH_BZ2=ON
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index c35dd4a..bbac55b 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1053,38 +1053,38 @@ if (ARROW_WITH_ZSTD)
 # ZSTD
 
   if("${ZSTD_HOME}" STREQUAL "")
-    set(ZSTD_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-prefix/src/zstd_ep")
-    set(ZSTD_INCLUDE_DIR "${ZSTD_BUILD_DIR}/lib")
+    set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-install")
+    set(ZSTD_INCLUDE_DIR "${ZSTD_PREFIX}/include")
+
+    set(ZSTD_CMAKE_ARGS
+        "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
+        "-DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}"
+        "-DZSTD_BUILD_PROGRAMS=off"
+        "-DZSTD_BUILD_SHARED=off"
+        "-DZSTD_BUILD_STATIC=on"
+        "-DZSTD_MULTITHREAD_SUPPORT=off")
 
     if (MSVC)
+      set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/lib/zstd_static.lib")
       if (ARROW_USE_STATIC_CRT)
-        if (${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG")
-          set(ZSTD_RUNTIME_LIBRARY_LINKAGE "/p:RuntimeLibrary=MultiThreadedDebug")
-        else()
-          set(ZSTD_RUNTIME_LIBRARY_LINKAGE "/p:RuntimeLibrary=MultiThreaded")
-        endif()
+        set(ZSTD_CMAKE_ARGS ${ZSTD_CMAKE_ARGS} "-DZSTD_USE_STATIC_RUNTIME=on")
       endif()
-      set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/libzstd_static.lib")
-      set(ZSTD_BUILD_COMMAND BUILD_COMMAND msbuild ${ZSTD_BUILD_DIR}/build/VS2010/zstd.sln /t:Build /v:minimal /p:Configuration=${CMAKE_BUILD_TYPE}
-                             ${ZSTD_RUNTIME_LIBRARY_LINKAGE} /p:Platform=x64 /p:PlatformToolset=v140
-                             /p:OutDir=${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/ /p:SolutionDir=${ZSTD_BUILD_DIR}/build/VS2010/ )
-      set(ZSTD_PATCH_COMMAND PATCH_COMMAND git --git-dir=. apply --verbose --whitespace=fix ${CMAKE_SOURCE_DIR}/build-support/zstd_msbuild_gl_runtimelibrary_params.patch)
     else()
-      set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/lib/libzstd.a")
-      set(ZSTD_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-zstd-lib.sh)
+      set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/lib/libzstd.a")
+      # Only pass our C flags on Unix as on MSVC it leads to a
+      # "incompatible command-line options" error
+      set(ZSTD_CMAKE_ARGS ${ZSTD_CMAKE_ARGS}
+          "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}"
+          "-DCMAKE_C_FLAGS=${EP_C_FLAGS}")
     endif()
 
     ExternalProject_Add(zstd_ep
-        URL ${ZSTD_SOURCE_URL}
-        ${EP_LOG_OPTIONS}
-        UPDATE_COMMAND ""
-        ${ZSTD_PATCH_COMMAND}
-        CONFIGURE_COMMAND ""
-        INSTALL_COMMAND ""
-        BINARY_DIR ${ZSTD_BUILD_DIR}
-        BUILD_BYPRODUCTS ${ZSTD_STATIC_LIB}
-        ${ZSTD_BUILD_COMMAND}
-        )
+      ${EP_LOG_OPTIONS}
+      CMAKE_ARGS ${ZSTD_CMAKE_ARGS}
+      SOURCE_SUBDIR "build/cmake"
+      INSTALL_DIR ${ZSTD_PREFIX}
+      URL ${ZSTD_SOURCE_URL}
+      BUILD_BYPRODUCTS "${ZSTD_STATIC_LIB}")
 
     set(ZSTD_VENDORED 1)
   else()
diff --git a/cpp/src/arrow/util/compression_zstd.cc b/cpp/src/arrow/util/compression_zstd.cc
index 4064f29..8bf02be 100644
--- a/cpp/src/arrow/util/compression_zstd.cc
+++ b/cpp/src/arrow/util/compression_zstd.cc
@@ -24,6 +24,7 @@
 #include <zstd.h>
 
 #include "arrow/status.h"
+#include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
 
 using std::size_t;
@@ -34,6 +35,12 @@ namespace util {
 // XXX level = 1 probably doesn't compress very much
 constexpr int kZSTDDefaultCompressionLevel = 1;
 
+static Status ZSTDError(size_t ret, const char* prefix_msg) {
+  std::stringstream ss;
+  ss << prefix_msg << ZSTD_getErrorName(ret);
+  return Status::IOError(ss.str());
+}
+
 // ----------------------------------------------------------------------
 // ZSTD decompressor implementation
 
@@ -47,7 +54,7 @@ class ZSTDDecompressor : public Decompressor {
     finished_ = false;
     size_t ret = ZSTD_initDStream(stream_);
     if (ZSTD_isError(ret)) {
-      return ZSTDError(ret, "zstd init failed: ");
+      return ZSTDError(ret, "ZSTD init failed: ");
     } else {
       return Status::OK();
     }
@@ -69,7 +76,7 @@ class ZSTDDecompressor : public Decompressor {
     size_t ret;
     ret = ZSTD_decompressStream(stream_, &out_buf, &in_buf);
     if (ZSTD_isError(ret)) {
-      return ZSTDError(ret, "zstd decompress failed: ");
+      return ZSTDError(ret, "ZSTD decompress failed: ");
     }
     *bytes_read = static_cast<int64_t>(in_buf.pos);
     *bytes_written = static_cast<int64_t>(out_buf.pos);
@@ -81,12 +88,6 @@ class ZSTDDecompressor : public Decompressor {
   bool IsFinished() override { return finished_; }
 
  protected:
-  Status ZSTDError(size_t ret, const char* prefix_msg) {
-    std::stringstream ss;
-    ss << prefix_msg << ZSTD_getErrorName(ret);
-    return Status::IOError(ss.str());
-  }
-
   ZSTD_DStream* stream_;
   bool finished_;
 };
@@ -103,7 +104,7 @@ class ZSTDCompressor : public Compressor {
   Status Init() {
     size_t ret = ZSTD_initCStream(stream_, kZSTDDefaultCompressionLevel);
     if (ZSTD_isError(ret)) {
-      return ZSTDError(ret, "zstd init failed: ");
+      return ZSTDError(ret, "ZSTD init failed: ");
     } else {
       return Status::OK();
     }
@@ -119,12 +120,6 @@ class ZSTDCompressor : public Compressor {
              bool* should_retry) override;
 
  protected:
-  Status ZSTDError(size_t ret, const char* prefix_msg) {
-    std::stringstream ss;
-    ss << prefix_msg << ZSTD_getErrorName(ret);
-    return Status::IOError(ss.str());
-  }
-
   ZSTD_CStream* stream_;
 };
 
@@ -144,7 +139,7 @@ Status ZSTDCompressor::Compress(int64_t input_len, const uint8_t* input,
   size_t ret;
   ret = ZSTD_compressStream(stream_, &out_buf, &in_buf);
   if (ZSTD_isError(ret)) {
-    return ZSTDError(ret, "zstd compress failed: ");
+    return ZSTDError(ret, "ZSTD compress failed: ");
   }
   *bytes_read = static_cast<int64_t>(in_buf.pos);
   *bytes_written = static_cast<int64_t>(out_buf.pos);
@@ -162,7 +157,7 @@ Status ZSTDCompressor::Flush(int64_t output_len, uint8_t* output, int64_t* bytes
   size_t ret;
   ret = ZSTD_flushStream(stream_, &out_buf);
   if (ZSTD_isError(ret)) {
-    return ZSTDError(ret, "zstd flush failed: ");
+    return ZSTDError(ret, "ZSTD flush failed: ");
   }
   *bytes_written = static_cast<int64_t>(out_buf.pos);
   *should_retry = ret > 0;
@@ -180,7 +175,7 @@ Status ZSTDCompressor::End(int64_t output_len, uint8_t* output, int64_t* bytes_w
   size_t ret;
   ret = ZSTD_endStream(stream_, &out_buf);
   if (ZSTD_isError(ret)) {
-    return ZSTDError(ret, "zstd end failed: ");
+    return ZSTDError(ret, "ZSTD end failed: ");
   }
   *bytes_written = static_cast<int64_t>(out_buf.pos);
   *should_retry = ret > 0;
@@ -206,10 +201,20 @@ Status ZSTDCodec::MakeDecompressor(std::shared_ptr<Decompressor>* out) {
 
 Status ZSTDCodec::Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
                              uint8_t* output_buffer) {
-  int64_t decompressed_size =
-      ZSTD_decompress(output_buffer, static_cast<size_t>(output_len), input,
-                      static_cast<size_t>(input_len));
-  if (decompressed_size != output_len) {
+  if (output_buffer == nullptr) {
+    // We may pass a NULL 0-byte output buffer but some zstd versions demand
+    // a valid pointer: https://github.com/facebook/zstd/issues/1385
+    static uint8_t empty_buffer[1];
+    DCHECK_EQ(output_len, 0);
+    output_buffer = empty_buffer;
+  }
+
+  size_t ret = ZSTD_decompress(output_buffer, static_cast<size_t>(output_len), input,
+                               static_cast<size_t>(input_len));
+  if (ZSTD_isError(ret)) {
+    return ZSTDError(ret, "ZSTD decompression failed: ");
+  }
+  if (static_cast<int64_t>(ret) != output_len) {
     return Status::IOError("Corrupt ZSTD compressed data.");
   }
   return Status::OK();
@@ -223,12 +228,13 @@ int64_t ZSTDCodec::MaxCompressedLen(int64_t input_len,
 Status ZSTDCodec::Compress(int64_t input_len, const uint8_t* input,
                            int64_t output_buffer_len, uint8_t* output_buffer,
                            int64_t* output_length) {
-  *output_length =
+  size_t ret =
       ZSTD_compress(output_buffer, static_cast<size_t>(output_buffer_len), input,
                     static_cast<size_t>(input_len), kZSTDDefaultCompressionLevel);
-  if (ZSTD_isError(*output_length)) {
-    return Status::IOError("ZSTD compression failure.");
+  if (ZSTD_isError(ret)) {
+    return ZSTDError(ret, "ZSTD compression failed: ");
   }
+  *output_length = static_cast<int64_t>(ret);
   return Status::OK();
 }
 
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 8fbfb38..51a0c5c 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -34,5 +34,5 @@ RAPIDJSON_VERSION=v1.1.0
 SNAPPY_VERSION=1.1.3
 THRIFT_VERSION=0.11.0
 ZLIB_VERSION=1.2.8
-ZSTD_VERSION=v1.2.0
+ZSTD_VERSION=v1.3.7
 RE2_VERSION=2018-10-01