You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/07/03 20:39:14 UTC
arrow git commit: ARROW-600: ZSTD compression lib support
Repository: arrow
Updated Branches:
refs/heads/master e18abac19 -> cdee23c27
ARROW-600: ZSTD compression lib support
Author: Max Risukhin <ri...@gmail.com>
Closes #807 from MaxRis/ARROW-600 and squashes the following commits:
2fc4578 [Max Risukhin] ARROW-600: ZSTD compression lib support
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/cdee23c2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/cdee23c2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/cdee23c2
Branch: refs/heads/master
Commit: cdee23c27ac36f957512e33cc1ee49674c515dc8
Parents: e18abac
Author: Max Risukhin <ri...@gmail.com>
Authored: Mon Jul 3 22:39:09 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Mon Jul 3 22:39:09 2017 +0200
----------------------------------------------------------------------
cpp/CMakeLists.txt | 3 +-
cpp/build-support/build-zstd-lib.sh | 16 ++++++
cpp/cmake_modules/FindZSTD.cmake | 70 ++++++++++++++++++++++++
cpp/cmake_modules/ThirdpartyToolchain.cmake | 49 ++++++++++++++++-
cpp/src/arrow/util/compression-test.cc | 4 ++
cpp/src/arrow/util/compression.cc | 28 ++++++++++
cpp/src/arrow/util/compression.h | 16 +++++-
python/manylinux1/Dockerfile-x86_64_base | 8 +++
python/manylinux1/scripts/build_lz4.sh | 24 ++++++++
python/manylinux1/scripts/build_zstd.sh | 25 +++++++++
10 files changed, 239 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5745338..28a3bb0 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -530,7 +530,8 @@ set(ARROW_STATIC_LINK_LIBS
brotli_enc
brotli_common
snappy
- zlib)
+ zlib
+ zstd_static)
add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS})
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/cpp/build-support/build-zstd-lib.sh
----------------------------------------------------------------------
diff --git a/cpp/build-support/build-zstd-lib.sh b/cpp/build-support/build-zstd-lib.sh
new file mode 100755
index 0000000..62805ba
--- /dev/null
+++ b/cpp/build-support/build-zstd-lib.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Builds zstd with make in the current working directory, so it must be
+# started from the root of an unpacked zstd source tree.
+#
+# -O3 and -fPIC are appended to whatever CFLAGS the caller already exported;
+# -fPIC requests position-independent code.
+export CFLAGS="${CFLAGS} -O3 -fPIC"
+make -j4
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/cpp/cmake_modules/FindZSTD.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindZSTD.cmake b/cpp/cmake_modules/FindZSTD.cmake
new file mode 100644
index 0000000..1fda29e
--- /dev/null
+++ b/cpp/cmake_modules/FindZSTD.cmake
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# - Find ZSTD (zstd.h, libzstd.a, libzstd.so, and libzstd.so.0)
+#
+# Search roots:
+#  ZSTD_HOME is used when it is a non-empty string; otherwise ZStd_HOME is
+#  used when it evaluates to true. With a root, only that root is searched
+#  (NO_DEFAULT_PATH). Without one, CMake's default search runs with the
+#  system and system-environment locations switched off.
+#
+# This module defines
+#  ZSTD_INCLUDE_DIR, directory containing headers
+#  ZSTD_SHARED_LIB, path to libzstd shared library
+#  ZSTD_STATIC_LIB, path to libzstd static library
+#  ZSTD_FOUND, whether zstd has been found (all three of the above are required)
+
+if (NOT "${ZSTD_HOME}" STREQUAL "")
+  file(TO_CMAKE_PATH "${ZSTD_HOME}" _native_path)
+  list(APPEND _zstd_roots ${_native_path})
+elseif (ZStd_HOME)
+  list(APPEND _zstd_roots ${ZStd_HOME})
+endif()
+
+# With MSVC the static library carries an extra name suffix ("_static" unless
+# the caller already chose one) in front of the platform's library suffix.
+if (MSVC AND NOT ZSTD_MSVC_STATIC_LIB_SUFFIX)
+  set(ZSTD_MSVC_STATIC_LIB_SUFFIX "_static")
+endif()
+set(ZSTD_STATIC_LIB_SUFFIX "${ZSTD_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+set(ZSTD_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}zstd${ZSTD_STATIC_LIB_SUFFIX}")
+
+if (NOT _zstd_roots)
+  # No explicit root: skip the system locations so that we don't accidentally
+  # pick up a different version.
+  find_path(ZSTD_INCLUDE_DIR
+    NAMES zstd.h
+    NO_CMAKE_SYSTEM_PATH
+    NO_SYSTEM_ENVIRONMENT_PATH)
+  find_library(ZSTD_SHARED_LIB
+    NAMES zstd
+    NO_CMAKE_SYSTEM_PATH
+    NO_SYSTEM_ENVIRONMENT_PATH)
+  find_library(ZSTD_STATIC_LIB
+    NAMES ${ZSTD_STATIC_LIB_NAME}
+    NO_CMAKE_SYSTEM_PATH
+    NO_SYSTEM_ENVIRONMENT_PATH)
+else()
+  # Explicit root(s): look nowhere else.
+  find_path(ZSTD_INCLUDE_DIR
+    NAMES zstd.h
+    PATHS ${_zstd_roots}
+    PATH_SUFFIXES "include"
+    NO_DEFAULT_PATH)
+  find_library(ZSTD_SHARED_LIB
+    NAMES zstd
+    PATHS ${_zstd_roots}
+    PATH_SUFFIXES "lib"
+    NO_DEFAULT_PATH)
+  find_library(ZSTD_STATIC_LIB
+    NAMES ${ZSTD_STATIC_LIB_NAME}
+    PATHS ${_zstd_roots}
+    PATH_SUFFIXES "lib"
+    NO_DEFAULT_PATH)
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(ZSTD
+  REQUIRED_VARS ZSTD_SHARED_LIB ZSTD_STATIC_LIB ZSTD_INCLUDE_DIR)
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/cpp/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 8573345..33447ae 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -28,6 +28,7 @@ set(JEMALLOC_VERSION "4.4.0")
set(SNAPPY_VERSION "1.1.3")
set(BROTLI_VERSION "v0.6.0")
set(LZ4_VERSION "1.7.5")
+set(ZSTD_VERSION "1.2.0")
string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE)
@@ -49,6 +50,7 @@ if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "")
set(ZLIB_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(BROTLI_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(LZ4_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
+ set(ZSTD_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
if (NOT DEFINED ENV{BOOST_ROOT})
# Since we have to set this in the environment, we check whether
@@ -89,6 +91,10 @@ if (DEFINED ENV{LZ4_HOME})
set(LZ4_HOME "$ENV{LZ4_HOME}")
endif()
+if (DEFINED ENV{ZSTD_HOME})
+ set(ZSTD_HOME "$ENV{ZSTD_HOME}")
+endif()
+
# ----------------------------------------------------------------------
# Find pthreads
@@ -653,8 +659,8 @@ if (NOT LZ4_FOUND)
set(LZ4_INCLUDE_DIR "${LZ4_BUILD_DIR}/lib")
if (MSVC)
- set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/visual/VS2010/bin/x64_Release/liblz4_static.lib")
- set(LZ4_BUILD_COMMAND BUILD_COMMAND msbuild.exe /m /p:Configuration=Release /p:Platform=x64 /p:PlatformToolset=v140 /t:Build ${LZ4_BUILD_DIR}/visual/VS2010/lz4.sln)
+ set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/visual/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/liblz4_static.lib")
+ set(LZ4_BUILD_COMMAND BUILD_COMMAND msbuild.exe /m /p:Configuration=${CMAKE_BUILD_TYPE} /p:Platform=x64 /p:PlatformToolset=v140 /t:Build ${LZ4_BUILD_DIR}/visual/VS2010/lz4.sln)
else()
set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a")
set(LZ4_BUILD_COMMAND BUILD_COMMAND make -j4)
@@ -682,3 +688,42 @@ ADD_THIRDPARTY_LIB(lz4_static
if (LZ4_VENDORED)
add_dependencies(lz4_static lz4_ep)
endif()
+
+# ----------------------------------------------------------------------
+# ZSTD
+#
+# Uses the ZSTD installation located by find_package(ZSTD) when there is one.
+# Otherwise the release tarball selected by ZSTD_VERSION is downloaded and
+# built as the zstd_ep external project (ZSTD_VENDORED is then 1). Either way
+# the static library is exposed as the zstd_static third-party target.
+
+find_package(ZSTD)
+if (NOT ZSTD_FOUND)
+  # The build runs inside the unpacked source tree (see BINARY_DIR below), so
+  # headers and the resulting library are picked up from there directly.
+  set(ZSTD_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-prefix/src/zstd_ep")
+  set(ZSTD_INCLUDE_DIR "${ZSTD_BUILD_DIR}/lib")
+
+  if (MSVC)
+    set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/libzstd_static.lib")
+    set(ZSTD_BUILD_COMMAND BUILD_COMMAND
+      msbuild ${ZSTD_BUILD_DIR}/build/VS2010/zstd.sln
+      /t:Build /v:minimal
+      /p:Configuration=${CMAKE_BUILD_TYPE}
+      /p:Platform=x64
+      /p:PlatformToolset=v140
+      /p:OutDir=${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/
+      /p:SolutionDir=${ZSTD_BUILD_DIR}/build/VS2010/ )
+  else()
+    set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/lib/libzstd.a")
+    set(ZSTD_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-zstd-lib.sh)
+  endif()
+
+  # Declare the static library as a byproduct of the external build so that
+  # generators which need a producing rule for every input file (Ninja) know
+  # zstd_ep creates it. BUILD_BYPRODUCTS was introduced in CMake 3.2, so the
+  # option is left out entirely on older versions.
+  set(ZSTD_BUILD_BYPRODUCTS)
+  if (NOT CMAKE_VERSION VERSION_LESS "3.2")
+    set(ZSTD_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${ZSTD_STATIC_LIB}")
+  endif()
+
+  # Only the download and build steps do any work; update, patch, configure
+  # and install are explicitly disabled.
+  ExternalProject_Add(zstd_ep
+    URL "https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz"
+    UPDATE_COMMAND ""
+    PATCH_COMMAND ""
+    CONFIGURE_COMMAND ""
+    INSTALL_COMMAND ""
+    BINARY_DIR ${ZSTD_BUILD_DIR}
+    ${ZSTD_BUILD_COMMAND}
+    ${ZSTD_BUILD_BYPRODUCTS}
+    )
+
+  set(ZSTD_VENDORED 1)
+else()
+  set(ZSTD_VENDORED 0)
+endif()
+
+include_directories(SYSTEM ${ZSTD_INCLUDE_DIR})
+ADD_THIRDPARTY_LIB(zstd_static
+  STATIC_LIB ${ZSTD_STATIC_LIB})
+
+# A vendored library only exists after zstd_ep has been built.
+if (ZSTD_VENDORED)
+  add_dependencies(zstd_static zstd_ep)
+endif()
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/cpp/src/arrow/util/compression-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression-test.cc b/cpp/src/arrow/util/compression-test.cc
index 1a0e5d7..3b19a6d 100644
--- a/cpp/src/arrow/util/compression-test.cc
+++ b/cpp/src/arrow/util/compression-test.cc
@@ -86,4 +86,8 @@ TEST(TestCompressors, GZip) {
CheckCodec<GZipCodec>();
}
+// Runs the CheckCodec routine (also used by the other codec tests) on ZSTDCodec.
+TEST(TestCompressors, ZSTD) { CheckCodec<ZSTDCodec>(); }
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/cpp/src/arrow/util/compression.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc
index 070f857..df1afa3 100644
--- a/cpp/src/arrow/util/compression.cc
+++ b/cpp/src/arrow/util/compression.cc
@@ -31,6 +31,7 @@
#include <brotli/encode.h>
#include <snappy.h>
#include <zlib.h>
+#include <zstd.h>
#include "arrow/status.h"
#include "arrow/util/logging.h"
@@ -329,4 +330,31 @@ Status BrotliCodec::Compress(int64_t input_len, const uint8_t* input,
return Status::OK();
}
+// ----------------------------------------------------------------------
+// ZSTD implementation
+
+// Decompresses `input_len` bytes at `input` into `output_buffer`.
+//
+// `output_len` is both the capacity handed to ZSTD and the exact size the
+// decompressed data must have. Returns Status::IOError when ZSTD reports an
+// error or produces a different number of bytes, Status::OK otherwise.
+Status ZSTDCodec::Decompress(
+    int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) {
+  // ZSTD_decompress returns a size_t that is either a byte count or an error
+  // code; keep it unsigned until ZSTD_isError has classified it, instead of
+  // relying on the signed conversion to make error codes compare unequal.
+  const size_t ret = ZSTD_decompress(output_buffer, static_cast<size_t>(output_len),
+      input, static_cast<size_t>(input_len));
+  if (ZSTD_isError(ret) || static_cast<int64_t>(ret) != output_len) {
+    return Status::IOError("Corrupt ZSTD compressed data.");
+  }
+  return Status::OK();
+}
+
+// Returns ZSTD_compressBound() for an input of `input_len` bytes, i.e. the
+// size bound ZSTD gives for the compressed output. `input` is not read.
+int64_t ZSTDCodec::MaxCompressedLen(int64_t input_len, const uint8_t* input) {
+  const size_t bound = ZSTD_compressBound(static_cast<size_t>(input_len));
+  return static_cast<int64_t>(bound);
+}
+
+// Compresses `input_len` bytes at `input` into `output_buffer`, which can hold
+// `output_buffer_len` bytes, using ZSTD compression level 1.
+//
+// On success the number of bytes written is stored in `*output_length` and
+// Status::OK is returned. On failure Status::IOError is returned and
+// `*output_length` is left untouched, so a ZSTD error code is never exposed
+// to the caller as if it were a length.
+Status ZSTDCodec::Compress(int64_t input_len, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) {
+  // Check the raw size_t result before converting it to a signed length.
+  const size_t ret = ZSTD_compress(output_buffer, static_cast<size_t>(output_buffer_len),
+      input, static_cast<size_t>(input_len), 1);
+  if (ZSTD_isError(ret)) {
+    return Status::IOError("ZSTD compression failure.");
+  }
+  *output_length = static_cast<int64_t>(ret);
+  return Status::OK();
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/cpp/src/arrow/util/compression.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression.h b/cpp/src/arrow/util/compression.h
index 6886d04..9e581d8 100644
--- a/cpp/src/arrow/util/compression.h
+++ b/cpp/src/arrow/util/compression.h
@@ -27,7 +27,7 @@
namespace arrow {
struct Compression {
- enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI };
+ enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, ZSTD };
};
class ARROW_EXPORT Codec {
@@ -104,6 +104,20 @@ class ARROW_EXPORT GZipCodec : public Codec {
std::unique_ptr<GZipCodecImpl> impl_;
};
+// Codec implementation backed by the ZSTD compression library.
+class ARROW_EXPORT ZSTDCodec : public Codec {
+ public:
+  // Name under which this codec identifies itself.
+  const char* name() const override { return "zstd"; }
+
+  // Size bound for the compressed form of `input_len` bytes of input.
+  int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override;
+
+  // Compresses `input` into `output_buffer` (capacity `output_buffer_len`) and
+  // reports the number of bytes produced through `output_length`.
+  Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output_buffer, int64_t* output_length) override;
+
+  // Decompresses `input` into `output_buffer`; `output_len` is the expected
+  // decompressed size.
+  Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) override;
+};
+
} // namespace arrow
#endif
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/python/manylinux1/Dockerfile-x86_64_base
----------------------------------------------------------------------
diff --git a/python/manylinux1/Dockerfile-x86_64_base b/python/manylinux1/Dockerfile-x86_64_base
index 44a9888..cdd13e2 100644
--- a/python/manylinux1/Dockerfile-x86_64_base
+++ b/python/manylinux1/Dockerfile-x86_64_base
@@ -49,6 +49,14 @@ ADD scripts/build_snappy.sh /
RUN /build_snappy.sh
ENV SNAPPY_HOME /usr
+ADD scripts/build_lz4.sh /
+RUN /build_lz4.sh
+ENV LZ4_HOME /usr
+
+ADD scripts/build_zstd.sh /
+RUN /build_zstd.sh
+ENV ZSTD_HOME /usr
+
ADD scripts/build_ccache.sh /
RUN /build_ccache.sh
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/python/manylinux1/scripts/build_lz4.sh
----------------------------------------------------------------------
diff --git a/python/manylinux1/scripts/build_lz4.sh b/python/manylinux1/scripts/build_lz4.sh
new file mode 100755
index 0000000..5a25d3d
--- /dev/null
+++ b/python/manylinux1/scripts/build_lz4.sh
@@ -0,0 +1,24 @@
+#!/bin/bash -ex
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. See accompanying LICENSE file.
+
+# Downloads the lz4 release named by LZ4_VERSION, builds it, installs it under
+# PREFIX and removes the downloaded tarball and source tree again.
+
+export LZ4_VERSION="1.7.5"
+# Build position-independent objects so the installed static liblz4.a can be
+# linked into shared libraries.
+export CFLAGS="${CFLAGS} -O3 -fPIC"
+export PREFIX="/usr"
+export LDFLAGS="${LDFLAGS} -Wl,-rpath,${PREFIX}/lib -L${PREFIX}/lib"
+wget "https://github.com/lz4/lz4/archive/v${LZ4_VERSION}.tar.gz" -O "lz4-${LZ4_VERSION}.tar.gz"
+tar xf "lz4-${LZ4_VERSION}.tar.gz"
+pushd "lz4-${LZ4_VERSION}"
+
+make -j5 PREFIX="${PREFIX}"
+make install PREFIX="${PREFIX}"
+popd
+rm -rf "lz4-${LZ4_VERSION}.tar.gz" "lz4-${LZ4_VERSION}"
http://git-wip-us.apache.org/repos/asf/arrow/blob/cdee23c2/python/manylinux1/scripts/build_zstd.sh
----------------------------------------------------------------------
diff --git a/python/manylinux1/scripts/build_zstd.sh b/python/manylinux1/scripts/build_zstd.sh
new file mode 100755
index 0000000..268e5c8
--- /dev/null
+++ b/python/manylinux1/scripts/build_zstd.sh
@@ -0,0 +1,25 @@
+#!/bin/bash -ex
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. See accompanying LICENSE file.
+
+# Downloads the zstd release named by ZSTD_VERSION, builds it with
+# -O3 -fPIC, installs it under PREFIX and then removes the downloaded
+# tarball and source tree.
+
+export ZSTD_VERSION="1.2.0"
+export CFLAGS="${CFLAGS} -O3 -fPIC"
+export PREFIX="/usr"
+export LDFLAGS="${LDFLAGS} -Wl,-rpath,${PREFIX}/lib"
+
+# Shell-local names (not exported) for the unpacked tree and its tarball.
+zstd_src="zstd-${ZSTD_VERSION}"
+zstd_tarball="${zstd_src}.tar.gz"
+
+wget "https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz" -O "${zstd_tarball}"
+tar xf "${zstd_tarball}"
+
+pushd "${zstd_src}"
+make -j5
+make install PREFIX="${PREFIX}"
+popd
+
+rm -rf "${zstd_tarball}" "${zstd_src}"