You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/07/06 12:33:56 UTC

arrow git commit: ARROW-599: [C++] Lz4 compression codec support

Repository: arrow
Updated Branches:
  refs/heads/master 00a7d55cc -> 83a4405ea


ARROW-599: [C++] Lz4 compression codec support

Author: Max Risukhin <ri...@gmail.com>

Closes #813 from MaxRis/ARROW-599 and squashes the following commits:

434a238 [Max Risukhin] ARROW-599: [C++] Lz4 compression codec support


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/83a4405e
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/83a4405e
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/83a4405e

Branch: refs/heads/master
Commit: 83a4405ea0bd1696aeec7677edaef8671faed1ea
Parents: 00a7d55
Author: Max Risukhin <ri...@gmail.com>
Authored: Thu Jul 6 14:33:52 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Thu Jul 6 14:33:52 2017 +0200

----------------------------------------------------------------------
 cpp/CMakeLists.txt                          |  3 ++-
 cpp/build-support/build-lz4-lib.sh          | 16 +++++++++++++++
 cpp/cmake_modules/ThirdpartyToolchain.cmake |  2 +-
 cpp/src/arrow/util/compression-test.cc      |  4 ++++
 cpp/src/arrow/util/compression.cc           | 26 ++++++++++++++++++++++++
 cpp/src/arrow/util/compression.h            | 16 ++++++++++++++-
 python/manylinux1/scripts/build_lz4.sh      |  1 +
 7 files changed, 65 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 76d89ce..6d01fd9 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -533,7 +533,8 @@ set(ARROW_STATIC_LINK_LIBS
   brotli_common
   snappy
   zlib
-  zstd_static)
+  zstd_static
+  lz4_static)
 
 add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS})
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/build-support/build-lz4-lib.sh
----------------------------------------------------------------------
diff --git a/cpp/build-support/build-lz4-lib.sh b/cpp/build-support/build-lz4-lib.sh
new file mode 100755
index 0000000..62805ba
--- /dev/null
+++ b/cpp/build-support/build-lz4-lib.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+export CFLAGS="${CFLAGS} -O3 -fPIC"
+make -j4
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/cmake_modules/ThirdpartyToolchain.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 2be7b5a..3eef2f7 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -623,7 +623,7 @@ if (NOT LZ4_FOUND)
     set(LZ4_BUILD_COMMAND BUILD_COMMAND msbuild.exe /m /p:Configuration=${CMAKE_BUILD_TYPE} /p:Platform=x64 /p:PlatformToolset=v140 /t:Build ${LZ4_BUILD_DIR}/visual/VS2010/lz4.sln)
   else()
     set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a")
-    set(LZ4_BUILD_COMMAND BUILD_COMMAND make -j4)
+    set(LZ4_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-lz4-lib.sh)
   endif()
 
   ExternalProject_Add(lz4_ep

http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/src/arrow/util/compression-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression-test.cc b/cpp/src/arrow/util/compression-test.cc
index 3b19a6d..56dcc2a 100644
--- a/cpp/src/arrow/util/compression-test.cc
+++ b/cpp/src/arrow/util/compression-test.cc
@@ -90,4 +90,8 @@ TEST(TestCompressors, ZSTD) {
   CheckCodec<ZSTDCodec>();
 }
 
+// Exercises Lz4Codec through the same CheckCodec helper the ZSTD test uses.
+TEST(TestCompressors, Lz4) {
+  CheckCodec<Lz4Codec>();
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/src/arrow/util/compression.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc
index df1afa3..0f17e7c 100644
--- a/cpp/src/arrow/util/compression.cc
+++ b/cpp/src/arrow/util/compression.cc
@@ -29,6 +29,7 @@
 
 #include <brotli/decode.h>
 #include <brotli/encode.h>
+#include <lz4.h>
 #include <snappy.h>
 #include <zlib.h>
 #include <zstd.h>
@@ -357,4 +358,29 @@ Status ZSTDCodec::Compress(int64_t input_len, const uint8_t* input,
   return Status::OK();
 }
 
+// ----------------------------------------------------------------------
+// Lz4 implementation
+
+// Decompresses the LZ4 block held in `input` (`input_len` bytes) into
+// `output_buffer`, whose capacity is `output_len` bytes.
+//
+// Returns Status::IOError if LZ4 reports a failure, Status::OK() otherwise.
+//
+// NOTE(review): both lengths are narrowed to int for the LZ4 API, so buffers larger
+// than INT_MAX bytes are not handled here.
+Status Lz4Codec::Decompress(
+    int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) {
+  const int decompressed_size = LZ4_decompress_safe(
+      reinterpret_cast<const char*>(input), reinterpret_cast<char*>(output_buffer),
+      static_cast<int>(input_len), static_cast<int>(output_len));
+  // LZ4_decompress_safe signals errors with a negative value. Zero is the legitimate
+  // result for a block that encodes an empty buffer, so it must not be reported as
+  // corruption.
+  if (decompressed_size < 0) { return Status::IOError("Corrupt Lz4 compressed data."); }
+  return Status::OK();
+}
+
+// Returns the value LZ4_compressBound reports for an input of `input_len` bytes.
+// The `input` pointer itself is not consulted.
+int64_t Lz4Codec::MaxCompressedLen(int64_t input_len, const uint8_t* input) {
+  // The LZ4 API takes sizes as int, hence the narrowing.
+  const int src_size = static_cast<int>(input_len);
+  return LZ4_compressBound(src_size);
+}
+
+// Compresses `input_len` bytes from `input` into `output_buffer` (capacity
+// `output_buffer_len` bytes) and stores the result of LZ4_compress_default in
+// `*output_length`.
+//
+// Returns Status::IOError when that result is below 1, Status::OK() otherwise.
+Status Lz4Codec::Compress(int64_t input_len, const uint8_t* input,
+    int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) {
+  const char* src = reinterpret_cast<const char*>(input);
+  char* dst = reinterpret_cast<char*>(output_buffer);
+  // The LZ4 API takes sizes as int, hence the narrowing.
+  const int src_size = static_cast<int>(input_len);
+  const int dst_capacity = static_cast<int>(output_buffer_len);
+
+  const int written = LZ4_compress_default(src, dst, src_size, dst_capacity);
+  // The out-parameter is updated even when compression fails.
+  *output_length = written;
+  if (written >= 1) { return Status::OK(); }
+  return Status::IOError("Lz4 compression failure.");
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/cpp/src/arrow/util/compression.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/compression.h b/cpp/src/arrow/util/compression.h
index 9e581d8..d382153 100644
--- a/cpp/src/arrow/util/compression.h
+++ b/cpp/src/arrow/util/compression.h
@@ -27,7 +27,7 @@
 namespace arrow {
 
 struct Compression {
-  enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, ZSTD };
+  enum type { UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, ZSTD, LZ4 };
 };
 
 class ARROW_EXPORT Codec {
@@ -118,6 +118,20 @@ class ARROW_EXPORT ZSTDCodec : public Codec {
   const char* name() const override { return "zstd"; }
 };
 
+// Codec implementation backed by the LZ4 library; reports its name as "lz4".
+class ARROW_EXPORT Lz4Codec : public Codec {
+ public:
+  // Name string identifying this codec.
+  const char* name() const override { return "lz4"; }
+
+  // Size bound for compressing an input of input_len bytes.
+  int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override;
+
+  // Compresses input into output_buffer (capacity output_buffer_len bytes) and
+  // stores the compressed size in *output_length.
+  Status Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len,
+      uint8_t* output_buffer, int64_t* output_length) override;
+
+  // Decompresses input into output_buffer, whose capacity is output_len bytes.
+  Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
+      uint8_t* output_buffer) override;
+};
+
 }  // namespace arrow
 
 #endif

http://git-wip-us.apache.org/repos/asf/arrow/blob/83a4405e/python/manylinux1/scripts/build_lz4.sh
----------------------------------------------------------------------
diff --git a/python/manylinux1/scripts/build_lz4.sh b/python/manylinux1/scripts/build_lz4.sh
index 5a25d3d..975a301 100755
--- a/python/manylinux1/scripts/build_lz4.sh
+++ b/python/manylinux1/scripts/build_lz4.sh
@@ -13,6 +13,7 @@
 
 export LZ4_VERSION="1.7.5"
 export PREFIX="/usr"
+export CFLAGS="${CFLAGS} -O3 -fPIC"
 export LDFLAGS="${LDFLAGS} -Wl,-rpath,${PREFIX}/lib -L${PREFIX}/lib"
 wget "https://github.com/lz4/lz4/archive/v${LZ4_VERSION}.tar.gz" -O lz4-${LZ4_VERSION}.tar.gz
 tar xf lz4-${LZ4_VERSION}.tar.gz