You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@orc.apache.org by GitBox <gi...@apache.org> on 2021/04/08 02:31:37 UTC

[GitHub] [orc] wgtmac commented on a change in pull request #679: ORC-780: Add LZ4 Compression to the C++ Writer

wgtmac commented on a change in pull request #679:
URL: https://github.com/apache/orc/pull/679#discussion_r609218500



##########
File path: c++/src/Compression.cc
##########
@@ -897,6 +901,70 @@ DIAGNOSTIC_POP
     return true;
   }
 
+  /**
+   * LZ4 block compression
+   */
+  class Lz4CompressionSteam: public BlockCompressionStream {
+  public:
+    Lz4CompressionSteam(OutputStream * outStream,
+                        int compressionLevel,
+                        uint64_t capacity,
+                        uint64_t blockSize,
+                        MemoryPool& pool)
+                        : BlockCompressionStream(outStream,
+                                                 compressionLevel,
+                                                 capacity,
+                                                 blockSize,
+                                                 pool) {
+      this->init();
+    }
+
+    virtual std::string getName() const override {
+      return "Lz4CompressionStream";
+    }
+    
+    virtual ~Lz4CompressionSteam() override {
+      this->end();
+    }
+
+  protected:
+    virtual uint64_t doBlockCompression() override;
+
+    virtual uint64_t estimateMaxCompressionSize() override {
+      return static_cast<uint64_t>(LZ4_compressBound(bufferSize));
+    }
+
+  private:
+    void init();
+    void end();
+    LZ4_stream_t *state;
+  };
+
+  uint64_t Lz4CompressionSteam::doBlockCompression() {
+    int result = LZ4_compress_fast_extState(static_cast<void*>(state),
+                                            reinterpret_cast<const char*>(rawInputBuffer.data()),
+                                            reinterpret_cast<char*>(compressorBuffer.data()),
+                                            bufferSize,
+                                            static_cast<int>(compressorBuffer.size()),
+                                            level);
+    if (result == 0) {
+      throw std::runtime_error("Error during block compression using lz4.");
+    }
+    return static_cast<uint64_t>(result);
+  }
+
+  void Lz4CompressionSteam::init() {
+    state = static_cast<LZ4_stream_t*>(malloc(static_cast<size_t>(LZ4_sizeofState())));

Review comment:
       why not use LZ4_createStream() to create one and free it via LZ4_freeStream() ?

##########
File path: c++/src/Compression.cc
##########
@@ -36,6 +36,10 @@
 #define ZSTD_CLEVEL_DEFAULT 3
 #endif
 
+#ifndef LZ4_ACCELERATION_MAX
+#define LZ4_ACCELERATION_MAX 65537

Review comment:
       Why set to 65537? Will it overflows to be a negative number as the acceleration parameter used in lz4 is of type int.

##########
File path: c++/src/Compression.cc
##########
@@ -1064,9 +1132,15 @@ DIAGNOSTIC_PUSH
         (new ZSTDCompressionStream(
           outStream, level, bufferCapacity, compressionBlockSize, pool));
     }
+    case CompressionKind_LZ4: {
+      int level = (strategy == CompressionStrategy_SPEED) ?
+              LZ4_ACCELERATION_MAX : 1;

Review comment:
       replace magic number 1 with macro ACCELERATION_DEFAULT defined in lz4.h




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org