You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/11/07 22:52:12 UTC

[orc] branch branch-1.7 updated: ORC-1041: Use `memcpy` during LZO decompression (#958)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.7 by this push:
     new 42d11e9  ORC-1041: Use `memcpy` during LZO decompression (#958)
42d11e9 is described below

commit 42d11e9b8de906db5450ff4142fc53f7fc73f3b4
Author: Yiqun Zhang <gu...@gmail.com>
AuthorDate: Thu Nov 4 23:50:22 2021 +0800

    ORC-1041: Use `memcpy` during LZO decompression (#958)
    
    ### What changes were proposed in this pull request?
    
    This pr is aimed to fix the implementation of copy data blocks during LZO decompression.
    
    `*reinterpret_cast< int64_t*>(output) = *reinterpret_cast< int64_t*>(matchAddress);`  can lead to unexpected behavior, and in failed test cases it does not appear to be an atomic operation.
    
    This pr uses memcpy instead of the above statement.
    
    Here are the performance benchmarks, where `memcpy` is basically the same as `reinterpret_cast` + `assignment`.  The newer compilers outperform unfolded assignment, so here is a screenshot of the results with only some of the parameters, which you can reproduce with the following test code at https://quick-bench.com/
    
    ![WX20211104-104627](https://user-images.githubusercontent.com/4069905/140250827-6282739b-c060-43fa-b348-87ede15129fc.png)
    ![WX20211104-105010](https://user-images.githubusercontent.com/4069905/140250854-cf6da388-18d8-42f0-8cd6-18468633acc3.png)
    ![WX20211104-105348](https://user-images.githubusercontent.com/4069905/140250863-6c99cfcb-0b72-4ee0-a6b0-ac31344ac771.png)
    
    ```c++
    #include <string.h>
    
    static void use_memcpy(benchmark::State& state) {
      auto size = state.range(0);
      char buf[size];
      for (int i = 0; i < 8; ++i) {
        buf[i] = 'a';
      }
      for (auto _ : state) {
        char *output = buf + 8;
        char *matchAddress = buf;
        char *matchOutputLimit = buf + size;
        while (output < matchOutputLimit) {
          memcpy(output, matchAddress, 8);
          matchAddress += 8;
          output += 8;
        }
      }
    }
    
    static void use_expanded_assignment(benchmark::State& state) {
      auto size = state.range(0);
      char buf[size];
      for (int i = 0; i < 8; ++i) {
        buf[i] = 'a';
      }
      for (auto _ : state) {
        char *output = buf + 8;
        char *matchAddress = buf;
        char *matchOutputLimit = buf + size;
        while (output < matchOutputLimit) {
          output[0] = *matchOutputLimit;
          output[1] = *(matchOutputLimit + 1);
          output[2] = *(matchOutputLimit + 2);
          output[3] = *(matchOutputLimit + 3);
          output[4] = *(matchOutputLimit + 4);
          output[5] = *(matchOutputLimit + 5);
          output[6] = *(matchOutputLimit + 6);
          output[7] = *(matchOutputLimit + 7);
          matchAddress += 8;
          output += 8;
        }
      }
    }
    
    static void use_reinterpret_assignment(benchmark::State& state) {
      auto size = state.range(0);
      char buf[size];
      for (int i = 0; i < 8; ++i) {
        buf[i] = 'a';
      }
      for (auto _ : state) {
        char *output = buf + 8;
        char *matchAddress = buf;
        char *matchOutputLimit = buf + size;
        while (output < matchOutputLimit) {
          *reinterpret_cast<int64_t*>(output) =
                    *reinterpret_cast<int64_t*>(matchAddress);
          matchAddress += 8;
          output += 8;
        }
      }
    }
    
    BENCHMARK(use_memcpy)->Arg(100000);
    
    BENCHMARK(use_expanded_assignment)->Arg(100000);
    
    BENCHMARK(use_reinterpret_assignment)->Arg(100000);
    ```
    
    ### Why are the changes needed?
    
    Fix the bug of LZO decompression.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    (cherry picked from commit 502661aee2b1895ba4ff652297bf3a5f4ea8955a)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 c++/src/LzoDecompressor.cc | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/c++/src/LzoDecompressor.cc b/c++/src/LzoDecompressor.cc
index d1ba183..21bf194 100644
--- a/c++/src/LzoDecompressor.cc
+++ b/c++/src/LzoDecompressor.cc
@@ -312,13 +312,11 @@ namespace orc {
               output += SIZE_OF_INT;
               matchAddress += increment32;
 
-              *reinterpret_cast<int32_t*>(output) =
-                *reinterpret_cast<int32_t*>(matchAddress);
+              memcpy(output, matchAddress, SIZE_OF_INT);
               output += SIZE_OF_INT;
               matchAddress -= decrement64;
             } else {
-              *reinterpret_cast<int64_t*>(output) =
-                *reinterpret_cast<int64_t*>(matchAddress);
+              memcpy(output, matchAddress, SIZE_OF_LONG);
               matchAddress += SIZE_OF_LONG;
               output += SIZE_OF_LONG;
             }
@@ -329,8 +327,7 @@ namespace orc {
               }
 
               while (output < fastOutputLimit) {
-                *reinterpret_cast<int64_t*>(output) =
-                  *reinterpret_cast<int64_t*>(matchAddress);
+                memcpy(output, matchAddress, SIZE_OF_LONG);
                 matchAddress += SIZE_OF_LONG;
                 output += SIZE_OF_LONG;
               }
@@ -340,8 +337,7 @@ namespace orc {
               }
             } else {
               while (output < matchOutputLimit) {
-                *reinterpret_cast<int64_t*>(output) =
-                  *reinterpret_cast<int64_t*>(matchAddress);
+                memcpy(output, matchAddress, SIZE_OF_LONG);
                 matchAddress += SIZE_OF_LONG;
                 output += SIZE_OF_LONG;
               }
@@ -366,8 +362,7 @@ namespace orc {
           // fast copy. We may over-copy but there's enough room in input
           // and output to not overrun them
           do {
-            *reinterpret_cast<int64_t*>(output) =
-              *reinterpret_cast<const int64_t*>(input);
+            memcpy(output, input, SIZE_OF_LONG);
             input += SIZE_OF_LONG;
             output += SIZE_OF_LONG;
           } while (output < literalOutputLimit);