You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/04/18 10:19:53 UTC
[arrow] branch master updated: ARROW-8506: [C++] Add tests to verify the encoded stream of RLE with bit_width > 8

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c152ab6  ARROW-8506: [C++] Add tests to verify the encoded stream of RLE with bit_width > 8
c152ab6 is described below

commit c152ab684c04d688e0f4d680e3b3da4afc2cde3e
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
AuthorDate: Sat Apr 18 12:19:29 2020 +0200

    ARROW-8506: [C++] Add tests to verify the encoded stream of RLE with bit_width > 8
    
    This PR adds test cases to verify that encoded values are stored in little-endian format when RLE is applied with bit_width > 8. The Parquet encoder and decoder handle values as little-endian, as shown here:
    https://github.com/apache/parquet-mr/blob/master/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java#L244
    https://github.com/apache/parquet-mr/blob/master/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java#L87
    
    The existing Rle test cases do not include a test of this type.
    
    Closes #6976 from kiszk/ARROW-8506
    
    Authored-by: Kazuaki Ishizaki <is...@jp.ibm.com>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/arrow/util/rle_encoding_test.cc | 38 +++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc
index b9e62b1..4499565 100644
--- a/cpp/src/arrow/util/rle_encoding_test.cc
+++ b/cpp/src/arrow/util/rle_encoding_test.cc
@@ -306,6 +306,44 @@ TEST(Rle, SpecificSequences) {
     ValidateRle(values, width, nullptr,
                 1 + static_cast<int>(BitUtil::CeilDiv(width * num_values, 8)));
   }
+
+  // Test 16-bit values to confirm encoded values are stored in little endian
+  values.resize(28);
+  for (int i = 0; i < 16; ++i) {
+    values[i] = 0x55aa;
+  }
+  for (int i = 16; i < 28; ++i) {
+    values[i] = 0xaa55;
+  }
+  expected_buffer[0] = (16 << 1);
+  expected_buffer[1] = 0xaa;
+  expected_buffer[2] = 0x55;
+  expected_buffer[3] = (12 << 1);
+  expected_buffer[4] = 0x55;
+  expected_buffer[5] = 0xaa;
+
+  ValidateRle(values, 16, expected_buffer, 6);
+
+  // Test 32-bit values to confirm encoded values are stored in little endian
+  values.resize(28);
+  for (int i = 0; i < 16; ++i) {
+    values[i] = 0x555aaaa5;
+  }
+  for (int i = 16; i < 28; ++i) {
+    values[i] = 0x5aaaa555;
+  }
+  expected_buffer[0] = (16 << 1);
+  expected_buffer[1] = 0xa5;
+  expected_buffer[2] = 0xaa;
+  expected_buffer[3] = 0x5a;
+  expected_buffer[4] = 0x55;
+  expected_buffer[5] = (12 << 1);
+  expected_buffer[6] = 0x55;
+  expected_buffer[7] = 0xa5;
+  expected_buffer[8] = 0xaa;
+  expected_buffer[9] = 0x5a;
+
+  ValidateRle(values, 32, expected_buffer, 10);
 }
 
 // ValidateRle on 'num_vals' values with width 'bit_width'. If 'value' != -1, that value