You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2019/12/13 00:51:22 UTC

[incubator-doris] branch master updated: Fix RLE encoding/decoding bug upon large negative number. (#2448)

This is an automated email from the ASF dual-hosted git repository.

zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 14293b3  Fix RLE encoding/decoding bug upon large negative number. (#2448)
14293b3 is described below

commit 14293b39f3106736d4a7aa0c88b75955c3283b07
Author: lichaoyong <li...@baidu.com>
AuthorDate: Fri Dec 13 08:51:05 2019 +0800

    Fix RLE encoding/decoding bug upon large negative number. (#2448)
    
    Doris uses RLE to encode and decode integers.
    The RLE encoding/decoding algorithm comprises four sub-encodings:
    Short Repeat : used for short repeating integer sequences.
    Direct : used for integer sequences whose values have a relatively constant bit width.
    Patched Base : used for integer sequences whose bit widths vary a lot.
    Delta : used for monotonically increasing or decreasing sequences.
    
    This bug occurs in the Patched Base type for large negative numbers.
    In Patched Base, the base value is stored in 1 to 8 bytes, and that byte count is encoded as 0 ~ 7 (byte count minus one).
    If the base value takes 8 bytes, the encoded value for the base width should be 7.
    Before this fix it could be encoded as 8, which is wrong: the extra sign bit could push the base width beyond 64 bits, i.e. to 9 bytes.
    As a result, the data read back was inconsistent with the loaded data because of the wrong encoding.
    In extreme cases, the BE process could core dump because of an illegal memory access.
---
 be/src/olap/rowset/run_length_integer_reader.cpp |  2 +-
 be/src/olap/rowset/run_length_integer_writer.cpp |  5 ++-
 be/test/olap/run_length_integer_test.cpp         | 42 ++++++++++++++++++++++--
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/rowset/run_length_integer_reader.cpp b/be/src/olap/rowset/run_length_integer_reader.cpp
index 2afa536..888ae52 100644
--- a/be/src/olap/rowset/run_length_integer_reader.cpp
+++ b/be/src/olap/rowset/run_length_integer_reader.cpp
@@ -432,4 +432,4 @@ OLAPStatus RunLengthIntegerReader::skip(uint64_t num_values) {
     return res;
 }
 
-}  // namespace doris
\ No newline at end of file
+}  // namespace doris
diff --git a/be/src/olap/rowset/run_length_integer_writer.cpp b/be/src/olap/rowset/run_length_integer_writer.cpp
index 44cb6fe..da823e9 100644
--- a/be/src/olap/rowset/run_length_integer_writer.cpp
+++ b/be/src/olap/rowset/run_length_integer_writer.cpp
@@ -394,6 +394,9 @@ OLAPStatus RunLengthIntegerWriter::_write_patched_base_values() {
     // find the number of bytes required for base.
     // The additional bit is used to store the sign of the base value
     uint32_t base_width = ser::find_closet_num_bits(_min) + 1;
+    if (base_width > 64) {
+        base_width = 64;
+    }
     uint32_t base_bytes = base_width % 8 == 0 ? base_width / 8 : (base_width / 8 + 1);
 
     if (is_negative) {
@@ -730,4 +733,4 @@ void RunLengthIntegerWriter::get_position(PositionEntryWriter* index_entry, bool
     }
 }
 
-}  // namespace doris
\ No newline at end of file
+}  // namespace doris
diff --git a/be/test/olap/run_length_integer_test.cpp b/be/test/olap/run_length_integer_test.cpp
index 9f56cc1..8b2cd8d 100755
--- a/be/test/olap/run_length_integer_test.cpp
+++ b/be/test/olap/run_length_integer_test.cpp
@@ -340,10 +340,8 @@ TEST_F(TestRunLengthUnsignInteger, PatchedBaseEncoding2) {
     }
     
     ASSERT_FALSE(_reader->has_next());
-    
 }
 
-
 class TestRunLengthSignInteger : public testing::Test {
 public:
     TestRunLengthSignInteger() {
@@ -740,6 +738,46 @@ TEST_F(TestRunLengthSignInteger, PatchedBaseEncoding5) {
     
 }
 
+// This case tests large negative numbers.
+// The minimum of the data sequence is -84742859065569280,
+// whose absolute value is 84742859065569280.
+// That is a 57-bit integer, so 8 bytes are used to encode it.
+// The byte count is encoded as (8 - 1) = 7, i.e. 111 in binary.
+TEST_F(TestRunLengthSignInteger, PatchedBaseEncoding6) {
+    // write data
+    int64_t write_data[] = {-17887939293638656, -15605417571528704,
+                            -15605417571528704, -13322895849418752,
+                            -13322895849418752, -84742859065569280,
+                            -15605417571528704, -13322895849418752,
+                            -13322895849418752, -15605417571528704,
+                            -13322895849418752, -13322895849418752,
+                            -15605417571528704, -15605417571528704,
+                            -13322895849418752, -13322895849418752,
+                            -15605417571528704, -15605417571528704,
+                            -13322895849418752, -13322895849418752,
+                            -11040374127308800, -15605417571528704,
+                            -13322895849418752, -13322895849418752,
+                            -15605417571528704, -15605417571528704,
+                            -13322895849418752, -13322895849418752,
+                            -15605417571528704, -13322895849418752};
+    for (int32_t i = 0; i < 30; i++) {
+        ASSERT_EQ(OLAP_SUCCESS, _writer->write(write_data[i]));
+    }
+    ASSERT_EQ(OLAP_SUCCESS, _writer->flush());
+
+    // read data
+    CreateReader();
+
+    for (int32_t i = 0; i < 30; i++) {
+        ASSERT_TRUE(_reader->has_next());
+        int64_t value = 0;
+        ASSERT_EQ(OLAP_SUCCESS, _reader->next(&value));
+        ASSERT_EQ(value, write_data[i]);
+    }
+
+    ASSERT_FALSE(_reader->has_next());
+}
+
 TEST_F(TestRunLengthSignInteger, DirectEncodingForDeltaOverflows1) {
     // write data
     int64_t write_data[] = {4513343538618202711, 2911390882471569739, -9181829309989854913};


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org