You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/08/08 16:05:29 UTC

[impala] 01/02: IMPALA-8840: Check failed: num_bytes <= sizeof(T) (5 vs. 4)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ef796639451af63bc9f4dbdc2a79bfc2f5048a93
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Wed Aug 7 14:12:51 2019 +0200

    IMPALA-8840: Check failed: num_bytes <= sizeof(T) (5 vs. 4)
    
    The actual DCHECK failure was fixed by the change for IMPALA-8833.
    
    Added a DCHECK to RleBatchDecoder so that it does not accept bit widths
    higher than the width of its type parameter.
    
    Also preventing UnpackAndDecodeValues from using higher bit widths than
    32, the width of the dictionary index type, with a static assert. This
    also reduces compile time because the compiler does not have to generate
    code for invalid bit widths.
    
    Change-Id: I93461ba2cabb5ec7e0b65dcd62844fcbfa597d16
    Reviewed-on: http://gerrit.cloudera.org:8080/14029
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/util/bit-packing.h        |  1 +
 be/src/util/bit-packing.inline.h |  7 +++++--
 be/src/util/dict-test.cc         |  2 +-
 be/src/util/rle-encoding.h       | 10 ++++++----
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/be/src/util/bit-packing.h b/be/src/util/bit-packing.h
index d449048..140525b 100644
--- a/be/src/util/bit-packing.h
+++ b/be/src/util/bit-packing.h
@@ -47,6 +47,7 @@ namespace impala {
 class BitPacking {
  public:
   static constexpr int MAX_BITWIDTH = sizeof(uint64_t) * 8;
+  static constexpr int MAX_DICT_BITWIDTH = sizeof(uint32_t) * 8;
 
   /// Unpack bit-packed values with 'bit_width' from 'in' to 'out'. Keeps unpacking until
   /// either all 'in_bytes' are read or 'num_values' values are unpacked. 'out' must have
diff --git a/be/src/util/bit-packing.inline.h b/be/src/util/bit-packing.inline.h
index 1e8d39a..a279fa7 100644
--- a/be/src/util/bit-packing.inline.h
+++ b/be/src/util/bit-packing.inline.h
@@ -105,8 +105,8 @@ std::pair<const uint8_t*, int64_t> BitPacking::UnpackAndDecodeValues(int bit_wid
         in, in_bytes, dict, dict_len, num_values, out, stride, decode_error);
 
   switch (bit_width) {
-    // Expand cases from 0 to 64.
-    BOOST_PP_REPEAT_FROM_TO(0, 65, UNPACK_VALUES_CASE, ignore);
+    // Expand cases from 0 to MAX_DICT_BITWIDTH.
+    BOOST_PP_REPEAT_FROM_TO(0, 33, UNPACK_VALUES_CASE, ignore);
     default:
       DCHECK(false);
       return std::make_pair(nullptr, -1);
@@ -271,6 +271,9 @@ const uint8_t* BitPacking::UnpackAndDecode32Values(const uint8_t* __restrict__ i
   // TODO: this could be optimised further by using SIMD instructions.
   // https://lemire.me/blog/2016/08/25/faster-dictionary-decoding-with-simd-instructions/
 
+  static_assert(BIT_WIDTH <= MAX_DICT_BITWIDTH,
+      "Too high bit width for dictionary index.");
+
   // Call UnpackValue() and DecodeValue() for 0 <= i < 32.
 #pragma push_macro("DECODE_VALUE_CALL")
 #define DECODE_VALUE_CALL(ignore1, i, ignore2)               \
diff --git a/be/src/util/dict-test.cc b/be/src/util/dict-test.cc
index 7fbdcef..875d4c3 100644
--- a/be/src/util/dict-test.cc
+++ b/be/src/util/dict-test.cc
@@ -284,7 +284,7 @@ TEST(DictTest, SetDataInvalidBitwidthFails) {
   for (int i = 0; i < high_bit_width; i++) {
     buffer[0] = i;
     Status status = decoder.SetData(buffer, 5);
-    EXPECT_TRUE(status.ok());
+    EXPECT_OK(status);
   }
 
   // Reject too high bit widths.
diff --git a/be/src/util/rle-encoding.h b/be/src/util/rle-encoding.h
index 322b5d7..4da9fd9 100644
--- a/be/src/util/rle-encoding.h
+++ b/be/src/util/rle-encoding.h
@@ -82,10 +82,11 @@ namespace impala {
 
 /// RLE decoder with a batch-oriented interface that enables fast decoding.
 /// Users of this class must first initialize the class to point to a buffer of
-/// RLE-encoded data, passed into the constructor or Reset(). Then they can
-/// decode data by checking NextNumRepeats()/NextNumLiterals() to see if the
-/// next run is a repeated or literal run, then calling GetRepeatedValue()
-/// or GetLiteralValues() respectively to read the values.
+/// RLE-encoded data, passed into the constructor or Reset(). The provided
+/// bit_width must be at most min(sizeof(T) * 8, BatchedBitReader::MAX_BITWIDTH).
+/// Then they can decode data by checking NextNumRepeats()/NextNumLiterals() to
+/// see if the next run is a repeated or literal run, then calling
+/// GetRepeatedValue() or GetLiteralValues() respectively to read the values.
 ///
 /// End-of-input is signalled by NextNumRepeats() == NextNumLiterals() == 0.
 /// Other decoding errors are signalled by functions returning false. If an
@@ -495,6 +496,7 @@ inline void RleBatchDecoder<T>::Reset(uint8_t* buffer, int buffer_len, int bit_w
   DCHECK(buffer != nullptr);
   DCHECK_GE(buffer_len, 0);
   DCHECK_GE(bit_width, 0);
+  DCHECK_LE(bit_width, sizeof(T) * 8);
   DCHECK_LE(bit_width, BatchedBitReader::MAX_BITWIDTH);
   bit_reader_.Reset(buffer, buffer_len);
   bit_width_ = bit_width;