You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/07 03:39:12 UTC

[impala] branch master updated: IMPALA-8833: Check failed in BatchedBitReader::UnpackBatch()

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 33f1e86  IMPALA-8833: Check failed in BatchedBitReader::UnpackBatch()
33f1e86 is described below

commit 33f1e86ce3dc5e278b804004671062f46d42d90e
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Tue Aug 6 15:52:37 2019 +0200

    IMPALA-8833: Check failed in BatchedBitReader::UnpackBatch()
    
    After raising the maximum bit width for bit packing to 64 bits,
    DictDecoder accepted bit widths between 32 and 64, but internally it
    uses 32 bit integers and unpacking ran into a DCHECK.
    
    Adding a check to DictDecoder to catch if the bit width is higher than
    32.
    
    Testing:
      Added a test that asserts that DictDecoder accepts bit widths 0-32
      and rejects higher bit widths which could still be unpacked
      otherwise.
    
    Change-Id: I4cba3338a93f8287c24abbe3ad9bfcbfa756bca4
    Reviewed-on: http://gerrit.cloudera.org:8080/14019
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
 be/src/util/dict-encoding.h | 10 ++++++----
 be/src/util/dict-test.cc    | 25 +++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/be/src/util/dict-encoding.h b/be/src/util/dict-encoding.h
index c7174f6..f440332 100644
--- a/be/src/util/dict-encoding.h
+++ b/be/src/util/dict-encoding.h
@@ -253,7 +253,8 @@ class DictDecoderBase {
     DCHECK_GE(buffer_len, 0);
     if (UNLIKELY(buffer_len == 0)) return Status("Dictionary cannot be 0 bytes");
     uint8_t bit_width = *buffer;
-    if (UNLIKELY(bit_width < 0 || bit_width > BatchedBitReader::MAX_BITWIDTH)) {
+    if (UNLIKELY(bit_width < 0 || bit_width > sizeof(IndexType) * 8
+          || bit_width > BatchedBitReader::MAX_BITWIDTH)) {
       return Status(strings::Substitute("Dictionary has invalid or unsupported bit "
           "width: $0", bit_width));
     }
@@ -283,7 +284,8 @@ class DictDecoderBase {
   }
 
  protected:
-  RleBatchDecoder<uint32_t> data_decoder_;
+  using IndexType = uint32_t;
+  RleBatchDecoder<IndexType> data_decoder_;
 
   /// Greater than zero if we've started decoding a repeated run.
   int64_t num_repeats_ = 0;
@@ -501,7 +503,7 @@ ALWAYS_INLINE inline bool DictDecoder<T>::GetNextValues(
     if (num_repeats > 0) {
       // Decode repeats directly to the output.
       uint32_t num_repeats_to_consume = std::min<uint32_t>(num_repeats, count);
-      uint32_t idx = data_decoder_.GetRepeatedValue(num_repeats_to_consume);
+      const IndexType idx = data_decoder_.GetRepeatedValue(num_repeats_to_consume);
       if (UNLIKELY(idx >= dict_.size())) return false;
       T repeated_val = dict_[idx];
       out.SetNext(repeated_val, num_repeats_to_consume);
@@ -574,7 +576,7 @@ bool DictDecoder<T>::DecodeNextValue(T* value) {
   int32_t num_repeats = data_decoder_.NextNumRepeats();
   DCHECK_GE(num_repeats, 0);
   if (num_repeats > 0) {
-    uint32_t idx = data_decoder_.GetRepeatedValue(num_repeats);
+    const IndexType idx = data_decoder_.GetRepeatedValue(num_repeats);
     if (UNLIKELY(idx >= dict_.size())) return false;
     memcpy(&decoded_values_[0], &dict_[idx], sizeof(T));
     memcpy(value, &decoded_values_[0], sizeof(T));
diff --git a/be/src/util/dict-test.cc b/be/src/util/dict-test.cc
index bb533d3..7fbdcef 100644
--- a/be/src/util/dict-test.cc
+++ b/be/src/util/dict-test.cc
@@ -270,6 +270,31 @@ TEST(DictTest, SetDataAfterPartialRead) {
   EXPECT_EQ(track_decoder.consumption(), bytes_alloc);
 }
 
+// Make sure the decoder detects if the bit width is too high.
+TEST(DictTest, SetDataInvalidBitwidthFails) {
+  // The maximum bit width that bit packing can handle is higher but DictDecoder uses 32
+  // bit integers to store the indices.
+  const int high_bit_width = 33;
+  uint8_t buffer[5] = {}; // Initialise the buffer.
+
+  MemTracker tracker;
+  DictDecoder<int> decoder(&tracker);
+
+  // Accept valid bit widths.
+  for (int i = 0; i < high_bit_width; i++) {
+    buffer[0] = i;
+    Status status = decoder.SetData(buffer, 5);
+    EXPECT_TRUE(status.ok());
+  }
+
+  // Reject too high bit widths.
+  for (int i = high_bit_width; i < BatchedBitReader::MAX_BITWIDTH; i++) {
+    buffer[0] = i;
+    Status status = decoder.SetData(buffer, 5);
+    EXPECT_FALSE(status.ok());
+  }
+}
+
 // Test handling of decode errors from out-of-range values.
 TEST(DictTest, DecodeErrors) {
   int bytes_alloc = 0;