You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2019/04/17 21:10:17 UTC

[kudu] 04/05: SelectionVector: pad extra bits with zeroes in constructor

This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 423814d2e4eb2281a0ccf5fc4702153a608e542a
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Fri Apr 12 09:56:27 2019 -0700

    SelectionVector: pad extra bits with zeroes in constructor
    
    I found this after removing the MergeIterator's call to SetAllTrue(). It
    turns out that if you don't call any functions that set all bytes en masse,
    CountSelected() and AnySelected() will misbehave as they'll read garbage
    data from any trailing bits beyond the logical end of the bitmap.
    
    We can fix this in one of two ways:
    - Modify CountSelected()/AnySelected() to ignore the trailing bits.
    - Zero out the trailing bits in the constructor.
    
    I opted for the second approach as I found it easier to implement, and I
    suspect it's more performant than the first.
    
    Change-Id: I74baf87c7f38b7eea0ad46825c2421c99f8f42a1
    Reviewed-on: http://gerrit.cloudera.org:8080/13009
    Tested-by: Kudu Jenkins
    Reviewed-by: Mike Percy <mp...@apache.org>
---
 src/kudu/common/rowblock-test.cc | 20 ++++++++++++++++++++
 src/kudu/common/rowblock.cc      |  8 ++------
 src/kudu/common/rowblock.h       | 27 +++++++++++++++++++--------
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/src/kudu/common/rowblock-test.cc b/src/kudu/common/rowblock-test.cc
index 0865289..1dd85b8 100644
--- a/src/kudu/common/rowblock-test.cc
+++ b/src/kudu/common/rowblock-test.cc
@@ -17,6 +17,8 @@
 
 #include "kudu/common/rowblock.h"
 
+#include <cstddef>
+
 #include <gtest/gtest.h>
 
 namespace kudu {
@@ -43,4 +45,22 @@ TEST(TestSelectionVector, TestEquals) {
   ASSERT_NE(sv1, sv3);
 }
 
+// Test that SelectionVector functions that operate on bytes rather
+// than bits work correctly even if we haven't set or unset all bytes en masse.
+TEST(TestSelectionVector, TestNonByteAligned) {
+  SelectionVector sv(3);
+
+  for (size_t i = 0; i < sv.nrows(); i++) {
+    sv.SetRowSelected(i);
+  }
+  ASSERT_EQ(sv.nrows(), sv.CountSelected());
+  ASSERT_TRUE(sv.AnySelected());
+
+  for (size_t i = 0; i < sv.nrows(); i++) {
+    sv.SetRowUnselected(i);
+  }
+  ASSERT_EQ(0, sv.CountSelected());
+  ASSERT_FALSE(sv.AnySelected());
+}
+
 } // namespace kudu
diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 8c4a160..4996973 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -30,6 +30,7 @@ SelectionVector::SelectionVector(size_t row_capacity)
     n_bytes_(bytes_capacity_),
     bitmap_(new uint8_t[n_bytes_]) {
   CHECK_GT(n_bytes_, 0);
+  PadExtraBitsWithZeroes();
 }
 
 void SelectionVector::Resize(size_t n_rows) {
@@ -41,12 +42,7 @@ void SelectionVector::Resize(size_t n_rows) {
   CHECK_LE(new_bytes, bytes_capacity_);
   n_rows_ = n_rows;
   n_bytes_ = new_bytes;
-  // Pad with zeroes up to the next byte in order to give CountSelected()
-  // and AnySelected() the assumption that the size is an even byte
-  size_t bits_in_last_byte = n_rows & 7;
-  if (bits_in_last_byte > 0) {
-    BitmapChangeBits(&bitmap_[0], n_rows_, 8 - bits_in_last_byte, 0);
-  }
+  PadExtraBitsWithZeroes();
 }
 
 void SelectionVector::ClearToSelectAtMost(size_t max_rows) {
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index aee55a1..660c272 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -111,15 +111,8 @@ class SelectionVector {
 
   // Set all bits in the bitmap to 1
   void SetAllTrue() {
-    // Initially all rows should be selected.
     memset(&bitmap_[0], 0xff, n_bytes_);
-    // the last byte in the bitmap may have a few extra bits - need to
-    // clear those
-
-    int trailer_bits = 8 - (n_rows_ % 8);
-    if (trailer_bits != 8) {
-      bitmap_[n_bytes_ - 1] >>= trailer_bits;
-    }
+    PadExtraBitsWithZeroes();
   }
 
   // Set all bits in the bitmap to 0
@@ -151,6 +144,24 @@ class SelectionVector {
   }
 
  private:
+
+  // Pads any non-byte-aligned bits at the end of the SelectionVector with zeroes.
+  //
+  // To improve performance, CountSelected() and AnySelected() evaluate the
+  // SelectionVector's bitmap in terms of bytes. As such, they consider all of
+  // the trailing bits, even if the bitmap's bit length is not byte-aligned and
+  // some trailing bits aren't part of the bitmap.
+  //
+  // To address this without sacrificing performance, we need to zero out all
+  // trailing bits at construction time, or after any operation that sets all
+  // bytes in bulk.
+  void PadExtraBitsWithZeroes() {
+    size_t bits_in_last_byte = n_rows_ & 7;
+    if (bits_in_last_byte > 0) {
+      BitmapChangeBits(&bitmap_[0], n_rows_, 8 - bits_in_last_byte, false);
+    }
+  }
+
   // The number of allocated bytes in bitmap_
   size_t bytes_capacity_;