You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2023/06/19 17:22:29 UTC

[arrow-nanoarrow] branch main updated: fix: Fix bad access crash in `ArrowBitmapByteCountSet()` (#242)

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e811cfe  fix: Fix bad access crash in `ArrowBitmapByteCountSet()` (#242)
e811cfe is described below

commit e811cfe15a3989341702777276e594206bbfa37d
Author: Dewey Dunnington <de...@dunnington.ca>
AuthorDate: Mon Jun 19 14:22:24 2023 -0300

    fix: Fix bad access crash in `ArrowBitmapByteCountSet()` (#242)
    
    Closes #241.
---
 .github/workflows/r-check.yaml |  2 +-
 src/nanoarrow/buffer_inline.h  | 13 +++++++------
 src/nanoarrow/buffer_test.cc   | 33 ++++++++++++++++++++++-----------
 3 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/r-check.yaml b/.github/workflows/r-check.yaml
index 85ba356..1ffc967 100644
--- a/.github/workflows/r-check.yaml
+++ b/.github/workflows/r-check.yaml
@@ -73,7 +73,7 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::rcmdcheck, arrow=?ignore-before-r=4.0.0
+          extra-packages: any::rcmdcheck, arrow=?ignore-before-r=4.0.0, github::r-lib/pkgbuild@v1.4.0
           needs: check
           working-directory: r
 
diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index 92a3c83..b270b33 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -296,36 +296,37 @@ static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset
 
   const int64_t i_begin = start_offset;
   const int64_t i_end = start_offset + length;
+  const int64_t i_last_valid = i_end - 1;
 
   const int64_t bytes_begin = i_begin / 8;
-  const int64_t bytes_end = i_end / 8 + 1;
+  const int64_t bytes_last_valid = i_last_valid / 8;
 
-  if (bytes_end == bytes_begin + 1) {
+  if (bytes_begin == bytes_last_valid) {
     // count bits within a single byte
     const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8];
     const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8];
 
     const uint8_t only_byte_mask =
-        i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask);
+        i_end % 8 == 0 ? last_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask);
 
     const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask;
     return _ArrowkBytePopcount[byte_masked];
   }
 
   const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
-  const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];
+  const uint8_t last_byte_mask = i_end % 8 == 0 ? 0 : _ArrowkTrailingBitmask[i_end % 8];
   int64_t count = 0;
 
   // first byte
   count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask];
 
   // middle bytes
-  for (int64_t i = bytes_begin + 1; i < (bytes_end - 1); i++) {
+  for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) {
     count += _ArrowkBytePopcount[bits[i]];
   }
 
   // last byte
-  count += _ArrowkBytePopcount[bits[bytes_end - 1] & ~last_byte_mask];
+  count += _ArrowkBytePopcount[bits[bytes_last_valid] & ~last_byte_mask];
 
   return count;
 }
diff --git a/src/nanoarrow/buffer_test.cc b/src/nanoarrow/buffer_test.cc
index 80c6c39..9d5032c 100644
--- a/src/nanoarrow/buffer_test.cc
+++ b/src/nanoarrow/buffer_test.cc
@@ -306,18 +306,29 @@ TEST(BitmapTest, BitmapTestCountSet) {
   ArrowBitSet(bitmap, 23);
   ArrowBitSet(bitmap, 74);
 
+  // Check masking of the last byte
   EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 80), 3);
-  EXPECT_EQ(ArrowBitCountSet(bitmap, 18, 57), 3);
-
-  EXPECT_EQ(ArrowBitCountSet(bitmap, 18, 0), 0);
-  EXPECT_EQ(ArrowBitCountSet(bitmap, 18, 1), 1);
-  EXPECT_EQ(ArrowBitCountSet(bitmap, 18, 2), 1);
-  EXPECT_EQ(ArrowBitCountSet(bitmap, 18, 3), 1);
-  EXPECT_EQ(ArrowBitCountSet(bitmap, 18, 4), 1);
-  EXPECT_EQ(ArrowBitCountSet(bitmap, 18, 5), 1);
-  EXPECT_EQ(ArrowBitCountSet(bitmap, 18, 6), 2);
-
-  EXPECT_EQ(ArrowBitCountSet(bitmap, 23, 1), 1);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 79), 3);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 78), 3);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 77), 3);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 76), 3);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 75), 3);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 74), 2);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 73), 2);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 72), 2);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 0, 71), 2);
+
+  // Check masking of the first byte
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 15, 17), 2);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 16, 16), 2);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 17, 15), 2);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 18, 14), 2);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 19, 13), 1);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 20, 12), 1);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 21, 11), 1);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 22, 10), 1);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 23, 9), 1);
+  EXPECT_EQ(ArrowBitCountSet(bitmap, 24, 8), 0);
 }
 
 TEST(BitmapTest, BitmapTestCountSetSingleByte) {