You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2022/08/19 16:08:43 UTC

[arrow-nanoarrow] branch main updated: Fix memcheck in CI (and memcheck errors in C) (#25)

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2337a48  Fix memcheck in CI (and memcheck errors in C) (#25)
2337a48 is described below

commit 2337a48356c07389b9999ede3539b8874f36fa4b
Author: Dewey Dunnington <de...@fishandwhistle.net>
AuthorDate: Fri Aug 19 13:08:39 2022 -0300

    Fix memcheck in CI (and memcheck errors in C) (#25)
    
    * fix errors reported on gcc
    
    * fail on memcheck fail + upload results
    
    * try suppressions file in command options
    
    * maybe better output directly in CI
    
    * maybe fix invalid reads when validating arrays
    
    * maybe make sure that large malloc() values intended to cause a failed alloc stay positive
    
    * remove confusing CI output
---
 .github/workflows/build-and-test.yaml |  7 +++++++
 CMakeLists.txt                        |  4 ++--
 src/nanoarrow/array.c                 | 23 +++++++++++++++++++----
 src/nanoarrow/array_test.cc           |  3 ++-
 src/nanoarrow/array_view.c            |  3 +--
 src/nanoarrow/array_view_test.cc      |  6 +++---
 src/nanoarrow/bitmap_inline.h         |  4 ++++
 src/nanoarrow/schema_test.cc          |  6 ++++++
 8 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml
index a0a6023..13e4806 100644
--- a/.github/workflows/build-and-test.yaml
+++ b/.github/workflows/build-and-test.yaml
@@ -116,6 +116,13 @@ jobs:
           sudo ldconfig
           cd build
           ctest -T memcheck . 
+      
+      - name: Upload memcheck results
+        if: failure()
+        uses: actions/upload-artifact@main
+        with:
+          name: nanoarrow-memcheck
+          path: build/Testing/Temporary/MemoryChecker.*.log
 
       - name: Calculate coverage
         run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6121b05..12fe19a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,10 +44,10 @@ install(DIRECTORY src/ DESTINATION include FILES_MATCHING PATTERN "*.c")
 if (NANOARROW_BUILD_TESTS)
     # For testing we use GTest + Arrow C++ (both need C++11)
     include(FetchContent)
+    
+    set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --suppressions=${CMAKE_CURRENT_LIST_DIR}/valgrind.supp --error-exitcode=1")
     include(CTest)
 
-    set(MEMORYCHECK_SUPPRESSIONS_FILE "${CMAKE_CURRENT_LIST_DIR}/valgrind.supp")
-
     set(CMAKE_CXX_STANDARD 11)
     set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index e2a5c46..7d09130 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -282,6 +282,7 @@ static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_v
 
   ArrowArrayViewInit(array_view, private_data->storage_type);
   array_view->layout = private_data->layout;
+  array_view->array = array;
 
   int result = ArrowArrayViewAllocateChildren(array_view, array->n_children);
   if (result != NANOARROW_OK) {
@@ -364,7 +365,11 @@ static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
 
 static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
     struct ArrowArray* array, struct ArrowArrayView* array_view,
-    struct ArrowError* error) {
+    char set_length, struct ArrowError* error) {
+  if (set_length) {
+    ArrowArrayViewSetLength(array_view, array->offset + array->length);
+  }
+
   for (int64_t i = 0; i < array->n_buffers; i++) {
     if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
         array->null_count == 0 && array->buffers[i] == NULL) {
@@ -385,7 +390,7 @@ static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
 
   for (int64_t i = 0; i < array->n_children; i++) {
     NANOARROW_RETURN_NOT_OK(ArrowArrayCheckInternalBufferSizes(
-        array->children[i], array_view->children[i], error));
+        array->children[i], array_view->children[i], set_length, error));
   }
 
   return NANOARROW_OK;
@@ -405,13 +410,23 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
   struct ArrowArrayView array_view;
 
   NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array));
-  int result = ArrowArrayViewSetArray(&array_view, array, error);
+
+  // Check buffer sizes once without using internal buffer data since
+  // ArrowArrayViewSetArray() assumes that all the buffers are long enough
+  // and issues invalid reads on offset buffers if they are not
+  int result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 1, error);
+  if (result != NANOARROW_OK) {
+    ArrowArrayViewReset(&array_view);
+    return result;
+  }
+
+  result = ArrowArrayViewSetArray(&array_view, array, error);
   if (result != NANOARROW_OK) {
     ArrowArrayViewReset(&array_view);
     return result;
   }
 
-  result = ArrowArrayCheckInternalBufferSizes(array, &array_view, error);
+  result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 0, error);
   ArrowArrayViewReset(&array_view);
   return result;
 }
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index e422074..8f3a136 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -56,7 +56,8 @@ TEST(ArrayTest, ArrayTestAllocateChildren) {
   array.release(&array);
 
   ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_STRUCT), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayAllocateChildren(&array, std::numeric_limits<int64_t>::max()),
+  EXPECT_EQ(ArrowArrayAllocateChildren(
+                &array, std::numeric_limits<int64_t>::max() / sizeof(void*)),
             ENOMEM);
   array.release(&array);
 
diff --git a/src/nanoarrow/array_view.c b/src/nanoarrow/array_view.c
index 1914e48..84ccb07 100644
--- a/src/nanoarrow/array_view.c
+++ b/src/nanoarrow/array_view.c
@@ -105,10 +105,9 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
 void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) {
   for (int i = 0; i < 3; i++) {
     int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
+    array_view->buffer_views[i].data.data = NULL;
 
     switch (array_view->layout.buffer_type[i]) {
-      array_view->buffer_views[i].data.data = NULL;
-
       case NANOARROW_BUFFER_TYPE_VALIDITY:
         array_view->buffer_views[i].n_bytes = _ArrowBytesForBits(length);
         continue;
diff --git a/src/nanoarrow/array_view_test.cc b/src/nanoarrow/array_view_test.cc
index 55b8bf0..f54eed5 100644
--- a/src/nanoarrow/array_view_test.cc
+++ b/src/nanoarrow/array_view_test.cc
@@ -185,9 +185,9 @@ TEST(ArrayTest, ArrayViewTestStruct) {
   EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
 
   // Exepct error for out-of-memory
-  EXPECT_EQ(
-      ArrowArrayViewAllocateChildren(&array_view, std::numeric_limits<int64_t>::max()),
-      ENOMEM);
+  EXPECT_EQ(ArrowArrayViewAllocateChildren(
+                &array_view, std::numeric_limits<int64_t>::max() / sizeof(void*)),
+            ENOMEM);
 
   EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 2), NANOARROW_OK);
   EXPECT_EQ(array_view.n_children, 2);
diff --git a/src/nanoarrow/bitmap_inline.h b/src/nanoarrow/bitmap_inline.h
index 6239ca7..fc80e80 100644
--- a/src/nanoarrow/bitmap_inline.h
+++ b/src/nanoarrow/bitmap_inline.h
@@ -249,6 +249,8 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
   out_i_cursor += n_full_bytes * 8;
   n_remaining -= n_full_bytes * 8;
   if (n_remaining > 0) {
+    // Zero out the last byte
+    *out_cursor = 0x00;
     for (int i = 0; i < n_remaining; i++) {
       ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
     }
@@ -294,6 +296,8 @@ static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap,
   out_i_cursor += n_full_bytes * 8;
   n_remaining -= n_full_bytes * 8;
   if (n_remaining > 0) {
+    // Zero out the last byte
+    *out_cursor = 0x00;
     for (int i = 0; i < n_remaining; i++) {
       ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
     }
diff --git a/src/nanoarrow/schema_test.cc b/src/nanoarrow/schema_test.cc
index e925638..c1fe17b 100644
--- a/src/nanoarrow/schema_test.cc
+++ b/src/nanoarrow/schema_test.cc
@@ -40,6 +40,12 @@ TEST(SchemaTest, SchemaInit) {
 
   schema.release(&schema);
   EXPECT_EQ(schema.release, nullptr);
+
+  ASSERT_EQ(ArrowSchemaInit(&schema, NANOARROW_TYPE_UNINITIALIZED), NANOARROW_OK);
+  EXPECT_EQ(ArrowSchemaAllocateChildren(
+                &schema, std::numeric_limits<int64_t>::max() / sizeof(void*)),
+            ENOMEM);
+  schema.release(&schema);
 }
 
 static void ExpectSchemaInitOk(enum ArrowType data_type,