You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2023/06/12 15:55:20 UTC

[arrow-nanoarrow] branch main updated: fix(extensions/nanoarrow_ipc): Don't produce arrays with NULL data buffers (#226)

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 7c6e4f3  fix(extensions/nanoarrow_ipc): Don't produce arrays with NULL data buffers (#226)
7c6e4f3 is described below

commit 7c6e4f3c134a17fc6f70ec61e0582cb199d36471
Author: Dewey Dunnington <de...@dunnington.ca>
AuthorDate: Mon Jun 12 11:55:14 2023 -0400

    fix(extensions/nanoarrow_ipc): Don't produce arrays with NULL data buffers (#226)
    
    This only affects consumers using Arrow C++ <= 9.0.0 (i.e., the last
    Arrow C++ release that builds with the default CentOS 7 compilers and
    the version installed by the system package manager on Arch Linux).
    Technically it adds a redundant check when producing an Array; however,
    that check is cheap, and the way to maximize performance would be to
    avoid producing the Array in the first place.
    
    Fixes the verification failures on CentOS 7 and Arch Linux
    (https://github.com/apache/arrow-nanoarrow/actions/runs/5232667457/jobs/9447554040
    and
    https://github.com/apache/arrow-nanoarrow/actions/runs/5232667457/jobs/9447553922),
    as tested by
    `export NANOARROW_PLATFORM=centos7 && docker compose run --rm verify`.
---
 .../src/nanoarrow/nanoarrow_ipc_decoder.c          | 26 +++++++++++++++-------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
index a38398f..9583565 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
@@ -1542,10 +1542,9 @@ static int ArrowIpcDecoderWalkSetArrayView(struct ArrowIpcArraySetter* setter,
   return NANOARROW_OK;
 }
 
-static ArrowErrorCode ArrowIpcDecoderDecodeArrayInternal(struct ArrowIpcDecoder* decoder,
-                                                         int64_t field_i,
-                                                         struct ArrowArray* out,
-                                                         struct ArrowError* error) {
+static ArrowErrorCode ArrowIpcDecoderDecodeArrayInternal(
+    struct ArrowIpcDecoder* decoder, int64_t field_i, struct ArrowArray* out,
+    enum ArrowValidationLevel validation_level, struct ArrowError* error) {
   struct ArrowIpcDecoderPrivate* private_data =
       (struct ArrowIpcDecoderPrivate*)decoder->private_data;
 
@@ -1569,8 +1568,17 @@ static ArrowErrorCode ArrowIpcDecoderDecodeArrayInternal(struct ArrowIpcDecoder*
         ArrowIpcDecoderWalkGetArray(root->array_view, root->array, out, error));
   }
 
-  // If validation is going to happen it has already occurred
-  ArrowArrayFinishBuilding(out, NANOARROW_VALIDATION_LEVEL_NONE, error);
+  // If validation is going to happen it has already occurred; however, the part of
+  // ArrowArrayFinishBuilding() that allocates a data buffer if the data buffer is
+  // NULL (required for compatibility with Arrow <= 9.0.0) assumes CPU data access
+  // and thus needs a validation level >= default.
+  if (validation_level >= NANOARROW_VALIDATION_LEVEL_DEFAULT) {
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayFinishBuilding(out, NANOARROW_VALIDATION_LEVEL_DEFAULT, error));
+  } else {
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayFinishBuilding(out, NANOARROW_VALIDATION_LEVEL_NONE, error));
+  }
 
   return NANOARROW_OK;
 }
@@ -1644,7 +1652,8 @@ ArrowErrorCode ArrowIpcDecoderDecodeArray(struct ArrowIpcDecoder* decoder,
 
   struct ArrowArray temp;
   temp.release = NULL;
-  int result = ArrowIpcDecoderDecodeArrayInternal(decoder, i, &temp, error);
+  int result =
+      ArrowIpcDecoderDecodeArrayInternal(decoder, i, &temp, validation_level, error);
   if (result != NANOARROW_OK && temp.release != NULL) {
     temp.release(&temp);
   } else if (result != NANOARROW_OK) {
@@ -1667,7 +1676,8 @@ ArrowErrorCode ArrowIpcDecoderDecodeArrayFromShared(
 
   struct ArrowArray temp;
   temp.release = NULL;
-  int result = ArrowIpcDecoderDecodeArrayInternal(decoder, i, &temp, error);
+  int result =
+      ArrowIpcDecoderDecodeArrayInternal(decoder, i, &temp, validation_level, error);
   if (result != NANOARROW_OK && temp.release != NULL) {
     temp.release(&temp);
   } else if (result != NANOARROW_OK) {