You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2023/06/25 15:45:07 UTC
[arrow-adbc] branch main updated: chore(c/vendor): Update vendored nanoarrow (#850)
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 0696bcfd chore(c/vendor): Update vendored nanoarrow (#850)
0696bcfd is described below
commit 0696bcfd1537d4edec24595f150becf7f3fca589
Author: Dewey Dunnington <de...@voltrondata.com>
AuthorDate: Sun Jun 25 12:45:01 2023 -0300
chore(c/vendor): Update vendored nanoarrow (#850)
It seems like an older version may be causing problems with MSVC
warnings (#847,
https://github.com/apache/arrow-adbc/pull/844#discussion_r1240442781 ).
---
c/vendor/nanoarrow/nanoarrow.c | 607 ++++++++++++++++++++++++++++-----------
c/vendor/nanoarrow/nanoarrow.h | 272 ++++++++++++++----
c/vendor/nanoarrow/nanoarrow.hpp | 51 ++++
c/vendor/vendor_nanoarrow.sh | 2 +
4 files changed, 698 insertions(+), 234 deletions(-)
diff --git a/c/vendor/nanoarrow/nanoarrow.c b/c/vendor/nanoarrow/nanoarrow.c
index 0b8fc359..ab3e3371 100644
--- a/c/vendor/nanoarrow/nanoarrow.c
+++ b/c/vendor/nanoarrow/nanoarrow.c
@@ -49,12 +49,21 @@ int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
}
}
-const char* ArrowErrorMessage(struct ArrowError* error) { return error->message; }
+const char* ArrowErrorMessage(struct ArrowError* error) {
+ if (error == NULL) {
+ return "";
+ } else {
+ return error->message;
+ }
+}
void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY;
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->buffer_data_type[1] = storage_type;
layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->buffer_data_type[2] = NANOARROW_TYPE_UNINITIALIZED;
layout->element_size_bits[0] = 1;
layout->element_size_bits[1] = 0;
@@ -66,43 +75,53 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
case NANOARROW_TYPE_UNINITIALIZED:
case NANOARROW_TYPE_NA:
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
layout->element_size_bits[0] = 0;
break;
case NANOARROW_TYPE_LIST:
case NANOARROW_TYPE_MAP:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
layout->element_size_bits[1] = 32;
break;
case NANOARROW_TYPE_LARGE_LIST:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
layout->element_size_bits[1] = 64;
break;
+ case NANOARROW_TYPE_STRUCT:
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
+ break;
+
case NANOARROW_TYPE_BOOL:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->element_size_bits[1] = 1;
break;
case NANOARROW_TYPE_UINT8:
case NANOARROW_TYPE_INT8:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->element_size_bits[1] = 8;
break;
case NANOARROW_TYPE_UINT16:
case NANOARROW_TYPE_INT16:
case NANOARROW_TYPE_HALF_FLOAT:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->element_size_bits[1] = 16;
break;
case NANOARROW_TYPE_UINT32:
case NANOARROW_TYPE_INT32:
case NANOARROW_TYPE_FLOAT:
+ layout->element_size_bits[1] = 32;
+ break;
case NANOARROW_TYPE_INTERVAL_MONTHS:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
layout->element_size_bits[1] = 32;
break;
@@ -110,49 +129,61 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
case NANOARROW_TYPE_INT64:
case NANOARROW_TYPE_DOUBLE:
case NANOARROW_TYPE_INTERVAL_DAY_TIME:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->element_size_bits[1] = 64;
break;
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->element_size_bits[1] = 128;
break;
case NANOARROW_TYPE_DECIMAL256:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->element_size_bits[1] = 256;
break;
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY;
break;
case NANOARROW_TYPE_DENSE_UNION:
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+ layout->buffer_data_type[0] = NANOARROW_TYPE_INT8;
layout->element_size_bits[0] = 8;
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
layout->element_size_bits[1] = 32;
break;
case NANOARROW_TYPE_SPARSE_UNION:
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+ layout->buffer_data_type[0] = NANOARROW_TYPE_INT8;
layout->element_size_bits[0] = 8;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
break;
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_BINARY:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
layout->element_size_bits[1] = 32;
layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->buffer_data_type[2] = storage_type;
break;
case NANOARROW_TYPE_LARGE_STRING:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
+ layout->element_size_bits[1] = 64;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->buffer_data_type[2] = NANOARROW_TYPE_STRING;
+ break;
case NANOARROW_TYPE_LARGE_BINARY:
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
layout->element_size_bits[1] = 64;
layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
break;
default:
@@ -1892,24 +1923,43 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array,
return NANOARROW_OK;
}
-static ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
- struct ArrowArrayView* array_view,
- struct ArrowError* error) {
- ArrowArrayInitFromType(array, array_view->storage_type);
+ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
+ struct ArrowArrayView* array_view,
+ struct ArrowError* error) {
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowArrayInitFromType(array, array_view->storage_type), error);
+ int result;
+
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
+ private_data->layout = array_view->layout;
- int result = ArrowArrayAllocateChildren(array, array_view->n_children);
- if (result != NANOARROW_OK) {
- array->release(array);
- return result;
+ if (array_view->n_children > 0) {
+ result = ArrowArrayAllocateChildren(array, array_view->n_children);
+ if (result != NANOARROW_OK) {
+ array->release(array);
+ return result;
+ }
+
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ result =
+ ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
+ if (result != NANOARROW_OK) {
+ array->release(array);
+ return result;
+ }
+ }
}
- private_data->layout = array_view->layout;
+ if (array_view->dictionary != NULL) {
+ result = ArrowArrayAllocateDictionary(array);
+ if (result != NANOARROW_OK) {
+ array->release(array);
+ return result;
+ }
- for (int64_t i = 0; i < array_view->n_children; i++) {
- int result =
- ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
+ result =
+ ArrowArrayInitFromArrayView(array->dictionary, array_view->dictionary, error);
if (result != NANOARROW_OK) {
array->release(array);
return result;
@@ -1955,9 +2005,7 @@ ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_ch
return ENOMEM;
}
- for (int64_t i = 0; i < n_children; i++) {
- array->children[i] = NULL;
- }
+ memset(array->children, 0, n_children * sizeof(struct ArrowArray*));
for (int64_t i = 0; i < n_children; i++) {
array->children[i] = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray));
@@ -2025,6 +2073,16 @@ static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_v
ArrowArrayViewInitFromType(array_view, private_data->storage_type);
array_view->layout = private_data->layout;
array_view->array = array;
+ array_view->length = array->length;
+ array_view->offset = array->offset;
+ array_view->null_count = array->null_count;
+
+ array_view->buffer_views[0].data.as_uint8 = private_data->bitmap.buffer.data;
+ array_view->buffer_views[0].size_bytes = private_data->bitmap.buffer.size_bytes;
+ array_view->buffer_views[1].data.as_uint8 = private_data->buffers[0].data;
+ array_view->buffer_views[1].size_bytes = private_data->buffers[0].size_bytes;
+ array_view->buffer_views[2].data.as_uint8 = private_data->buffers[1].data;
+ array_view->buffer_views[2].size_bytes = private_data->buffers[1].size_bytes;
int result = ArrowArrayViewAllocateChildren(array_view, array->n_children);
if (result != NANOARROW_OK) {
@@ -2040,6 +2098,20 @@ static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_v
}
}
+ if (array->dictionary != NULL) {
+ result = ArrowArrayViewAllocateDictionary(array_view);
+ if (result != NANOARROW_OK) {
+ ArrowArrayViewReset(array_view);
+ return result;
+ }
+
+ result = ArrowArrayViewInitFromArray(array_view->dictionary, array->dictionary);
+ if (result != NANOARROW_OK) {
+ ArrowArrayViewReset(array_view);
+ return result;
+ }
+ }
+
return NANOARROW_OK;
}
@@ -2112,6 +2184,10 @@ static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) {
NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->children[i]));
}
+ if (array->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->dictionary));
+ }
+
return NANOARROW_OK;
}
@@ -2126,39 +2202,10 @@ static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
for (int64_t i = 0; i < array->n_children; i++) {
ArrowArrayFlushInternalPointers(array->children[i]);
}
-}
-
-static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
- struct ArrowArray* array, struct ArrowArrayView* array_view, char set_length,
- struct ArrowError* error) {
- if (set_length) {
- ArrowArrayViewSetLength(array_view, array->offset + array->length);
- }
-
- for (int64_t i = 0; i < array->n_buffers; i++) {
- if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
- array->null_count == 0 && array->buffers[i] == NULL) {
- continue;
- }
-
- int64_t expected_size = array_view->buffer_views[i].size_bytes;
- int64_t actual_size = ArrowArrayBuffer(array, i)->size_bytes;
-
- if (actual_size < expected_size) {
- ArrowErrorSet(
- error,
- "Expected buffer %d to size >= %ld bytes but found buffer with %ld bytes",
- (int)i, (long)expected_size, (long)actual_size);
- return EINVAL;
- }
- }
- for (int64_t i = 0; i < array->n_children; i++) {
- NANOARROW_RETURN_NOT_OK(ArrowArrayCheckInternalBufferSizes(
- array->children[i], array_view->children[i], set_length, error));
+ if (array->dictionary != NULL) {
+ ArrowArrayFlushInternalPointers(array->dictionary);
}
-
- return NANOARROW_OK;
}
ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
@@ -2168,7 +2215,7 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
// in some implementations (at least one version of Arrow C++ at the time this
// was added). Only do this fix if we can assume CPU data access.
if (validation_level >= NANOARROW_VALIDATION_LEVEL_DEFAULT) {
- NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array));
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFinalizeBuffers(array), error);
}
// Make sure the value we get with array->buffers[i] is set to the actual
@@ -2179,44 +2226,11 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
return NANOARROW_OK;
}
- // Check buffer sizes to make sure we are not sending an ArrowArray
- // into the wild that is going to segfault
+ // For validation, initialize an ArrowArrayView with our known buffer sizes
struct ArrowArrayView array_view;
-
- NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array));
-
- // Check buffer sizes once without using internal buffer data since
- // ArrowArrayViewSetArray() assumes that all the buffers are long enough
- // and issues invalid reads on offset buffers if they are not
- int result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 1, error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(&array_view);
- return result;
- }
-
- if (validation_level == NANOARROW_VALIDATION_LEVEL_MINIMAL) {
- ArrowArrayViewReset(&array_view);
- return NANOARROW_OK;
- }
-
- result = ArrowArrayViewSetArray(&array_view, array, error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(&array_view);
- return result;
- }
-
- result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 0, error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(&array_view);
- return result;
- }
-
- if (validation_level == NANOARROW_VALIDATION_LEVEL_DEFAULT) {
- ArrowArrayViewReset(&array_view);
- return NANOARROW_OK;
- }
-
- result = ArrowArrayViewValidateFull(&array_view, error);
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayViewInitFromArray(&array_view, array),
+ error);
+ int result = ArrowArrayViewValidate(&array_view, validation_level, error);
ArrowArrayViewReset(&array_view);
return result;
}
@@ -2263,6 +2277,21 @@ ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
return NANOARROW_OK;
}
+ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view) {
+ if (array_view->dictionary != NULL) {
+ return EINVAL;
+ }
+
+ array_view->dictionary =
+ (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
+ if (array_view->dictionary == NULL) {
+ return ENOMEM;
+ }
+
+ ArrowArrayViewInitFromType(array_view->dictionary, NANOARROW_TYPE_UNINITIALIZED);
+ return NANOARROW_OK;
+}
+
ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
struct ArrowSchema* schema,
struct ArrowError* error) {
@@ -2277,6 +2306,7 @@ ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
result = ArrowArrayViewAllocateChildren(array_view, schema->n_children);
if (result != NANOARROW_OK) {
+ ArrowErrorSet(error, "ArrowArrayViewAllocateChildren() failed");
ArrowArrayViewReset(array_view);
return result;
}
@@ -2290,6 +2320,21 @@ ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
}
}
+ if (schema->dictionary != NULL) {
+ result = ArrowArrayViewAllocateDictionary(array_view);
+ if (result != NANOARROW_OK) {
+ ArrowArrayViewReset(array_view);
+ return result;
+ }
+
+ result =
+ ArrowArrayViewInitFromSchema(array_view->dictionary, schema->dictionary, error);
+ if (result != NANOARROW_OK) {
+ ArrowArrayViewReset(array_view);
+ return result;
+ }
+ }
+
if (array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION ||
array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) {
array_view->union_type_id_map = (int8_t*)ArrowMalloc(256 * sizeof(int8_t));
@@ -2321,6 +2366,11 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
ArrowFree(array_view->children);
}
+ if (array_view->dictionary != NULL) {
+ ArrowArrayViewReset(array_view->dictionary);
+ ArrowFree(array_view->dictionary);
+ }
+
if (array_view->union_type_id_map != NULL) {
ArrowFree(array_view->union_type_id_map);
}
@@ -2331,7 +2381,6 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) {
for (int i = 0; i < 3; i++) {
int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
- array_view->buffer_views[i].data.data = NULL;
switch (array_view->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_VALIDITY:
@@ -2375,11 +2424,11 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length)
}
}
-ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
- struct ArrowArray* array,
- struct ArrowError* error) {
- array_view->array = array;
-
+// This version recursively extracts information from the array and stores it
+// in the array view, performing any checks that require the original array.
+static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
+ struct ArrowArray* array,
+ struct ArrowError* error) {
// Check length and offset
if (array->offset < 0) {
ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of %ld",
@@ -2393,8 +2442,10 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
return EINVAL;
}
- // First pass setting lengths that do not depend on the data buffer
- ArrowArrayViewSetLength(array_view, array->offset + array->length);
+ array_view->array = array;
+ array_view->offset = array->offset;
+ array_view->length = array->length;
+ array_view->null_count = array->null_count;
int64_t buffers_required = 0;
for (int i = 0; i < 3; i++) {
@@ -2404,28 +2455,187 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
buffers_required++;
- // If the null_count is 0, the validity buffer can be NULL
- if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
- array->null_count == 0 && array->buffers[i] == NULL) {
+ // Set buffer pointer
+ array_view->buffer_views[i].data.data = array->buffers[i];
+
+ // If non-null, set buffer size to unknown.
+ if (array->buffers[i] == NULL) {
array_view->buffer_views[i].size_bytes = 0;
+ } else {
+ array_view->buffer_views[i].size_bytes = -1;
}
-
- array_view->buffer_views[i].data.data = array->buffers[i];
}
+ // Check the number of buffers
if (buffers_required != array->n_buffers) {
ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)",
(int)buffers_required, (int)array->n_buffers);
return EINVAL;
}
+ // Check number of children
if (array_view->n_children != array->n_children) {
ArrowErrorSet(error, "Expected %ld children but found %ld children",
(long)array_view->n_children, (long)array->n_children);
return EINVAL;
}
- // Check child sizes and calculate sizes that depend on data in the array buffers
+ // Recurse for children
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view->children[i],
+ array->children[i], error));
+ }
+
+ // Check dictionary
+ if (array->dictionary == NULL && array_view->dictionary != NULL) {
+ ArrowErrorSet(error, "Expected dictionary but found NULL");
+ return EINVAL;
+ }
+
+ if (array->dictionary != NULL && array_view->dictionary == NULL) {
+ ArrowErrorSet(error, "Expected NULL dictionary but found dictionary member");
+ return EINVAL;
+ }
+
+ if (array->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowArrayViewSetArrayInternal(array_view->dictionary, array->dictionary, error));
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
+ struct ArrowError* error) {
+ // Calculate buffer sizes that do not require buffer access. If marked as
+ // unknown, assign the buffer size; otherwise, validate it.
+ int64_t offset_plus_length = array_view->offset + array_view->length;
+
+ // Only loop over the first two buffers because the size of the third buffer
+ // is always data dependent for all current Arrow types.
+ for (int i = 0; i < 2; i++) {
+ int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
+ // Initialize with a value that will cause an error if accidentally used uninitialized
+ int64_t min_buffer_size_bytes = array_view->buffer_views[i].size_bytes + 1;
+
+ switch (array_view->layout.buffer_type[i]) {
+ case NANOARROW_BUFFER_TYPE_VALIDITY:
+ if (array_view->null_count == 0 && array_view->buffer_views[i].size_bytes == 0) {
+ continue;
+ }
+
+ min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length);
+ break;
+ case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+ // Probably don't want/need to rely on the producer to have allocated an
+ // offsets buffer of length 1 for a zero-size array
+ min_buffer_size_bytes =
+ (offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1);
+ break;
+ case NANOARROW_BUFFER_TYPE_DATA:
+ min_buffer_size_bytes =
+ _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] *
+ offset_plus_length) /
+ 8;
+ break;
+ case NANOARROW_BUFFER_TYPE_TYPE_ID:
+ case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+ min_buffer_size_bytes = element_size_bytes * offset_plus_length;
+ break;
+ case NANOARROW_BUFFER_TYPE_NONE:
+ continue;
+ }
+
+ // Assign or validate buffer size
+ if (array_view->buffer_views[i].size_bytes == -1) {
+ array_view->buffer_views[i].size_bytes = min_buffer_size_bytes;
+ } else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes) {
+ ArrowErrorSet(error,
+ "Expected %s array buffer %d to have size >= %ld bytes but found "
+ "buffer with %ld bytes",
+ ArrowTypeString(array_view->storage_type), (int)i,
+ (long)min_buffer_size_bytes,
+ (long)array_view->buffer_views[i].size_bytes);
+ return EINVAL;
+ }
+ }
+
+ // For list, large list, fixed-size list and map views, we can validate the number of children
+ switch (array_view->storage_type) {
+ case NANOARROW_TYPE_LIST:
+ case NANOARROW_TYPE_LARGE_LIST:
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ case NANOARROW_TYPE_MAP:
+ if (array_view->n_children != 1) {
+ ArrowErrorSet(error, "Expected 1 child of %s array but found %ld child arrays",
+ ArrowTypeString(array_view->storage_type),
+ (long)array_view->n_children);
+ return EINVAL;
+ }
+ default:
+ break;
+ }
+
+ // For struct, the sparse union, and the fixed-size list views, we can validate child
+ // lengths.
+ int64_t child_min_length;
+ switch (array_view->storage_type) {
+ case NANOARROW_TYPE_SPARSE_UNION:
+ case NANOARROW_TYPE_STRUCT:
+ child_min_length = (array_view->offset + array_view->length);
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ if (array_view->children[i]->length < child_min_length) {
+ ArrowErrorSet(
+ error,
+ "Expected struct child %d to have length >= %ld but found child with "
+ "length %ld",
+ (int)(i + 1), (long)(child_min_length),
+ (long)array_view->children[i]->length);
+ return EINVAL;
+ }
+ }
+ break;
+
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ child_min_length = (array_view->offset + array_view->length) *
+ array_view->layout.child_size_elements;
+ if (array_view->children[0]->length < child_min_length) {
+ ArrowErrorSet(error,
+ "Expected child of fixed_size_list array to have length >= %ld but "
+ "found array with length %ld",
+ (long)child_min_length, (long)array_view->children[0]->length);
+ return EINVAL;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // Recurse for children
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowArrayViewValidateMinimal(array_view->children[i], error));
+ }
+
+ // Recurse for dictionary
+ if (array_view->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view->dictionary, error));
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
+ struct ArrowError* error) {
+ // Perform minimal validation. This will validate or assign
+ // buffer sizes as long as buffer access is not required.
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error));
+
+ // Calculate buffer sizes or child lengths that require accessing the offsets
+ // buffer. Where appropriate, validate that the first offset is >= 0.
+ // If a buffer size is marked as unknown, assign it; otherwise, validate it.
+ int64_t offset_plus_length = array_view->offset + array_view->length;
+
int64_t first_offset;
int64_t last_offset;
switch (array_view->storage_type) {
@@ -2439,11 +2649,22 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
return EINVAL;
}
- last_offset =
- array_view->buffer_views[1].data.as_int32[array->offset + array->length];
- array_view->buffer_views[2].size_bytes = last_offset;
+ last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length];
+
+ // If the data buffer size is unknown, assign it; otherwise, check it
+ if (array_view->buffer_views[2].size_bytes == -1) {
+ array_view->buffer_views[2].size_bytes = last_offset;
+ } else if (array_view->buffer_views[2].size_bytes < last_offset) {
+ ArrowErrorSet(error,
+ "Expected %s array buffer 2 to have size >= %ld bytes but found "
+ "buffer with %ld bytes",
+ ArrowTypeString(array_view->storage_type), (long)last_offset,
+ (long)array_view->buffer_views[2].size_bytes);
+ return EINVAL;
+ }
}
break;
+
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_LARGE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
@@ -2454,34 +2675,38 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
return EINVAL;
}
- last_offset =
- array_view->buffer_views[1].data.as_int64[array->offset + array->length];
- array_view->buffer_views[2].size_bytes = last_offset;
+ last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length];
+
+ // If the data buffer size is unknown, assign it; otherwise, check it
+ if (array_view->buffer_views[2].size_bytes == -1) {
+ array_view->buffer_views[2].size_bytes = last_offset;
+ } else if (array_view->buffer_views[2].size_bytes < last_offset) {
+ ArrowErrorSet(error,
+ "Expected %s array buffer 2 to have size >= %ld bytes but found "
+ "buffer with %ld bytes",
+ ArrowTypeString(array_view->storage_type), (long)last_offset,
+ (long)array_view->buffer_views[2].size_bytes);
+ return EINVAL;
+ }
}
break;
+
case NANOARROW_TYPE_STRUCT:
for (int64_t i = 0; i < array_view->n_children; i++) {
- if (array->children[i]->length < (array->offset + array->length)) {
+ if (array_view->children[i]->length < offset_plus_length) {
ArrowErrorSet(
error,
"Expected struct child %d to have length >= %ld but found child with "
"length %ld",
- (int)(i + 1), (long)(array->offset + array->length),
- (long)array->children[i]->length);
+ (int)(i + 1), (long)offset_plus_length,
+ (long)array_view->children[i]->length);
return EINVAL;
}
}
break;
- case NANOARROW_TYPE_LIST:
- case NANOARROW_TYPE_MAP: {
- const char* type_name =
- array_view->storage_type == NANOARROW_TYPE_LIST ? "list" : "map";
- if (array->n_children != 1) {
- ArrowErrorSet(error, "Expected 1 child of %s array but found %d child arrays",
- type_name, (int)array->n_children);
- return EINVAL;
- }
+ case NANOARROW_TYPE_LIST:
+ case NANOARROW_TYPE_MAP:
if (array_view->buffer_views[1].size_bytes != 0) {
first_offset = array_view->buffer_views[1].data.as_int32[0];
if (first_offset < 0) {
@@ -2490,27 +2715,20 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
return EINVAL;
}
- last_offset =
- array_view->buffer_views[1].data.as_int32[array->offset + array->length];
- if (array->children[0]->length < last_offset) {
+ last_offset = array_view->buffer_views[1].data.as_int32[offset_plus_length];
+ if (array_view->children[0]->length < last_offset) {
ArrowErrorSet(
error,
- "Expected child of %s array with length >= %ld but found array with "
+ "Expected child of %s array to have length >= %ld but found array with "
"length %ld",
- type_name, (long)last_offset, (long)array->children[0]->length);
+ ArrowTypeString(array_view->storage_type), (long)last_offset,
+ (long)array_view->children[0]->length);
return EINVAL;
}
}
break;
- }
- case NANOARROW_TYPE_LARGE_LIST:
- if (array->n_children != 1) {
- ArrowErrorSet(error,
- "Expected 1 child of large list array but found %d child arrays",
- (int)array->n_children);
- return EINVAL;
- }
+ case NANOARROW_TYPE_LARGE_LIST:
if (array_view->buffer_views[1].size_bytes != 0) {
first_offset = array_view->buffer_views[1].data.as_int64[0];
if (first_offset < 0) {
@@ -2519,49 +2737,61 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
return EINVAL;
}
- last_offset =
- array_view->buffer_views[1].data.as_int64[array->offset + array->length];
- if (array->children[0]->length < last_offset) {
+ last_offset = array_view->buffer_views[1].data.as_int64[offset_plus_length];
+ if (array_view->children[0]->length < last_offset) {
ArrowErrorSet(
error,
- "Expected child of large list array with length >= %ld but found array "
+ "Expected child of large list array to have length >= %ld but found array "
"with length %ld",
- (long)last_offset, (long)array->children[0]->length);
+ (long)last_offset, (long)array_view->children[0]->length);
return EINVAL;
}
}
break;
- case NANOARROW_TYPE_FIXED_SIZE_LIST:
- if (array->n_children != 1) {
- ArrowErrorSet(error,
- "Expected 1 child of fixed-size array but found %d child arrays",
- (int)array->n_children);
- return EINVAL;
- }
-
- last_offset =
- (array->offset + array->length) * array_view->layout.child_size_elements;
- if (array->children[0]->length < last_offset) {
- ArrowErrorSet(
- error,
- "Expected child of fixed-size list array with length >= %ld but found array "
- "with length %ld",
- (long)last_offset, (long)array->children[0]->length);
- return EINVAL;
- }
- break;
default:
break;
}
+ // Recurse for children
for (int64_t i = 0; i < array_view->n_children; i++) {
NANOARROW_RETURN_NOT_OK(
- ArrowArrayViewSetArray(array_view->children[i], array->children[i], error));
+ ArrowArrayViewValidateDefault(array_view->children[i], error));
+ }
+
+ // Recurse for dictionary
+ if (array_view->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view->dictionary, error));
}
return NANOARROW_OK;
}
+ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
+ struct ArrowArray* array,
+ struct ArrowError* error) {
+ // Extract information from the array into the array view
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error));
+
+ // Run default validation. Because we've marked all non-NULL buffers as having unknown
+ // size, validation will also update the buffer sizes as it goes.
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error));
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view,
+ struct ArrowArray* array,
+ struct ArrowError* error) {
+ // Extract information from the array into the array view
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, error));
+
+ // Run minimal validation. Because we've marked all non-NULL buffers as having unknown
+ // size, validation will also assign the buffer sizes that do not require buffer access.
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error));
+
+ return NANOARROW_OK;
+}
+
static int ArrowAssertIncreasingInt32(struct ArrowBufferView view,
struct ArrowError* error) {
if (view.size_bytes <= (int64_t)sizeof(int32_t)) {
@@ -2633,8 +2863,8 @@ static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* values,
return NANOARROW_OK;
}
-ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
- struct ArrowError* error) {
+static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
+ struct ArrowError* error) {
for (int i = 0; i < 3; i++) {
switch (array_view->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
@@ -2653,17 +2883,18 @@ ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION ||
array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION) {
- // Check that we have valid type ids.
if (array_view->union_type_id_map == NULL) {
- // If the union_type_id map is NULL
- // (e.g., when using ArrowArrayInitFromType() + ArrowArrayAllocateChildren()
- // + ArrowArrayFinishBuilding()), we don't have enough information to validate
- // this buffer (GH-178).
+ // If the union_type_id map is NULL (e.g., when using ArrowArrayInitFromType() +
+ // ArrowArrayAllocateChildren() + ArrowArrayFinishBuilding()), we don't have enough
+ // information to validate this buffer.
+ ArrowErrorSet(error,
+ "Insufficient information provided for validation of union array");
+ return EINVAL;
} else if (_ArrowParsedUnionTypeIdsWillEqualChildIndices(
array_view->union_type_id_map, array_view->n_children,
array_view->n_children)) {
- NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(array_view->buffer_views[0], 0,
- array_view->n_children - 1, error));
+ NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(
+ array_view->buffer_views[0], 0, (int8_t)(array_view->n_children - 1), error));
} else {
NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(array_view->buffer_views[0],
array_view->union_type_id_map + 128,
@@ -2674,10 +2905,10 @@ ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION &&
array_view->union_type_id_map != NULL) {
// Check that offsets refer to child elements that actually exist
- for (int64_t i = 0; i < array_view->array->length; i++) {
+ for (int64_t i = 0; i < array_view->length; i++) {
int8_t child_id = ArrowArrayViewUnionChildIndex(array_view, i);
int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i);
- int64_t child_length = array_view->array->children[child_id]->length;
+ int64_t child_length = array_view->children[child_id]->length;
if (offset < 0 || offset > child_length) {
ArrowErrorSet(
error,
@@ -2689,12 +2920,38 @@ ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
}
}
+ // Recurse for children
for (int64_t i = 0; i < array_view->n_children; i++) {
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error));
}
+ // Dictionary validation not implemented
+ if (array_view->dictionary != NULL) {
+ ArrowErrorSet(error, "Validation for dictionary-encoded arrays is not implemented");
+ return ENOTSUP;
+ }
+
return NANOARROW_OK;
}
+
+ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view,
+ enum ArrowValidationLevel validation_level,
+ struct ArrowError* error) {
+ switch (validation_level) {
+ case NANOARROW_VALIDATION_LEVEL_NONE:
+ return NANOARROW_OK;
+ case NANOARROW_VALIDATION_LEVEL_MINIMAL:
+ return ArrowArrayViewValidateMinimal(array_view, error);
+ case NANOARROW_VALIDATION_LEVEL_DEFAULT:
+ return ArrowArrayViewValidateDefault(array_view, error);
+ case NANOARROW_VALIDATION_LEVEL_FULL:
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error));
+ return ArrowArrayViewValidateFull(array_view, error);
+ }
+
+ ArrowErrorSet(error, "validation_level not recognized");
+ return EINVAL;
+}
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
diff --git a/c/vendor/nanoarrow/nanoarrow.h b/c/vendor/nanoarrow/nanoarrow.h
index 759c969b..85353df4 100644
--- a/c/vendor/nanoarrow/nanoarrow.h
+++ b/c/vendor/nanoarrow/nanoarrow.h
@@ -19,9 +19,9 @@
#define NANOARROW_BUILD_ID_H_INCLUDED
#define NANOARROW_VERSION_MAJOR 0
-#define NANOARROW_VERSION_MINOR 2
+#define NANOARROW_VERSION_MINOR 3
#define NANOARROW_VERSION_PATCH 0
-#define NANOARROW_VERSION "0.2.0-SNAPSHOT"
+#define NANOARROW_VERSION "0.3.0-SNAPSHOT"
#define NANOARROW_VERSION_INT \
(NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \
@@ -55,6 +55,11 @@
+#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE)
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -194,6 +199,27 @@ static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src,
#define _NANOARROW_CHECK_UPPER_LIMIT(x_, max_) \
NANOARROW_RETURN_NOT_OK((x_ <= max_) ? NANOARROW_OK : EINVAL)
+#if defined(NANOARROW_DEBUG)
+#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \
+ do { \
+ const int NAME = (EXPR); \
+ if (NAME) { \
+ ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d\n* %s:%d", EXPR_STR, \
+ NAME, __FILE__, __LINE__); \
+ return NAME; \
+ } \
+ } while (0)
+#else
+#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR, EXPR_STR) \
+ do { \
+ const int NAME = (EXPR); \
+ if (NAME) { \
+ ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d", EXPR_STR, NAME); \
+ return NAME; \
+ } \
+ } while (0)
+#endif
+
/// \brief Return code for success.
/// \ingroup nanoarrow-errors
#define NANOARROW_OK 0
@@ -207,6 +233,47 @@ typedef int ArrowErrorCode;
#define NANOARROW_RETURN_NOT_OK(EXPR) \
_NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR)
+/// \brief Check the result of an expression and return it if not NANOARROW_OK,
+/// adding an auto-generated message to an ArrowError.
+/// \ingroup nanoarrow-errors
+///
+/// This macro is used to ensure that functions that accept an ArrowError
+/// as input always set its message when returning an error code (e.g., when calling
+/// a nanoarrow function that does *not* accept ArrowError).
+#define NANOARROW_RETURN_NOT_OK_WITH_ERROR(EXPR, ERROR_EXPR) \
+ _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL( \
+ _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, ERROR_EXPR, #EXPR)
+
+#if defined(NANOARROW_DEBUG) && !defined(NANOARROW_PRINT_AND_DIE)
+#define NANOARROW_PRINT_AND_DIE(VALUE, EXPR_STR) \
+ do { \
+ fprintf(stderr, "%s failed with errno %d\n* %s:%d\n", EXPR_STR, (int)(VALUE), \
+ __FILE__, (int)__LINE__); \
+ abort(); \
+ } while (0)
+#endif
+
+#if defined(NANOARROW_DEBUG)
+#define _NANOARROW_ASSERT_OK_IMPL(NAME, EXPR, EXPR_STR) \
+ do { \
+ const int NAME = (EXPR); \
+ if (NAME) NANOARROW_PRINT_AND_DIE(NAME, EXPR_STR); \
+ } while (0)
+
+/// \brief Assert that an expression's value is NANOARROW_OK
+/// \ingroup nanoarrow-errors
+///
+/// If nanoarrow was built in debug mode (i.e., defined(NANOARROW_DEBUG) is true),
+/// print a message to stderr and abort. If nanoarrow was built in release mode,
+/// this statement has no effect. You can customize fatal error behaviour
+/// by defining the NANOARROW_PRINT_AND_DIE macro before including nanoarrow.h.
+/// This macro is provided as a convenience for users and is not used internally.
+#define NANOARROW_ASSERT_OK(EXPR) \
+ _NANOARROW_ASSERT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, #EXPR)
+#else
+#define NANOARROW_ASSERT_OK(EXPR) EXPR
+#endif
+
static char _ArrowIsLittleEndian(void) {
uint32_t check = 1;
char first_byte;
@@ -266,6 +333,8 @@ enum ArrowType {
/// \ingroup nanoarrow-utils
///
/// Returns NULL for invalid values for type
+static inline const char* ArrowTypeString(enum ArrowType type);
+
static inline const char* ArrowTypeString(enum ArrowType type) {
switch (type) {
case NANOARROW_TYPE_NA:
@@ -384,6 +453,8 @@ enum ArrowValidationLevel {
/// \ingroup nanoarrow-utils
///
/// Returns NULL for invalid values for time_unit
+static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit);
+
static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
switch (time_unit) {
case NANOARROW_TIME_UNIT_SECOND:
@@ -426,6 +497,8 @@ struct ArrowStringView {
/// \brief Return a view of a const C string
/// \ingroup nanoarrow-utils
+static inline struct ArrowStringView ArrowCharView(const char* value);
+
static inline struct ArrowStringView ArrowCharView(const char* value) {
struct ArrowStringView out;
@@ -439,26 +512,28 @@ static inline struct ArrowStringView ArrowCharView(const char* value) {
return out;
}
+union ArrowBufferViewData {
+ const void* data;
+ const int8_t* as_int8;
+ const uint8_t* as_uint8;
+ const int16_t* as_int16;
+ const uint16_t* as_uint16;
+ const int32_t* as_int32;
+ const uint32_t* as_uint32;
+ const int64_t* as_int64;
+ const uint64_t* as_uint64;
+ const double* as_double;
+ const float* as_float;
+ const char* as_char;
+};
+
/// \brief An non-owning view of a buffer
/// \ingroup nanoarrow-utils
struct ArrowBufferView {
/// \brief A pointer to the start of the buffer
///
/// If size_bytes is 0, this value may be NULL.
- union {
- const void* data;
- const int8_t* as_int8;
- const uint8_t* as_uint8;
- const int16_t* as_int16;
- const uint16_t* as_uint16;
- const int32_t* as_int32;
- const uint32_t* as_uint32;
- const int64_t* as_int64;
- const uint64_t* as_uint64;
- const double* as_double;
- const float* as_float;
- const char* as_char;
- } data;
+ union ArrowBufferViewData data;
/// \brief The size of the buffer in bytes
int64_t size_bytes;
@@ -520,6 +595,9 @@ struct ArrowLayout {
/// \brief The function of each buffer
enum ArrowBufferType buffer_type[3];
+ /// \brief The data type of each buffer
+ enum ArrowType buffer_data_type[3];
+
/// \brief The size of an element each buffer or 0 if this size is variable or unknown
int64_t element_size_bits[3];
@@ -534,12 +612,23 @@ struct ArrowLayout {
/// This data structure provides access to the values contained within
/// an ArrowArray with fields provided in a more readily-extractible
/// form. You can re-use an ArrowArrayView for multiple ArrowArrays
-/// with the same storage type, or use it to represent a hypothetical
-/// ArrowArray that does not exist yet.
+/// with the same storage type, use it to represent a hypothetical
+/// ArrowArray that does not exist yet, or use it to validate the buffers
+/// of a future ArrowArray.
struct ArrowArrayView {
- /// \brief The underlying ArrowArray or NULL if it has not been set
+ /// \brief The underlying ArrowArray or NULL if it has not been set or
+ /// if the buffers in this ArrowArrayView are not backed by an ArrowArray.
struct ArrowArray* array;
+ /// \brief The number of elements from the physical start of the buffers.
+ int64_t offset;
+
+ /// \brief The number of elements in this view.
+ int64_t length;
+
+ /// \brief A cached null count or -1 to indicate that this value is unknown.
+ int64_t null_count;
+
/// \brief The type used to store values in this array
///
/// This type represents only the minimum required information to
@@ -560,6 +649,9 @@ struct ArrowArrayView {
/// \brief Pointers to views of this array's children
struct ArrowArrayView** children;
+ /// \brief Pointer to a view of this array's dictionary
+ struct ArrowArrayView* dictionary;
+
/// \brief Union type id to child index mapping
///
/// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer
@@ -779,6 +871,10 @@ static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal,
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromType)
#define ArrowArrayInitFromSchema \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema)
+#define ArrowArrayInitFromArrayView \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView)
+#define ArrowArrayInitFromArrayView \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromArrayView)
#define ArrowArrayAllocateChildren \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateChildren)
#define ArrowArrayAllocateDictionary \
@@ -797,12 +893,16 @@ static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal,
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema)
#define ArrowArrayViewAllocateChildren \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren)
+#define ArrowArrayViewAllocateDictionary \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateDictionary)
#define ArrowArrayViewSetLength \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
#define ArrowArrayViewSetArray \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray)
-#define ArrowArrayViewValidateFull \
- NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidateFull)
+#define ArrowArrayViewSetArrayMinimal \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArrayMinimal)
+#define ArrowArrayViewValidate \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate)
#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewReset)
#define ArrowBasicArrayStreamInit \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit)
@@ -869,7 +969,16 @@ struct ArrowBufferAllocator ArrowBufferDeallocator(
/// need to communicate more verbose error information accept a pointer
/// to an ArrowError. This can be stack or statically allocated. The
/// content of the message is undefined unless an error code has been
-/// returned.
+/// returned. If a nanoarrow function is passed a non-null ArrowError pointer, the
+/// ArrowError pointed to by the argument will be populated with a
+/// null-terminated error message. It is safe to pass a NULL ArrowError anywhere
+/// in the nanoarrow API.
+///
+/// Except where documented, it is generally not safe to continue after a
+/// function has returned a non-zero ArrowErrorCode. The NANOARROW_RETURN_NOT_OK and
+/// NANOARROW_ASSERT_OK macros are provided to help propagate errors. C++ clients can use
+/// the helpers provided in the nanoarrow.hpp header to facilitate using C++ idioms
+/// for memory management and error propagation.
///
/// @{
@@ -879,10 +988,24 @@ struct ArrowError {
char message[1024];
};
-/// \brief Set the contents of an error using printf syntax
+/// \brief Ensure an ArrowError is null-terminated by zeroing the first character.
+///
+/// If error is NULL, this function does nothing.
+static inline void ArrowErrorInit(struct ArrowError* error) {
+ if (error) {
+ error->message[0] = '\0';
+ }
+}
+
+/// \brief Set the contents of an error using printf syntax.
+///
+/// If error is NULL, this function does nothing and returns NANOARROW_OK.
ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...);
/// \brief Get the contents of an error
+///
+/// If error is NULL, returns "", or returns the contents of the error message
+/// otherwise.
const char* ArrowErrorMessage(struct ArrowError* error);
/// @}
@@ -1416,6 +1539,14 @@ ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
struct ArrowSchema* schema,
struct ArrowError* error);
+/// \brief Initialize the contents of an ArrowArray from an ArrowArrayView
+///
+/// Caller is responsible for calling the array->release callback if
+/// NANOARROW_OK is returned.
+ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
+ struct ArrowArrayView* array_view,
+ struct ArrowError* error);
+
/// \brief Allocate the array->children array
///
/// Includes the memory for each child struct ArrowArray,
@@ -1573,7 +1704,7 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
/// \defgroup nanoarrow-array-view Reading arrays
///
-/// These functions read and validate the contents ArrowArray structures
+/// These functions read and validate the contents of ArrowArray structures.
///
/// @{
@@ -1593,12 +1724,15 @@ ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
struct ArrowSchema* schema,
struct ArrowError* error);
-/// \brief Allocate the schema_view->children array
+/// \brief Allocate the array_view->children array
///
/// Includes the memory for each child struct ArrowArrayView
ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
int64_t n_children);
+/// \brief Allocate array_view->dictionary
+ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view);
+
/// \brief Set data-independent buffer sizes from length
void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length);
@@ -1606,9 +1740,23 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length);
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
struct ArrowArray* array, struct ArrowError* error);
-/// \brief Performs extra checks on the array that was set via ArrowArrayViewSetArray()
-ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
- struct ArrowError* error);
+/// \brief Set buffer sizes and data pointers from an ArrowArray except for those
+/// that require dereferencing buffer content.
+ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view,
+ struct ArrowArray* array,
+ struct ArrowError* error);
+
+/// \brief Performs checks on the content of an ArrowArrayView
+///
+/// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray,
+/// the buffer sizes and some content (first and last offset) have already
+/// been validated at the "default" level. If setting the buffer pointers
+/// and sizes otherwise, you may wish to perform checks at a different level. See
+/// documentation for ArrowValidationLevel for the details of checks performed
+/// at each level.
+ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view,
+ enum ArrowValidationLevel validation_level,
+ struct ArrowError* error);
/// \brief Reset the contents of an ArrowArrayView and frees resources
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
@@ -1628,10 +1776,6 @@ static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_
static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* array_view,
int64_t i);
-/// \brief Get the index to use into the relevant list child array
-static inline int64_t ArrowArrayViewListChildOffset(struct ArrowArrayView* array_view,
- int64_t i);
-
/// \brief Get an element in an ArrowArrayView as an integer
///
/// This function does not check for null values, that values are actually integers, or
@@ -2019,36 +2163,37 @@ static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset
const int64_t i_begin = start_offset;
const int64_t i_end = start_offset + length;
+ const int64_t i_last_valid = i_end - 1;
const int64_t bytes_begin = i_begin / 8;
- const int64_t bytes_end = i_end / 8 + 1;
+ const int64_t bytes_last_valid = i_last_valid / 8;
- if (bytes_end == bytes_begin + 1) {
+ if (bytes_begin == bytes_last_valid) {
// count bits within a single byte
const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8];
const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8];
const uint8_t only_byte_mask =
- i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask);
+ i_end % 8 == 0 ? last_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask);
const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask;
return _ArrowkBytePopcount[byte_masked];
}
const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
- const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];
+ const uint8_t last_byte_mask = i_end % 8 == 0 ? 0 : _ArrowkTrailingBitmask[i_end % 8];
int64_t count = 0;
// first byte
count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask];
// middle bytes
- for (int64_t i = bytes_begin + 1; i < (bytes_end - 1); i++) {
+ for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) {
count += _ArrowkBytePopcount[bits[i]];
}
// last byte
- count += _ArrowkBytePopcount[bits[bytes_end - 1] & ~last_byte_mask];
+ count += _ArrowkBytePopcount[bits[bytes_last_valid] & ~last_byte_mask];
return count;
}
@@ -2293,7 +2438,7 @@ static inline int8_t _ArrowParseUnionTypeIds(const char* type_ids, int8_t* out)
}
if (out != NULL) {
- out[i] = type_id;
+ out[i] = (int8_t)type_id;
}
i++;
@@ -2367,11 +2512,15 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array)
}
}
- // Start building any child arrays
+ // Start building any child arrays or dictionaries
for (int64_t i = 0; i < array->n_children; i++) {
NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i]));
}
+ if (array->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->dictionary));
+ }
+
return NANOARROW_OK;
}
@@ -2385,6 +2534,10 @@ static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i]));
}
+ if (array->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->dictionary));
+ }
+
return NANOARROW_OK;
}
@@ -2566,10 +2719,10 @@ static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array,
_NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
return ArrowArrayAppendUInt(array, value);
case NANOARROW_TYPE_DOUBLE:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value));
break;
case NANOARROW_TYPE_FLOAT:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value));
break;
case NANOARROW_TYPE_BOOL:
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
@@ -2616,10 +2769,10 @@ static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
_NANOARROW_CHECK_UPPER_LIMIT(value, INT64_MAX);
return ArrowArrayAppendInt(array, value);
case NANOARROW_TYPE_DOUBLE:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, (double)value));
break;
case NANOARROW_TYPE_FLOAT:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value));
break;
case NANOARROW_TYPE_BOOL:
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
@@ -2682,7 +2835,7 @@ static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
return EINVAL;
}
- offset += value.size_bytes;
+ offset += (int32_t)value.size_bytes;
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t)));
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
@@ -2730,6 +2883,8 @@ static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
switch (private_data->storage_type) {
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
+ case NANOARROW_TYPE_BINARY:
+ case NANOARROW_TYPE_LARGE_BINARY:
return ArrowArrayAppendBytes(array, buffer_view);
default:
return EINVAL;
@@ -2871,7 +3026,7 @@ static inline void ArrowArrayViewMove(struct ArrowArrayView* src,
static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i) {
const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8;
- i += array_view->array->offset;
+ i += array_view->offset;
switch (array_view->storage_type) {
case NANOARROW_TYPE_NA:
return 0x01;
@@ -2917,7 +3072,6 @@ static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* arra
}
}
-
static inline int64_t ArrowArrayViewListChildOffset(struct ArrowArrayView* array_view,
int64_t i) {
switch (array_view->storage_type) {
@@ -2933,7 +3087,7 @@ static inline int64_t ArrowArrayViewListChildOffset(struct ArrowArrayView* array
static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view,
int64_t i) {
struct ArrowBufferView* data_view = &array_view->buffer_views[1];
- i += array_view->array->offset;
+ i += array_view->offset;
switch (array_view->storage_type) {
case NANOARROW_TYPE_INT64:
return data_view->data.as_int64[i];
@@ -2952,9 +3106,9 @@ static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_vi
case NANOARROW_TYPE_UINT8:
return data_view->data.as_uint8[i];
case NANOARROW_TYPE_DOUBLE:
- return data_view->data.as_double[i];
+ return (int64_t)data_view->data.as_double[i];
case NANOARROW_TYPE_FLOAT:
- return data_view->data.as_float[i];
+ return (int64_t)data_view->data.as_float[i];
case NANOARROW_TYPE_BOOL:
return ArrowBitGet(data_view->data.as_uint8, i);
default:
@@ -2964,7 +3118,7 @@ static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_vi
static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view,
int64_t i) {
- i += array_view->array->offset;
+ i += array_view->offset;
struct ArrowBufferView* data_view = &array_view->buffer_views[1];
switch (array_view->storage_type) {
case NANOARROW_TYPE_INT64:
@@ -2984,9 +3138,9 @@ static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_
case NANOARROW_TYPE_UINT8:
return data_view->data.as_uint8[i];
case NANOARROW_TYPE_DOUBLE:
- return data_view->data.as_double[i];
+ return (uint64_t)data_view->data.as_double[i];
case NANOARROW_TYPE_FLOAT:
- return data_view->data.as_float[i];
+ return (uint64_t)data_view->data.as_float[i];
case NANOARROW_TYPE_BOOL:
return ArrowBitGet(data_view->data.as_uint8, i);
default:
@@ -2996,13 +3150,13 @@ static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_
static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view,
int64_t i) {
- i += array_view->array->offset;
+ i += array_view->offset;
struct ArrowBufferView* data_view = &array_view->buffer_views[1];
switch (array_view->storage_type) {
case NANOARROW_TYPE_INT64:
- return data_view->data.as_int64[i];
+ return (double)data_view->data.as_int64[i];
case NANOARROW_TYPE_UINT64:
- return data_view->data.as_uint64[i];
+ return (double)data_view->data.as_uint64[i];
case NANOARROW_TYPE_INT32:
return data_view->data.as_int32[i];
case NANOARROW_TYPE_UINT32:
@@ -3028,7 +3182,7 @@ static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_
static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
struct ArrowArrayView* array_view, int64_t i) {
- i += array_view->array->offset;
+ i += array_view->offset;
struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
const char* data_view = array_view->buffer_views[2].data.as_char;
@@ -3061,7 +3215,7 @@ static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
struct ArrowArrayView* array_view, int64_t i) {
- i += array_view->array->offset;
+ i += array_view->offset;
struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8;
@@ -3095,7 +3249,7 @@ static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView* array_view,
int64_t i, struct ArrowDecimal* out) {
- i += array_view->array->offset;
+ i += array_view->offset;
const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8;
switch (array_view->storage_type) {
case NANOARROW_TYPE_DECIMAL128:
diff --git a/c/vendor/nanoarrow/nanoarrow.hpp b/c/vendor/nanoarrow/nanoarrow.hpp
index 468e9115..da54a573 100644
--- a/c/vendor/nanoarrow/nanoarrow.hpp
+++ b/c/vendor/nanoarrow/nanoarrow.hpp
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.
+#include <stdexcept>
#include <vector>
+#include <string>
#include "nanoarrow.h"
@@ -31,6 +33,55 @@
namespace nanoarrow {
+/// \defgroup nanoarrow_hpp-errors Error handling helpers
+///
+/// Most functions in the C API return an ArrowErrorCode to communicate
+/// possible failure. Except where documented, it is usually not safe to
+/// continue after a non-zero value has been returned. While the
+/// nanoarrow C++ helpers do not throw any exceptions of their own,
+/// these helpers are provided to facilitate using the nanoarrow C++ helpers
+/// in frameworks where this is a useful error handling idiom.
+///
+/// @{
+
+class Exception : public std::exception {
+ public:
+ Exception(const std::string& msg) : msg_(msg) {}
+ const char* what() const noexcept { return msg_.c_str(); }
+
+ private:
+ std::string msg_;
+};
+
+#if defined(NANOARROW_DEBUG)
+#define _NANOARROW_THROW_NOT_OK_IMPL(NAME, EXPR, EXPR_STR) \
+ do { \
+ const int NAME = (EXPR); \
+ if (NAME) { \
+ throw nanoarrow::Exception( \
+ std::string(EXPR_STR) + std::string(" failed with errno ") + \
+ std::to_string(NAME) + std::string("\n * ") + std::string(__FILE__) + \
+ std::string(":") + std::to_string(__LINE__) + std::string("\n")); \
+ } \
+ } while (0)
+#else
+#define _NANOARROW_THROW_NOT_OK_IMPL(NAME, EXPR, EXPR_STR) \
+ do { \
+ const int NAME = (EXPR); \
+ if (NAME) { \
+ throw nanoarrow::Exception(std::string(EXPR_STR) + \
+ std::string(" failed with errno ") + \
+ std::to_string(NAME)); \
+ } \
+ } while (0)
+#endif
+
+#define NANOARROW_THROW_NOT_OK(EXPR) \
+ _NANOARROW_THROW_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR, \
+ #EXPR)
+
+/// @}
+
namespace internal {
/// \defgroup nanoarrow_hpp-unique_base Base classes for Unique wrappers
diff --git a/c/vendor/vendor_nanoarrow.sh b/c/vendor/vendor_nanoarrow.sh
index b7da540b..45aa64fe 100755
--- a/c/vendor/vendor_nanoarrow.sh
+++ b/c/vendor/vendor_nanoarrow.sh
@@ -28,6 +28,7 @@ main() {
local -r tarball="$SCRATCH/nanoarrow.tar.gz"
wget -O "$tarball" "$repo_url/archive/$commit_sha.tar.gz"
+ mv nanoarrow/CMakeLists.txt CMakeLists.nanoarrow.tmp
rm -rf nanoarrow
mkdir -p nanoarrow
tar --strip-components 1 -C "$SCRATCH" -xf "$tarball"
@@ -45,6 +46,7 @@ main() {
cp "$SCRATCH/dist-adbc/nanoarrow.c" nanoarrow/
cp "$SCRATCH/dist-adbc/nanoarrow.h" nanoarrow/
cp "$SCRATCH/dist-adbc/nanoarrow.hpp" nanoarrow/
+ mv CMakeLists.nanoarrow.tmp nanoarrow/CMakeLists.txt
}
main "$@"