You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/08/22 16:36:43 UTC
[arrow-adbc] branch main updated: [C] Update vendored nanoarrow (#74)
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new bedf870 [C] Update vendored nanoarrow (#74)
bedf870 is described below
commit bedf8709507818618012f83b3620933af29aff41
Author: David Li <li...@gmail.com>
AuthorDate: Mon Aug 22 12:36:38 2022 -0400
[C] Update vendored nanoarrow (#74)
---
c/validation/adbc_validation.c | 4 +-
c/vendor/nanoarrow/allocator.c | 29 ++-
c/vendor/nanoarrow/array.c | 264 +++++++++++++++++++---
c/vendor/nanoarrow/array_inline.h | 397 ++++++++++++++++++++++++++++++++-
c/vendor/nanoarrow/array_view.c | 288 ++++++++++++++++++++++++
c/vendor/nanoarrow/bitmap_inline.h | 23 +-
c/vendor/nanoarrow/buffer_inline.h | 25 ++-
c/vendor/nanoarrow/build-and-test.yaml | 30 ++-
c/vendor/nanoarrow/error.c | 4 +
c/vendor/nanoarrow/metadata.c | 41 +---
c/vendor/nanoarrow/nanoarrow.c | 2 +
c/vendor/nanoarrow/nanoarrow.h | 163 ++++++++++++--
c/vendor/nanoarrow/schema.c | 25 +--
c/vendor/nanoarrow/schema_view.c | 57 +----
c/vendor/nanoarrow/typedefs_inline.h | 65 +++++-
c/vendor/nanoarrow/utils.c | 126 +++++++++++
c/vendor/nanoarrow/utils_inline.h | 16 ++
17 files changed, 1361 insertions(+), 198 deletions(-)
diff --git a/c/validation/adbc_validation.c b/c/validation/adbc_validation.c
index 165d9e6..a5e2b63 100644
--- a/c/validation/adbc_validation.c
+++ b/c/validation/adbc_validation.c
@@ -420,15 +420,15 @@ void AdbcValidateStatementSqlIngest(struct AdbcValidateTestContext* adbc_context
struct ArrowBitmap* bitmap = ArrowArrayValidityBitmap(export_array.children[0]);
struct ArrowBuffer* buffer = ArrowArrayBuffer(export_array.children[0], 1);
+ NA_ASSERT_OK(ArrowArrayReserve(&export_array, 5));
+ // XXX: ArrowArrayReserve never allocates bitmaps
NA_ASSERT_OK(ArrowBitmapReserve(bitmap, 5));
- NA_ASSERT_OK(ArrowBufferReserve(buffer, 5 * sizeof(int64_t)));
ArrowBitmapAppendInt8Unsafe(bitmap, (int8_t[]){1, 1, 0, 0, 1}, 5);
NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, 16));
NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, -1));
NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, 0));
NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, 0));
NA_ASSERT_OK(ArrowBufferAppendInt64(buffer, 42));
- NA_ASSERT_OK(ArrowArrayFinishBuilding(export_array.children[0], 0));
NA_ASSERT_OK(ArrowArrayFinishBuilding(&export_array, 0));
export_array.children[0]->length = 5;
export_array.length = 5;
diff --git a/c/vendor/nanoarrow/allocator.c b/c/vendor/nanoarrow/allocator.c
index 8495037..a016bce 100644
--- a/c/vendor/nanoarrow/allocator.c
+++ b/c/vendor/nanoarrow/allocator.c
@@ -43,9 +43,30 @@ static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator* allocato
}
static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = {
- &ArrowBufferAllocatorMallocAllocate, &ArrowBufferAllocatorMallocReallocate,
- &ArrowBufferAllocatorMallocFree, NULL};
+ &ArrowBufferAllocatorMallocReallocate, &ArrowBufferAllocatorMallocFree, NULL};
-struct ArrowBufferAllocator* ArrowBufferAllocatorDefault() {
- return &ArrowBufferAllocatorMalloc;
+struct ArrowBufferAllocator ArrowBufferAllocatorDefault() {
+ return ArrowBufferAllocatorMalloc;
+}
+
+// Allocation callback that never hands out memory: both arguments are
+// ignored and NULL is always returned.
+static uint8_t* ArrowBufferAllocatorNeverAllocate(struct ArrowBufferAllocator* allocator,
+ int64_t size) {
+ (void)allocator;
+ (void)size;
+ return NULL;
+}
+
+// Reallocation callback that never grows or moves memory: every argument
+// is ignored and NULL is always returned.
+static uint8_t* ArrowBufferAllocatorNeverReallocate(
+ struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
+ int64_t new_size) {
+ (void)allocator;
+ (void)ptr;
+ (void)old_size;
+ (void)new_size;
+ return NULL;
+}
+
+// Builds an allocator that can only release memory: its reallocate callback
+// always returns NULL, its free callback is custom_free, and private_data is
+// stored on the returned allocator.
+struct ArrowBufferAllocator ArrowBufferDeallocator(
+ void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
+ int64_t size),
+ void* private_data) {
+ // Member order is reallocate, free, private_data
+ struct ArrowBufferAllocator deallocator = {&ArrowBufferAllocatorNeverReallocate,
+ custom_free, private_data};
+ return deallocator;
+}
diff --git a/c/vendor/nanoarrow/array.c b/c/vendor/nanoarrow/array.c
index 89e6cac..7d09130 100644
--- a/c/vendor/nanoarrow/array.c
+++ b/c/vendor/nanoarrow/array.c
@@ -16,7 +16,6 @@
// under the License.
#include <errno.h>
-#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -24,12 +23,13 @@
static void ArrowArrayRelease(struct ArrowArray* array) {
// Release buffers held by this array
- struct ArrowArrayPrivateData* data = (struct ArrowArrayPrivateData*)array->private_data;
- if (data != NULL) {
- ArrowBitmapReset(&data->bitmap);
- ArrowBufferReset(&data->buffers[0]);
- ArrowBufferReset(&data->buffers[1]);
- ArrowFree(data);
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+ if (private_data != NULL) {
+ ArrowBitmapReset(&private_data->bitmap);
+ ArrowBufferReset(&private_data->buffers[0]);
+ ArrowBufferReset(&private_data->buffers[1]);
+ ArrowFree(private_data);
}
// This object owns the memory for all the children, but those
@@ -72,8 +72,6 @@ ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
array->n_buffers = 0;
break;
- case NANOARROW_TYPE_LIST:
- case NANOARROW_TYPE_LARGE_LIST:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
case NANOARROW_TYPE_STRUCT:
case NANOARROW_TYPE_MAP:
@@ -81,6 +79,8 @@ ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
array->n_buffers = 1;
break;
+ case NANOARROW_TYPE_LIST:
+ case NANOARROW_TYPE_LARGE_LIST:
case NANOARROW_TYPE_BOOL:
case NANOARROW_TYPE_UINT8:
case NANOARROW_TYPE_INT8:
@@ -93,6 +93,8 @@ ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_DECIMAL128:
+ case NANOARROW_TYPE_DECIMAL256:
case NANOARROW_TYPE_INTERVAL_MONTHS:
case NANOARROW_TYPE_INTERVAL_DAY_TIME:
case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
@@ -110,16 +112,19 @@ ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
default:
return EINVAL;
+
+ return NANOARROW_OK;
}
- struct ArrowArrayPrivateData* data = (struct ArrowArrayPrivateData*)array->private_data;
- data->storage_type = storage_type;
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+ private_data->storage_type = storage_type;
return NANOARROW_OK;
}
ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType storage_type) {
array->length = 0;
- array->null_count = -1;
+ array->null_count = 0;
array->offset = 0;
array->n_buffers = 0;
array->n_children = 0;
@@ -129,22 +134,22 @@ ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType storage_t
array->release = &ArrowArrayRelease;
array->private_data = NULL;
- struct ArrowArrayPrivateData* data =
+ struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)ArrowMalloc(sizeof(struct ArrowArrayPrivateData));
- if (data == NULL) {
+ if (private_data == NULL) {
array->release = NULL;
return ENOMEM;
}
- ArrowBitmapInit(&data->bitmap);
- ArrowBufferInit(&data->buffers[0]);
- ArrowBufferInit(&data->buffers[1]);
- data->buffer_data[0] = NULL;
- data->buffer_data[1] = NULL;
- data->buffer_data[2] = NULL;
+ ArrowBitmapInit(&private_data->bitmap);
+ ArrowBufferInit(&private_data->buffers[0]);
+ ArrowBufferInit(&private_data->buffers[1]);
+ private_data->buffer_data[0] = NULL;
+ private_data->buffer_data[1] = NULL;
+ private_data->buffer_data[2] = NULL;
- array->private_data = data;
- array->buffers = (const void**)(&data->buffer_data);
+ array->private_data = private_data;
+ array->buffers = (const void**)(&private_data->buffer_data);
int result = ArrowArraySetStorageType(array, storage_type);
if (result != NANOARROW_OK) {
@@ -152,6 +157,44 @@ ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType storage_t
return result;
}
+ ArrowLayoutInit(&private_data->layout, storage_type);
+ return NANOARROW_OK;
+}
+
+// Initializes array, and recursively each of its children, as an empty array
+// whose storage type and layout are copied from array_view.
+//
+// Returns NANOARROW_OK on success, otherwise the first error code reported by
+// ArrowArrayInit(), ArrowArrayAllocateChildren() or a recursive call. When
+// child allocation or child initialization fails, array->release(array) has
+// already been called. The error argument is only forwarded to the recursion.
+static ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
+ struct ArrowArrayView* array_view,
+ struct ArrowError* error) {
+ // Stop before touching private_data: ArrowArrayInit() leaves it NULL when
+ // its allocation fails
+ NANOARROW_RETURN_NOT_OK(ArrowArrayInit(array, array_view->storage_type));
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ int result = ArrowArrayAllocateChildren(array, array_view->n_children);
+ if (result != NANOARROW_OK) {
+ array->release(array);
+ return result;
+ }
+
+ // The view's layout may differ from the default one ArrowArrayInit() derives
+ // from the storage type alone, so copy it over
+ private_data->layout = array_view->layout;
+
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ result =
+ ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
+ if (result != NANOARROW_OK) {
+ array->release(array);
+ return result;
+ }
+ }
+
+ return NANOARROW_OK;
+}
+
+// Initializes array as an empty array whose structure (storage types, layouts
+// and children) is derived from schema.
+//
+// A temporary ArrowArrayView is built from the schema, used as the template
+// for the array, and then reset on both the success and the failure path.
+// Returns NANOARROW_OK or the first error code encountered; error is forwarded
+// to the helpers.
+ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
+ struct ArrowSchema* schema,
+ struct ArrowError* error) {
+ struct ArrowArrayView array_view;
+ // NOTE(review): assumes ArrowArrayViewInitFromSchema() cleans up array_view
+ // itself when it fails -- confirm against its implementation
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema, error));
+
+ // Always release the temporary view, even when initialization fails
+ int result = ArrowArrayInitFromArrayView(array, &array_view, error);
+ ArrowArrayViewReset(&array_view);
+ return result;
+}
@@ -201,26 +244,29 @@ ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array) {
}
+// Moves the contents of bitmap into array's validity bitmap: the buffer is
+// transferred with ArrowBufferMove(), the bit count is copied, and the source
+// bitmap's bit count is set to zero. buffer_data[0] is refreshed to point at
+// the moved buffer's data.
void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap) {
- struct ArrowArrayPrivateData* data = (struct ArrowArrayPrivateData*)array->private_data;
- ArrowBufferMove(&bitmap->buffer, &data->bitmap.buffer);
- data->bitmap.size_bits = bitmap->size_bits;
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+ ArrowBufferMove(&bitmap->buffer, &private_data->bitmap.buffer);
+ private_data->bitmap.size_bits = bitmap->size_bits;
bitmap->size_bits = 0;
- data->buffer_data[0] = data->bitmap.buffer.data;
+ private_data->buffer_data[0] = private_data->bitmap.buffer.data;
+ // NOTE(review): presumably -1 marks the null count as "not yet computed" for
+ // the newly attached bitmap -- confirm against the Arrow C data interface
+ array->null_count = -1;
}
ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
struct ArrowBuffer* buffer) {
- struct ArrowArrayPrivateData* data = (struct ArrowArrayPrivateData*)array->private_data;
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
switch (i) {
case 0:
- ArrowBufferMove(buffer, &data->bitmap.buffer);
- data->buffer_data[i] = data->bitmap.buffer.data;
+ ArrowBufferMove(buffer, &private_data->bitmap.buffer);
+ private_data->buffer_data[i] = private_data->bitmap.buffer.data;
break;
case 1:
case 2:
- ArrowBufferMove(buffer, &data->buffers[i - 1]);
- data->buffer_data[i] = data->buffers[i - 1].data;
+ ArrowBufferMove(buffer, &private_data->buffers[i - 1]);
+ private_data->buffer_data[i] = private_data->buffers[i - 1].data;
break;
default:
return EINVAL;
@@ -228,3 +274,159 @@ ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
return NANOARROW_OK;
}
+
+// Initializes array_view (recursively, one child view per child array) from
+// the storage type and layout recorded in array's private data, and points
+// array_view->array at array. On any failure array_view is reset and the
+// error code is returned.
+static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_view,
+ struct ArrowArray* array) {
+ struct ArrowArrayPrivateData* array_private =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ ArrowArrayViewInit(array_view, array_private->storage_type);
+ array_view->layout = array_private->layout;
+ array_view->array = array;
+
+ // Allocate the child views, then initialize them until the first failure
+ int status = ArrowArrayViewAllocateChildren(array_view, array->n_children);
+ for (int64_t child_i = 0; status == NANOARROW_OK && child_i < array->n_children;
+ child_i++) {
+ status = ArrowArrayViewInitFromArray(array_view->children[child_i],
+ array->children[child_i]);
+ }
+
+ if (status != NANOARROW_OK) {
+ ArrowArrayViewReset(array_view);
+ }
+
+ return status;
+}
+
+// Grows the buffers of array (and, recursively, of its children) so that each
+// one can hold the byte count recorded in the matching array_view buffer view.
+// A validity buffer that has not been allocated yet is left untouched.
+static ArrowErrorCode ArrowArrayReserveInternal(struct ArrowArray* array,
+ struct ArrowArrayView* array_view) {
+ for (int64_t i = 0; i < array->n_buffers; i++) {
+ struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
+ int is_validity =
+ array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY;
+
+ // An unallocated validity buffer stays unallocated
+ if (is_validity && buffer->data == NULL) {
+ continue;
+ }
+
+ // Only reserve the part of the target size that is not already used
+ int64_t missing_bytes = array_view->buffer_views[i].n_bytes - buffer->size_bytes;
+ if (missing_bytes > 0) {
+ NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, missing_bytes));
+ }
+ }
+
+ // Children are handled the same way, each against its own child view
+ for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayReserveInternal(array->children[child_i],
+ array_view->children[child_i]));
+ }
+
+ return NANOARROW_OK;
+}
+
+// Reserves buffer space for additional_size_elements more elements in array
+// and its children. A temporary array view is used to work out the buffer
+// sizes for the grown length; it is reset before returning.
+ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array,
+ int64_t additional_size_elements) {
+ struct ArrowArrayView size_view;
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&size_view, array));
+
+ // Let the view compute the buffer sizes for the grown length
+ ArrowArrayViewSetLength(&size_view, array->length + additional_size_elements);
+
+ // Grow whichever buffers fall short, then drop the scratch view
+ int status = ArrowArrayReserveInternal(array, &size_view);
+ ArrowArrayViewReset(&size_view);
+ return status;
+}
+
+// Copies the current data pointer of each of the three internal buffers into
+// private_data->buffer_data, for array and, recursively, for all its children.
+static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
+ struct ArrowArrayPrivateData* array_private =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ int64_t buffer_i = 0;
+ while (buffer_i < 3) {
+ array_private->buffer_data[buffer_i] = ArrowArrayBuffer(array, buffer_i)->data;
+ buffer_i++;
+ }
+
+ for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
+ ArrowArrayFlushInternalPointers(array->children[child_i]);
+ }
+}
+
+// Verifies that every internal buffer of array (and, recursively, of its
+// children) holds at least as many bytes as the matching buffer view in
+// array_view expects.
+//
+// When set_length is non-zero the view's expected sizes are first recomputed
+// for array->offset + array->length elements. A validity buffer is skipped
+// when the array has no nulls and no validity buffer pointer.
+//
+// Returns NANOARROW_OK, or EINVAL with a message stored in error as soon as a
+// buffer is too small.
+static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
+ struct ArrowArray* array, struct ArrowArrayView* array_view,
+ char set_length, struct ArrowError* error) {
+ if (set_length) {
+ ArrowArrayViewSetLength(array_view, array->offset + array->length);
+ }
+
+ for (int64_t i = 0; i < array->n_buffers; i++) {
+ if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
+ array->null_count == 0 && array->buffers[i] == NULL) {
+ continue;
+ }
+
+ int64_t expected_size = array_view->buffer_views[i].n_bytes;
+ int64_t actual_size = ArrowArrayBuffer(array, i)->size_bytes;
+
+ if (actual_size < expected_size) {
+ // Every variadic argument is cast to the exact type its conversion
+ // specifier expects; passing the int64_t index for %d is a mismatch
+ ArrowErrorSet(
+ error,
+ "Expected buffer %d to size >= %ld bytes but found buffer with %ld bytes",
+ (int)i, (long)expected_size, (long)actual_size);
+ return EINVAL;
+ }
+ }
+
+ for (int64_t i = 0; i < array->n_children; i++) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayCheckInternalBufferSizes(
+ array->children[i], array_view->children[i], set_length, error));
+ }
+
+ return NANOARROW_OK;
+}
+
+// Finalizes an array that was built through its internal buffers: refreshes
+// the exported buffer pointers and validates the buffer sizes. Returns
+// NANOARROW_OK or the first error code, with details stored in error.
+ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
+ struct ArrowError* error) {
+ // array->buffers[i] must reflect the current allocation, which may have
+ // moved since the pointer was last recorded
+ ArrowArrayFlushInternalPointers(array);
+
+ struct ArrowArrayView check_view;
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&check_view, array));
+
+ // Pass 1: check sizes computed from lengths only. ArrowArrayViewSetArray()
+ // assumes all buffers are long enough and issues invalid reads on offset
+ // buffers if they are not, so it must not run before this check passes.
+ int status = ArrowArrayCheckInternalBufferSizes(array, &check_view, 1, error);
+
+ // Pass 2: load the array into the view, then check again without
+ // recomputing the sizes from the lengths
+ if (status == NANOARROW_OK) {
+ status = ArrowArrayViewSetArray(&check_view, array, error);
+ }
+
+ if (status == NANOARROW_OK) {
+ status = ArrowArrayCheckInternalBufferSizes(array, &check_view, 0, error);
+ }
+
+ ArrowArrayViewReset(&check_view);
+ return status;
+}
diff --git a/c/vendor/nanoarrow/array_inline.h b/c/vendor/nanoarrow/array_inline.h
index 7b74534..14335a8 100644
--- a/c/vendor/nanoarrow/array_inline.h
+++ b/c/vendor/nanoarrow/array_inline.h
@@ -19,12 +19,15 @@
#define NANOARROW_ARRAY_INLINE_H_INCLUDED
#include <errno.h>
+#include <float.h>
+#include <limits.h>
#include <stdint.h>
#include <string.h>
#include "bitmap_inline.h"
#include "buffer_inline.h"
#include "typedefs_inline.h"
+#include "utils_inline.h"
#ifdef __cplusplus
extern "C" {
@@ -47,26 +50,398 @@ static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int
}
}
-static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
- char shrink_to_fit) {
+// Prepares array (and, recursively, its children) for element-wise appending
+// by seeding every data-offset buffer with a single zero offset. Returns
+// EINVAL when the storage type is uninitialized, or the error code of a
+// failed buffer append.
+static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) {
+ struct ArrowArrayPrivateData* array_private =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ if (array_private->storage_type == NANOARROW_TYPE_UNINITIALIZED) {
+ return EINVAL;
+ }
+
+ for (int i = 0; i < 3; i++) {
+ if (array_private->layout.buffer_type[i] != NANOARROW_BUFFER_TYPE_DATA_OFFSET) {
+ continue;
+ }
+
+ // The first offset is always zero; its width follows the layout
+ switch (array_private->layout.element_size_bits[i]) {
+ case 64:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0));
+ break;
+ case 32:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0));
+ break;
+ default:
+ break;
+ }
+ }
+
+ for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[child_i]));
+ }
+
+ return NANOARROW_OK;
+}
+
+// Resizes each of the three internal buffers of array to its current
+// size_bytes with the shrink-to-fit flag set, then does the same for every
+// child. Returns the first error code reported by ArrowBufferResize().
+static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
- // Make sure the value we get with array->buffers[i] is set to the actual
- // pointer (which may have changed from the original due to reallocation)
- int result;
for (int64_t i = 0; i < 3; i++) {
struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
- if (shrink_to_fit) {
- result = ArrowBufferResize(buffer, buffer->size_bytes, shrink_to_fit);
- if (result != NANOARROW_OK) {
- return result;
- }
+ // The size is unchanged; only the trailing 1 (shrink_to_fit) matters here
+ NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1));
+ }
+
+ // Recurse so that the buffers of nested arrays are shrunk as well
+ for (int64_t i = 0; i < array->n_children; i++) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i]));
+ }
+
+ return NANOARROW_OK;
+}
+
+// Writes n copies of the bit `value` into buffer number buffer_i of array,
+// starting at bit position array->length. The buffer is first zero-extended
+// so that it covers all array->length + n elements. array->length itself is
+// not modified.
+//
+// Returns NANOARROW_OK, or the error code from ArrowBufferAppendFill() when
+// the buffer cannot be grown.
+static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* array,
+ int64_t buffer_i, uint8_t value,
+ int64_t n) {
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+ struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
+ // Size for every bit about to be written: sizing for only one extra element
+ // lets ArrowBitsSetTo() run past size_bytes whenever n > 1
+ int64_t bytes_required =
+ _ArrowRoundUpToMultipleOf8(private_data->layout.element_size_bits[buffer_i] *
+ (array->length + n)) /
+ 8;
+ if (bytes_required > buffer->size_bytes) {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes));
+ }
+
+ ArrowBitsSetTo(buffer->data, array->length, n, value);
+ return NANOARROW_OK;
+}
+
+// Appends n null elements to array.
+//
+// For NANOARROW_TYPE_NA only the length and null count are bumped. Otherwise
+// n zero bits are appended to the validity bitmap (allocating it on first
+// use), each offset/data buffer is padded for n elements, and for fixed-size
+// list and struct arrays the children receive nulls as well. Returns EINVAL
+// for layouts with type-id or union-offset buffers, or the error code of a
+// failed buffer operation.
+static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) {
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ if (n == 0) {
+ return NANOARROW_OK;
+ }
+
+ if (private_data->storage_type == NANOARROW_TYPE_NA) {
+ array->null_count += n;
+ array->length += n;
+ return NANOARROW_OK;
+ }
+
+ // Append n 0 bits to the validity bitmap. If we haven't allocated a bitmap yet, do it
+ // now
+ if (private_data->bitmap.buffer.data == NULL) {
+ // All elements appended so far were valid: backfill them with 1 bits first
+ NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n));
+ ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length);
+ ArrowBitmapAppendUnsafe(&private_data->bitmap, 0, n);
+ } else {
+ NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n));
+ ArrowBitmapAppendUnsafe(&private_data->bitmap, 0, n);
+ }
+
+ // Add appropriate buffer fill
+ struct ArrowBuffer* buffer;
+ int64_t size_bytes;
+
+ for (int i = 0; i < 3; i++) {
+ buffer = ArrowArrayBuffer(array, i);
+ size_bytes = private_data->layout.element_size_bits[i] / 8;
+
+ switch (private_data->layout.buffer_type[i]) {
+ case NANOARROW_BUFFER_TYPE_NONE:
+ case NANOARROW_BUFFER_TYPE_VALIDITY:
+ continue;
+ case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+ // Append the current value at the end of the offset buffer for each element
+ NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n));
+
+ // The source of each copy is the offset at index (length + j), i.e. the
+ // entry written just before it, so the same end offset is repeated
+ for (int64_t j = 0; j < n; j++) {
+ ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j),
+ size_bytes);
+ }
+
+ // Skip the data buffer
+ i++;
+ continue;
+ case NANOARROW_BUFFER_TYPE_DATA:
+ // Zero out the next bit of memory
+ if (private_data->layout.element_size_bits[i] % 8 == 0) {
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n));
+ } else {
+ // Element width is not a whole number of bytes: fill bit-wise
+ NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n));
+ }
+ continue;
+
+ case NANOARROW_BUFFER_TYPE_TYPE_ID:
+ case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+ // Not supported
+ return EINVAL;
}
+ }
+
+ // For fixed-size list and struct we need to append some nulls to
+ // children for the lengths to line up properly
+ switch (private_data->storage_type) {
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(
+ array->children[0], n * private_data->layout.child_size_elements));
+ break;
+ case NANOARROW_TYPE_STRUCT:
+ for (int64_t i = 0; i < array->n_children; i++) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(array->children[i], n));
+ }
+ // NOTE(review): no break here; control falls through to default, which
+ // only breaks, so the behavior is the same as with an explicit break
+ default:
+ break;
+ }
+
+ array->length += n;
+ array->null_count += n;
+ return NANOARROW_OK;
+}
+
+// Appends one signed integer to array, converting it to the array's storage
+// type. Narrower signed types are range-checked first, unsigned types are
+// delegated to ArrowArrayAppendUInt() after a non-negativity check, floating
+// point types receive the converted value and bool receives (value != 0).
+// Returns EINVAL for any other storage type.
+static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array,
+ int64_t value) {
+ struct ArrowArrayPrivateData* array_private =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ struct ArrowBuffer* values = ArrowArrayBuffer(array, 1);
+
+ switch (array_private->storage_type) {
+ case NANOARROW_TYPE_UINT64:
+ case NANOARROW_TYPE_UINT32:
+ case NANOARROW_TYPE_UINT16:
+ case NANOARROW_TYPE_UINT8:
+ // The unsigned appender does the per-width checks and the bookkeeping
+ _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
+ return ArrowArrayAppendUInt(array, value);
+ case NANOARROW_TYPE_INT8:
+ _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX);
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(values, value));
+ break;
+ case NANOARROW_TYPE_INT16:
+ _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX);
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(values, value));
+ break;
+ case NANOARROW_TYPE_INT32:
+ _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX);
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(values, value));
+ break;
+ case NANOARROW_TYPE_INT64:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(values, &value, sizeof(int64_t)));
+ break;
+ case NANOARROW_TYPE_FLOAT:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(values, value));
+ break;
+ case NANOARROW_TYPE_DOUBLE:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(values, value));
+ break;
+ case NANOARROW_TYPE_BOOL:
+ NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
+ break;
+ default:
+ return EINVAL;
+ }
+
+ // A validity bitmap only exists once a null has been appended; from then on
+ // every valid element must be recorded in it
+ if (array_private->bitmap.buffer.data != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+ }
+
+ array->length++;
+ return NANOARROW_OK;
+}
+
+// Appends one unsigned integer to array, converting it to the array's storage
+// type. Narrower unsigned types are range-checked first, signed types are
+// delegated to ArrowArrayAppendInt() after checking the value fits in
+// int64_t, floating point types receive the converted value and bool receives
+// (value != 0). Returns EINVAL for any other storage type.
+static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
+ uint64_t value) {
+ struct ArrowArrayPrivateData* array_private =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ struct ArrowBuffer* values = ArrowArrayBuffer(array, 1);
+
+ switch (array_private->storage_type) {
+ case NANOARROW_TYPE_INT64:
+ case NANOARROW_TYPE_INT32:
+ case NANOARROW_TYPE_INT16:
+ case NANOARROW_TYPE_INT8:
+ // The signed appender does the per-width checks and the bookkeeping
+ _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
+ return ArrowArrayAppendInt(array, value);
+ case NANOARROW_TYPE_UINT8:
+ _NANOARROW_CHECK_RANGE(value, 0, UINT8_MAX);
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(values, value));
+ break;
+ case NANOARROW_TYPE_UINT16:
+ _NANOARROW_CHECK_RANGE(value, 0, UINT16_MAX);
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(values, value));
+ break;
+ case NANOARROW_TYPE_UINT32:
+ _NANOARROW_CHECK_RANGE(value, 0, UINT32_MAX);
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(values, value));
+ break;
+ case NANOARROW_TYPE_UINT64:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(values, &value, sizeof(uint64_t)));
+ break;
+ case NANOARROW_TYPE_FLOAT:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(values, value));
+ break;
+ case NANOARROW_TYPE_DOUBLE:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(values, value));
+ break;
+ case NANOARROW_TYPE_BOOL:
+ NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
+ break;
+ default:
+ return EINVAL;
+ }
+
+ // A validity bitmap only exists once a null has been appended; from then on
+ // every valid element must be recorded in it
+ if (array_private->bitmap.buffer.data != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+ }
+
+ array->length++;
+ return NANOARROW_OK;
+}
+
+// Appends one floating point value to a double or float array.
+//
+// For float storage the value must lie within the finite float range
+// [-FLT_MAX, FLT_MAX]. NOTE(review): _NANOARROW_CHECK_RANGE is defined
+// elsewhere; presumably it returns EINVAL when the value is outside the given
+// bounds -- confirm. Returns EINVAL for any other storage type, or the error
+// code of a failed buffer append.
+static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
+ double value) {
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
+
+ switch (private_data->storage_type) {
+ case NANOARROW_TYPE_DOUBLE:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(double)));
+ break;
+ case NANOARROW_TYPE_FLOAT:
+ // FLT_MIN is the smallest positive normalized float, not the most
+ // negative one: using it as the lower bound rejects zero and every
+ // negative value. The lowest finite float is -FLT_MAX.
+ _NANOARROW_CHECK_RANGE(value, -FLT_MAX, FLT_MAX);
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value));
+ break;
+ default:
+ return EINVAL;
+ }
+
+ // Once a validity bitmap exists, record this element as valid
+ if (private_data->bitmap.buffer.data != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+ }
+
+ array->length++;
+ return NANOARROW_OK;
+}
+
+// Appends one binary value to a string, binary, large string, large binary
+// or fixed-size binary array.
+//
+// For the offset-based types the new end offset (previous end offset plus
+// value.n_bytes) is appended to the offset buffer and the bytes to the data
+// buffer; 32-bit offsets that would exceed INT32_MAX yield EINVAL. For
+// fixed-size binary value.n_bytes must equal the element width. Any other
+// storage type yields EINVAL.
+static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
+ struct ArrowBufferView value) {
+ struct ArrowArrayPrivateData* array_private =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ // Fixed-size binary keeps its bytes in buffer 1; the other types keep
+ // offsets in buffer 1 and bytes in buffer 2
+ int64_t bytes_buffer_i =
+ (array_private->storage_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) ? 1 : 2;
+ struct ArrowBuffer* offsets = ArrowArrayBuffer(array, 1);
+ struct ArrowBuffer* bytes = ArrowArrayBuffer(array, bytes_buffer_i);
+
+ switch (array_private->storage_type) {
+ case NANOARROW_TYPE_STRING:
+ case NANOARROW_TYPE_BINARY: {
+ int32_t end_offset = ((int32_t*)offsets->data)[array->length];
+ if ((end_offset + value.n_bytes) > INT32_MAX) {
+ return EINVAL;
+ }
+
+ end_offset += value.n_bytes;
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offsets, &end_offset, sizeof(int32_t)));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(bytes, value.data.data, value.n_bytes));
+ break;
+ }
+
+ case NANOARROW_TYPE_LARGE_STRING:
+ case NANOARROW_TYPE_LARGE_BINARY: {
+ int64_t end_offset = ((int64_t*)offsets->data)[array->length];
+ end_offset += value.n_bytes;
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offsets, &end_offset, sizeof(int64_t)));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(bytes, value.data.data, value.n_bytes));
+ break;
+ }
+
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY: {
+ int64_t element_bytes = array_private->layout.element_size_bits[1] / 8;
+ if (value.n_bytes != element_bytes) {
+ return EINVAL;
+ }
+
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(bytes, value.data.data, value.n_bytes));
+ break;
+ }
+
+ default:
+ return EINVAL;
+ }
+
+ // Once a validity bitmap exists, record this element as valid
+ if (array_private->bitmap.buffer.data != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+ }
+
+ array->length++;
+ return NANOARROW_OK;
+}
+
+// Appends one string to a string or large string array by forwarding its
+// bytes to ArrowArrayAppendBytes(). Returns EINVAL for any other storage type.
+static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
+ struct ArrowStringView value) {
+ struct ArrowArrayPrivateData* array_private =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ if (array_private->storage_type != NANOARROW_TYPE_STRING &&
+ array_private->storage_type != NANOARROW_TYPE_LARGE_STRING) {
+ return EINVAL;
+ }
+
+ // Re-express the string view as a byte view over the same memory
+ struct ArrowBufferView bytes;
+ bytes.data.data = value.data;
+ bytes.n_bytes = value.n_bytes;
+
+ return ArrowArrayAppendBytes(array, bytes);
+}
+
+// Closes the element currently being built in a nested array after its child
+// values have been appended.
+//
+// For list and large list the current child length is appended as the next
+// offset. For fixed-size list and struct no buffer is written; the child
+// lengths are only checked against the length the parent is about to have.
+// Returns EINVAL on a length mismatch, on a list child longer than INT32_MAX,
+// or for any other storage type.
+static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ int64_t child_length;
+
+ switch (private_data->storage_type) {
+ case NANOARROW_TYPE_LIST:
+ child_length = array->children[0]->length;
+ // The offset is stored as a 32-bit integer
+ if (child_length > INT32_MAX) {
+ return EINVAL;
+ }
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), child_length));
+ break;
+ case NANOARROW_TYPE_LARGE_LIST:
+ child_length = array->children[0]->length;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), child_length));
+ break;
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ child_length = array->children[0]->length;
+ // The child must hold exactly child_size_elements values per parent
+ // element, counting the element being finished
+ if (child_length !=
+ ((array->length + 1) * private_data->layout.child_size_elements)) {
+ return EINVAL;
+ }
+ break;
+ case NANOARROW_TYPE_STRUCT:
+ // Every child must already contain the element being finished
+ for (int64_t i = 0; i < array->n_children; i++) {
+ child_length = array->children[i]->length;
+ if (child_length != (array->length + 1)) {
+ return EINVAL;
+ }
+ }
+ break;
+ default:
+ return EINVAL;
+ }
- private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
+ // Once a validity bitmap exists, record this element as valid
+ if (private_data->bitmap.buffer.data != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
}
+ array->length++;
return NANOARROW_OK;
}
diff --git a/c/vendor/nanoarrow/array_view.c b/c/vendor/nanoarrow/array_view.c
new file mode 100644
index 0000000..84ccb07
--- /dev/null
+++ b/c/vendor/nanoarrow/array_view.c
@@ -0,0 +1,288 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+// Put array_view into a known state for storage_type: every field is zeroed,
+// then the buffer layout and the storage type are recorded. Nothing previously
+// referenced by array_view is released here.
+void ArrowArrayViewInit(struct ArrowArrayView* array_view, enum ArrowType storage_type) {
+  memset(array_view, 0, sizeof(*array_view));
+  ArrowLayoutInit(&array_view->layout, storage_type);
+  array_view->storage_type = storage_type;
+}
+
+// Allocate array_view->children together with one initialized (but
+// NANOARROW_TYPE_UNINITIALIZED) ArrowArrayView per child.
+//
+// Returns EINVAL if children were already allocated or n_children is negative,
+// ENOMEM if an allocation fails, and NANOARROW_OK otherwise. On ENOMEM the
+// view may be partially populated; ArrowArrayViewReset() releases it.
+ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
+                                              int64_t n_children) {
+  if (array_view->children != NULL || n_children < 0) {
+    return EINVAL;
+  }
+
+  // A zero-byte allocation is allowed to return NULL, which must not be
+  // reported as an out-of-memory condition: leaf types simply have no children.
+  if (n_children == 0) {
+    array_view->n_children = 0;
+    return NANOARROW_OK;
+  }
+
+  array_view->children =
+      (struct ArrowArrayView**)ArrowMalloc(n_children * sizeof(struct ArrowArrayView*));
+  if (array_view->children == NULL) {
+    return ENOMEM;
+  }
+
+  // Clear every slot before publishing n_children so that a later reset can
+  // tell allocated children from slots that were never filled.
+  for (int64_t i = 0; i < n_children; i++) {
+    array_view->children[i] = NULL;
+  }
+
+  array_view->n_children = n_children;
+
+  for (int64_t i = 0; i < n_children; i++) {
+    array_view->children[i] =
+        (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
+    if (array_view->children[i] == NULL) {
+      return ENOMEM;
+    }
+    ArrowArrayViewInit(array_view->children[i], NANOARROW_TYPE_UNINITIALIZED);
+  }
+
+  return NANOARROW_OK;
+}
+
+// Initialize array_view (and, recursively, one child view per schema child)
+// from schema. If the schema cannot be parsed, array_view is not touched; if a
+// later step fails, array_view is reset before the error code is returned.
+ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
+                                            struct ArrowSchema* schema,
+                                            struct ArrowError* error) {
+  struct ArrowSchemaView parsed;
+  NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&parsed, schema, error));
+
+  ArrowArrayViewInit(array_view, parsed.storage_data_type);
+  array_view->layout = parsed.layout;
+
+  // Allocate the children, then initialize them in order; the first failure
+  // stops the walk and falls through to the shared cleanup below.
+  int status = ArrowArrayViewAllocateChildren(array_view, schema->n_children);
+  for (int64_t child = 0; status == NANOARROW_OK && child < schema->n_children;
+       child++) {
+    status = ArrowArrayViewInitFromSchema(array_view->children[child],
+                                          schema->children[child], error);
+  }
+
+  if (status != NANOARROW_OK) {
+    ArrowArrayViewReset(array_view);
+  }
+
+  return status;
+}
+
+// Release every child view (recursively) and the children array itself, then
+// re-initialize array_view as NANOARROW_TYPE_UNINITIALIZED. NULL child slots
+// are skipped.
+void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
+  struct ArrowArrayView** kids = array_view->children;
+
+  if (kids != NULL) {
+    for (int64_t k = 0; k < array_view->n_children; k++) {
+      struct ArrowArrayView* kid = kids[k];
+      if (kid == NULL) {
+        continue;
+      }
+
+      ArrowArrayViewReset(kid);
+      ArrowFree(kid);
+    }
+
+    ArrowFree(kids);
+  }
+
+  ArrowArrayViewInit(array_view, NANOARROW_TYPE_UNINITIALIZED);
+}
+
+// Set the byte size of every buffer whose size follows from `length` and the
+// layout alone, and clear the buffer data pointers. For struct and sparse
+// union storage the same length is applied to every child; for fixed-size
+// list storage the first child gets length * child_size_elements. Children of
+// other storage types are left untouched.
+void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) {
+  for (int i = 0; i < 3; i++) {
+    int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
+    array_view->buffer_views[i].data.data = NULL;
+
+    switch (array_view->layout.buffer_type[i]) {
+      case NANOARROW_BUFFER_TYPE_VALIDITY:
+        array_view->buffer_views[i].n_bytes = _ArrowBytesForBits(length);
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+        // Probably don't want/need to rely on the producer to have allocated an
+        // offsets buffer of length 1 for a zero-size array
+        array_view->buffer_views[i].n_bytes =
+            (length != 0) * element_size_bytes * (length + 1);
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA:
+        // Sized in bits first so that sub-byte element sizes round up to a
+        // whole number of bytes
+        array_view->buffer_views[i].n_bytes =
+            _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] *
+                                       length) /
+            8;
+        break;
+      case NANOARROW_BUFFER_TYPE_TYPE_ID:
+      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+        array_view->buffer_views[i].n_bytes = element_size_bytes * length;
+        break;
+      case NANOARROW_BUFFER_TYPE_NONE:
+        array_view->buffer_views[i].n_bytes = 0;
+        break;
+    }
+  }
+
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_STRUCT:
+    case NANOARROW_TYPE_SPARSE_UNION:
+      for (int64_t i = 0; i < array_view->n_children; i++) {
+        ArrowArrayViewSetLength(array_view->children[i], length);
+      }
+      break;
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      if (array_view->n_children >= 1) {
+        ArrowArrayViewSetLength(array_view->children[0],
+                                length * array_view->layout.child_size_elements);
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+// Point array_view (and, recursively, its children) at the buffers of array
+// and compute the buffer sizes that depend on buffer content.
+//
+// Returns EINVAL, with a message in error, when the number of buffers or
+// children does not match the view's layout, or when a child array is shorter
+// than the parent requires. Returns NANOARROW_OK otherwise.
+ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
+                                      struct ArrowArray* array,
+                                      struct ArrowError* error) {
+  array_view->array = array;
+  ArrowArrayViewSetLength(array_view, array->offset + array->length);
+
+  // Count the buffers the layout calls for and validate that count *before*
+  // reading array->buffers, so that an array with too few buffers is rejected
+  // instead of being indexed out of bounds.
+  int64_t buffers_required = 0;
+  for (int i = 0; i < 3; i++) {
+    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
+      break;
+    }
+
+    buffers_required++;
+  }
+
+  if (buffers_required != array->n_buffers) {
+    ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)",
+                  (int)buffers_required, (int)array->n_buffers);
+    return EINVAL;
+  }
+
+  for (int64_t i = 0; i < buffers_required; i++) {
+    // If the null_count is 0, the validity buffer can be NULL
+    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
+        array->null_count == 0 && array->buffers[i] == NULL) {
+      array_view->buffer_views[i].n_bytes = 0;
+    }
+
+    array_view->buffer_views[i].data.data = array->buffers[i];
+  }
+
+  if (array_view->n_children != array->n_children) {
+    ArrowErrorSet(error, "Expected array with %d child(ren) but found %d child(ren)",
+                  (int)array_view->n_children, (int)array->n_children);
+    return EINVAL;
+  }
+
+  // Check child sizes and calculate sizes that depend on data in the array buffers
+  int64_t last_offset;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_BINARY:
+      // The data buffer size is the final 32-bit offset
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        last_offset =
+            array_view->buffer_views[1].data.as_int32[array->offset + array->length];
+        array_view->buffer_views[2].n_bytes = last_offset;
+      }
+      break;
+    case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_LARGE_BINARY:
+      // The data buffer size is the final 64-bit offset
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        last_offset =
+            array_view->buffer_views[1].data.as_int64[array->offset + array->length];
+        array_view->buffer_views[2].n_bytes = last_offset;
+      }
+      break;
+    case NANOARROW_TYPE_STRUCT:
+      for (int64_t i = 0; i < array_view->n_children; i++) {
+        if (array->children[i]->length < (array->offset + array->length)) {
+          ArrowErrorSet(
+              error,
+              "Expected struct child %d to have length >= %ld but found child with "
+              "length %ld",
+              (int)(i + 1), (long)(array->offset + array->length),
+              (long)array->children[i]->length);
+          return EINVAL;
+        }
+      }
+      break;
+    case NANOARROW_TYPE_LIST:
+      if (array->n_children != 1) {
+        ArrowErrorSet(error,
+                      "Expected 1 child of list array but found %d child arrays",
+                      (int)array->n_children);
+        return EINVAL;
+      }
+
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        last_offset =
+            array_view->buffer_views[1].data.as_int32[array->offset + array->length];
+        if (array->children[0]->length < last_offset) {
+          ArrowErrorSet(
+              error,
+              "Expected child of list array with length >= %ld but found array with "
+              "length %ld",
+              (long)last_offset, (long)array->children[0]->length);
+          return EINVAL;
+        }
+      }
+      break;
+    case NANOARROW_TYPE_LARGE_LIST:
+      if (array->n_children != 1) {
+        ArrowErrorSet(error,
+                      "Expected 1 child of large list array but found %d child arrays",
+                      (int)array->n_children);
+        return EINVAL;
+      }
+
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        last_offset =
+            array_view->buffer_views[1].data.as_int64[array->offset + array->length];
+        if (array->children[0]->length < last_offset) {
+          ArrowErrorSet(
+              error,
+              "Expected child of large list array with length >= %ld but found array "
+              "with length %ld",
+              (long)last_offset, (long)array->children[0]->length);
+          return EINVAL;
+        }
+      }
+      break;
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      if (array->n_children != 1) {
+        ArrowErrorSet(error,
+                      "Expected 1 child of fixed-size array but found %d child arrays",
+                      (int)array->n_children);
+        return EINVAL;
+      }
+
+      // No offsets buffer: the required child length follows from the list size
+      last_offset =
+          (array->offset + array->length) * array_view->layout.child_size_elements;
+      if (array->children[0]->length < last_offset) {
+        ArrowErrorSet(
+            error,
+            "Expected child of fixed-size list array with length >= %ld but found array "
+            "with length %ld",
+            (long)last_offset, (long)array->children[0]->length);
+        return EINVAL;
+      }
+      break;
+    default:
+      break;
+  }
+
+  for (int64_t i = 0; i < array_view->n_children; i++) {
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArray(array_view->children[i], array->children[i], error));
+  }
+
+  return NANOARROW_OK;
+}
diff --git a/c/vendor/nanoarrow/bitmap_inline.h b/c/vendor/nanoarrow/bitmap_inline.h
index 763da2a..fc80e80 100644
--- a/c/vendor/nanoarrow/bitmap_inline.h
+++ b/c/vendor/nanoarrow/bitmap_inline.h
@@ -23,6 +23,7 @@
#include "buffer_inline.h"
#include "typedefs_inline.h"
+#include "utils_inline.h"
#ifdef __cplusplus
extern "C" {
@@ -173,11 +174,8 @@ static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
return NANOARROW_OK;
}
- int result =
- ArrowBufferReserve(&bitmap->buffer, _ArrowBytesForBits(additional_size_bits));
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferReserve(&bitmap->buffer, _ArrowBytesForBits(additional_size_bits)));
bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0;
return NANOARROW_OK;
@@ -191,10 +189,8 @@ static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
}
int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits);
- int result = ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit));
if (new_capacity_bits < bitmap->size_bits) {
bitmap->size_bits = new_capacity_bits;
@@ -205,10 +201,7 @@ static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
uint8_t bits_are_set, int64_t length) {
- int result = ArrowBitmapReserve(bitmap, length);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length));
ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length);
return NANOARROW_OK;
@@ -256,6 +249,8 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
out_i_cursor += n_full_bytes * 8;
n_remaining -= n_full_bytes * 8;
if (n_remaining > 0) {
+ // Zero out the last byte
+ *out_cursor = 0x00;
for (int i = 0; i < n_remaining; i++) {
ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
}
@@ -301,6 +296,8 @@ static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap,
out_i_cursor += n_full_bytes * 8;
n_remaining -= n_full_bytes * 8;
if (n_remaining > 0) {
+ // Zero out the last byte
+ *out_cursor = 0x00;
for (int i = 0; i < n_remaining; i++) {
ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
}
diff --git a/c/vendor/nanoarrow/buffer_inline.h b/c/vendor/nanoarrow/buffer_inline.h
index 5400a1b..1b6c43b 100644
--- a/c/vendor/nanoarrow/buffer_inline.h
+++ b/c/vendor/nanoarrow/buffer_inline.h
@@ -23,6 +23,7 @@
#include <string.h>
#include "typedefs_inline.h"
+#include "utils_inline.h"
#ifdef __cplusplus
extern "C" {
@@ -45,7 +46,7 @@ static inline void ArrowBufferInit(struct ArrowBuffer* buffer) {
}
static inline ArrowErrorCode ArrowBufferSetAllocator(
- struct ArrowBuffer* buffer, struct ArrowBufferAllocator* allocator) {
+ struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator) {
if (buffer->data == NULL) {
buffer->allocator = allocator;
return NANOARROW_OK;
@@ -56,8 +57,8 @@ static inline ArrowErrorCode ArrowBufferSetAllocator(
static inline void ArrowBufferReset(struct ArrowBuffer* buffer) {
if (buffer->data != NULL) {
- buffer->allocator->free(buffer->allocator, (uint8_t*)buffer->data,
- buffer->capacity_bytes);
+ buffer->allocator.free(&buffer->allocator, (uint8_t*)buffer->data,
+ buffer->capacity_bytes);
buffer->data = NULL;
}
@@ -80,8 +81,8 @@ static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
}
if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) {
- buffer->data = buffer->allocator->reallocate(
- buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes);
+ buffer->data = buffer->allocator.reallocate(
+ &buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes);
if (buffer->data == NULL && new_capacity_bytes > 0) {
buffer->capacity_bytes = 0;
buffer->size_bytes = 0;
@@ -120,10 +121,7 @@ static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const voi
static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
const void* data, int64_t size_bytes) {
- int result = ArrowBufferReserve(buffer, size_bytes);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes));
ArrowBufferAppendUnsafe(buffer, data, size_bytes);
return NANOARROW_OK;
@@ -179,6 +177,15 @@ static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer,
return ArrowBufferAppend(buffer, &value, sizeof(float));
}
+// Append size_bytes copies of value to buffer, growing it if needed.
+// Returns whatever ArrowBufferReserve() reports on failure and NANOARROW_OK
+// otherwise. A non-positive size_bytes appends nothing.
+static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer,
+                                                   uint8_t value, int64_t size_bytes) {
+  NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes));
+
+  // buffer->data may still be NULL when nothing has been reserved yet, and
+  // passing a null pointer to memset() is undefined even for a length of zero,
+  // so only touch the memory when there is something to write.
+  if (size_bytes > 0) {
+    memset(buffer->data + buffer->size_bytes, value, size_bytes);
+    buffer->size_bytes += size_bytes;
+  }
+
+  return NANOARROW_OK;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/c/vendor/nanoarrow/build-and-test.yaml b/c/vendor/nanoarrow/build-and-test.yaml
index a2ad105..13e4806 100644
--- a/c/vendor/nanoarrow/build-and-test.yaml
+++ b/c/vendor/nanoarrow/build-and-test.yaml
@@ -41,7 +41,7 @@ jobs:
uses: actions/cache@v3
with:
path: build-deps
- key: ${{ runner.os }}-3
+ key: ${{ runner.os }}-5
- name: Init build dir
if: steps.cache-deps-build.outputs.cache-hit != 'true'
@@ -62,10 +62,15 @@ jobs:
if: steps.cache-deps-build.outputs.cache-hit != 'true'
run: |
cd build-deps/googletest
- cmake .
+ cmake . -DCMAKE_CXX_FLAGS=-fPIC
cmake --build .
cmake --install . --prefix ../../dist
+ - name: Install googletest
+ run: |
+ cd build-deps/googletest
+ cmake --install . --prefix ../../dist
+
- name: Fetch Arrow
if: steps.cache-deps-build.outputs.cache-hit != 'true'
uses: actions/checkout@v3
@@ -80,19 +85,19 @@ jobs:
run: |
mkdir build-deps/arrow-build
cd build-deps/arrow-build
- cmake ../arrow/cpp -DARROW_JSON=ON -DARROW_TESTING=ON -DBoost_SOURCE=BUNDLED
+ cmake ../arrow/cpp -DARROW_JSON=ON -DARROW_TESTING=ON -DBoost_SOURCE=BUNDLED -DGTest_DIR=`pwd`/../../dist/lib/cmake/GTest
cmake --build .
cmake --install . --prefix ../../dist
- - name: Install Dependencies
+ - name: Install arrow
run: |
cd build-deps/arrow-build
cmake --install . --prefix ../../dist
- cd ../googletest
- cmake --install . --prefix ../../dist
- name: Build nanoarrow
run: |
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
+ sudo ldconfig
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DGTest_DIR=`pwd`/../dist/lib/cmake/GTest -DArrow_DIR=`pwd`/../dist/lib/cmake/arrow -DArrowTesting_DIR=`pwd`/../dist/lib/cmake/arrow -DNANOARROW_CODE_COVERAGE=ON -DNANOARROW_BUILD_TESTS=ON
@@ -100,13 +105,24 @@ jobs:
- name: Run tests
run: |
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
+ sudo ldconfig
cd build
ctest -T test --output-on-failure .
- name: Run tests with valgrind
run: |
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
+ sudo ldconfig
cd build
- ctest -T memcheck .
+ ctest -T memcheck .
+
+ - name: Upload memcheck results
+ if: failure()
+ uses: actions/upload-artifact@main
+ with:
+ name: nanoarrow-memcheck
+ path: build/Testing/Temporary/MemoryChecker.*.log
- name: Calculate coverage
run: |
diff --git a/c/vendor/nanoarrow/error.c b/c/vendor/nanoarrow/error.c
index 74539d3..9af6267 100644
--- a/c/vendor/nanoarrow/error.c
+++ b/c/vendor/nanoarrow/error.c
@@ -23,6 +23,10 @@
#include "nanoarrow.h"
int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
+ if (error == NULL) {
+ return NANOARROW_OK;
+ }
+
memset(error->message, 0, sizeof(error->message));
va_list args;
diff --git a/c/vendor/nanoarrow/metadata.c b/c/vendor/nanoarrow/metadata.c
index 2f24cbc..7cf452c 100644
--- a/c/vendor/nanoarrow/metadata.c
+++ b/c/vendor/nanoarrow/metadata.c
@@ -125,12 +125,7 @@ char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key) {
ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer,
const char* metadata) {
ArrowBufferInit(buffer);
- int result = ArrowBufferAppend(buffer, metadata, ArrowMetadataSizeOf(metadata));
- if (result != NANOARROW_OK) {
- return result;
- }
-
- return NANOARROW_OK;
+ return ArrowBufferAppend(buffer, metadata, ArrowMetadataSizeOf(metadata));
}
static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buffer,
@@ -140,14 +135,8 @@ static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buf
return NANOARROW_OK;
}
- int result;
-
if (buffer->capacity_bytes == 0) {
- int32_t zero = 0;
- result = ArrowBufferAppend(buffer, &zero, sizeof(int32_t));
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(buffer, 0));
}
if (buffer->capacity_bytes < sizeof(int32_t)) {
@@ -159,11 +148,8 @@ static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buf
int32_t key_size = key->n_bytes;
int32_t value_size = value->n_bytes;
- result = ArrowBufferReserve(buffer,
- sizeof(int32_t) + key_size + sizeof(int32_t) + value_size);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(
+ buffer, sizeof(int32_t) + key_size + sizeof(int32_t) + value_size));
ArrowBufferAppendUnsafe(buffer, &key_size, sizeof(int32_t));
ArrowBufferAppendUnsafe(buffer, key->data, key_size);
@@ -181,11 +167,8 @@ static ArrowErrorCode ArrowMetadataBuilderSetInternal(struct ArrowBuffer* buffer
struct ArrowStringView* value) {
// Inspect the current value to see if we can avoid copying the buffer
struct ArrowStringView current_value = ArrowCharView(NULL);
- int result =
- ArrowMetadataGetValueInternal((const char*)buffer->data, key, &current_value);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(
+ ArrowMetadataGetValueInternal((const char*)buffer->data, key, &current_value));
// The key should be removed but no key exists
if (value == NULL && current_value.data == NULL) {
@@ -200,19 +183,13 @@ static ArrowErrorCode ArrowMetadataBuilderSetInternal(struct ArrowBuffer* buffer
struct ArrowMetadataReader reader;
struct ArrowStringView existing_key;
struct ArrowStringView existing_value;
- result = ArrowMetadataReaderInit(&reader, (const char*)buffer->data);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, (const char*)buffer->data));
struct ArrowBuffer new_buffer;
- result = ArrowMetadataBuilderInit(&new_buffer, NULL);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&new_buffer, NULL));
while (reader.remaining_keys > 0) {
- result = ArrowMetadataReaderRead(&reader, &existing_key, &existing_value);
+ int result = ArrowMetadataReaderRead(&reader, &existing_key, &existing_value);
if (result != NANOARROW_OK) {
ArrowBufferReset(&new_buffer);
return result;
diff --git a/c/vendor/nanoarrow/nanoarrow.c b/c/vendor/nanoarrow/nanoarrow.c
index d3e33c9..f7504e8 100644
--- a/c/vendor/nanoarrow/nanoarrow.c
+++ b/c/vendor/nanoarrow/nanoarrow.c
@@ -17,7 +17,9 @@
#include "allocator.c"
#include "array.c"
+#include "array_view.c"
#include "error.c"
#include "metadata.c"
#include "schema.c"
#include "schema_view.c"
+#include "utils.c"
diff --git a/c/vendor/nanoarrow/nanoarrow.h b/c/vendor/nanoarrow/nanoarrow.h
index c432e60..38b0efc 100644
--- a/c/vendor/nanoarrow/nanoarrow.h
+++ b/c/vendor/nanoarrow/nanoarrow.h
@@ -61,7 +61,18 @@ void ArrowFree(void* ptr);
///
/// The default allocator uses ArrowMalloc(), ArrowRealloc(), and
/// ArrowFree().
-struct ArrowBufferAllocator* ArrowBufferAllocatorDefault();
+struct ArrowBufferAllocator ArrowBufferAllocatorDefault();
+
+/// \brief Create a custom deallocator
+///
+/// Creates a buffer allocator with only a free method that can be used to
+/// attach a custom deallocator to an ArrowBuffer. This may be used to
+/// avoid copying an existing buffer that was not allocated using the
+/// infrastructure provided here (e.g., by an R or Python object).
+struct ArrowBufferAllocator ArrowBufferDeallocator(
+ void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
+ int64_t size),
+ void* private_data);
/// }@
@@ -87,6 +98,9 @@ const char* ArrowErrorMessage(struct ArrowError* error);
/// \defgroup nanoarrow-utils Utility data structures
+/// \brief Initialize a description of buffer arrangements from a storage type
+void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type);
+
/// \brief Create a string view from a null-terminated string
static inline struct ArrowStringView ArrowCharView(const char* value);
@@ -259,6 +273,9 @@ struct ArrowSchemaView {
/// interpret the buffers in the array.
enum ArrowType storage_data_type;
+ /// \brief The storage layout represented by the schema
+ struct ArrowLayout layout;
+
/// \brief The extension type name if it exists
///
/// If the ARROW:extension:name key is present in schema.metadata,
@@ -271,21 +288,6 @@ struct ArrowSchemaView {
/// extension_metadata.data will be non-NULL.
struct ArrowStringView extension_metadata;
- /// \brief The expected number of buffers in a paired ArrowArray
- int32_t n_buffers;
-
- /// \brief The index of the validity buffer or -1 if one does not exist
- int32_t validity_buffer_id;
-
- /// \brief The index of the offset buffer or -1 if one does not exist
- int32_t offset_buffer_id;
-
- /// \brief The index of the data buffer or -1 if one does not exist
- int32_t data_buffer_id;
-
- /// \brief The index of the type_ids buffer or -1 if one does not exist
- int32_t type_id_buffer_id;
-
/// \brief Format fixed size parameter
///
/// This value is set when parsing a fixed-size binary or fixed-size
@@ -352,7 +354,7 @@ static inline void ArrowBufferInit(struct ArrowBuffer* buffer);
///
/// Returns EINVAL if the buffer has already been allocated.
static inline ArrowErrorCode ArrowBufferSetAllocator(
- struct ArrowBuffer* buffer, struct ArrowBufferAllocator* allocator);
+ struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator);
/// \brief Reset an ArrowBuffer
///
@@ -399,6 +401,13 @@ static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const voi
static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
const void* data, int64_t size_bytes);
+/// \brief Write fill to buffer and increment the buffer size
+///
+/// This function writes the specified number of fill bytes and
+/// ensures that the buffer has the required capacity.
+static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer,
+ uint8_t value, int64_t size_bytes);
+
/// \brief Write an 8-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer,
int8_t value);
@@ -521,6 +530,14 @@ static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap);
/// NANOARROW_OK is returned.
ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType storage_type);
+/// \brief Initialize the contents of an ArrowArray from an ArrowSchema
+///
+/// Caller is responsible for calling the array->release callback if
+/// NANOARROW_OK is returned.
+ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
+ struct ArrowSchema* schema,
+ struct ArrowError* error);
+
/// \brief Allocate the array->children array
///
/// Includes the memory for each child struct ArrowArray,
@@ -558,11 +575,119 @@ static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* ar
/// array must have been allocated using ArrowArrayInit
static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i);
+/// \brief Start element-wise appending to an ArrowArray
+///
+/// Initializes any values needed to use ArrowArrayAppend*() functions.
+/// All element-wise appenders append by value and return EINVAL if the exact value
+/// cannot be represented by the underlying storage type.
+/// array must have been allocated using ArrowArrayInit
+static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array);
+
+/// \brief Reserve space for future appends
+///
+/// For buffer sizes that can be calculated (i.e., not string data buffers or
+/// child array sizes for non-fixed-size arrays), recursively reserve space for
+/// additional elements. This is useful for reducing the number of reallocations
+/// that occur using the item-wise appenders.
+ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array,
+ int64_t additional_size_elements);
+
+/// \brief Append a null value to an array
+static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n);
+
+/// \brief Append a signed integer value to an array
+///
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g., value
+/// is outside the valid array range).
+static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, int64_t value);
+
+/// \brief Append an unsigned integer value to an array
+///
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g., value
+/// is outside the valid array range).
+static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
+ uint64_t value);
+
+/// \brief Append a double value to an array
+///
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g., value
+/// is outside the valid array range or there is an attempt to append
+/// a non-integer to an array with an integer storage type).
+static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
+ double value);
+
+/// \brief Append a string of bytes to an array
+///
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g.,
+/// the underlying array is not a binary, string, large binary, large string,
+/// or fixed-size binary array, or value is the wrong size for a fixed-size
+/// binary array).
+static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
+ struct ArrowBufferView value);
+
+/// \brief Append a string value to an array
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g.,
+/// the underlying array is not a string or large string array).
+static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
+ struct ArrowStringView value);
+
+/// \brief Finish a nested array element
+///
+/// Appends a non-null element to the array based on the first child's current
+/// length. Returns NANOARROW_OK if the item was successfully added or EINVAL
+/// if the underlying storage type is not a struct, list, large list, or fixed-size
+/// list, or if there was an attempt to add a struct or fixed-size list element where the
+/// length of the child array(s) did not match the expected length.
+static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array);
+
+/// \brief Shrink buffer capacity to the size required
+///
+/// Also applies shrinking to any child arrays. array must have been allocated using
+/// ArrowArrayInit
+static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array);
+
/// \brief Finish building an ArrowArray
///
+/// Flushes any pointers from internal buffers that may have been reallocated
+/// into the array->buffers array and checks the actual size of the buffers
+/// against the expected size based on the final length.
/// array must have been allocated using ArrowArrayInit
-static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
- char shrink_to_fit);
+ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
+ struct ArrowError* error);
+
+/// }@
+
+/// \defgroup nanoarrow-array Array consumer helpers
+/// These functions read and validate the contents of ArrowArray structures
+
+/// \brief Initialize the contents of an ArrowArrayView
+void ArrowArrayViewInit(struct ArrowArrayView* array_view, enum ArrowType storage_type);
+
+/// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema
+ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
+ struct ArrowSchema* schema,
+ struct ArrowError* error);
+
+/// \brief Allocate the array_view->children array
+///
+/// Includes the memory for each child struct ArrowArrayView
+ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
+ int64_t n_children);
+
+/// \brief Set data-independent buffer sizes from length
+void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length);
+
+/// \brief Set buffer sizes and data pointers from an ArrowArray
+ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
+ struct ArrowArray* array, struct ArrowError* error);
+
+/// \brief Reset the contents of an ArrowArrayView and frees resources
+void ArrowArrayViewReset(struct ArrowArrayView* array_view);
/// }@
diff --git a/c/vendor/nanoarrow/schema.c b/c/vendor/nanoarrow/schema.c
index c4220d9..9833c4e 100644
--- a/c/vendor/nanoarrow/schema.c
+++ b/c/vendor/nanoarrow/schema.c
@@ -164,10 +164,7 @@ ArrowErrorCode ArrowSchemaInit(struct ArrowSchema* schema, enum ArrowType data_t
ArrowErrorCode ArrowSchemaInitFixedSize(struct ArrowSchema* schema,
enum ArrowType data_type, int32_t fixed_size) {
- int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED));
if (fixed_size <= 0) {
schema->release(schema);
@@ -189,21 +186,19 @@ ArrowErrorCode ArrowSchemaInitFixedSize(struct ArrowSchema* schema,
}
buffer[n_chars] = '\0';
- result = ArrowSchemaSetFormat(schema, buffer);
+ int result = ArrowSchemaSetFormat(schema, buffer);
if (result != NANOARROW_OK) {
schema->release(schema);
+ return result;
}
- return result;
+ return NANOARROW_OK;
}
ArrowErrorCode ArrowSchemaInitDecimal(struct ArrowSchema* schema,
enum ArrowType data_type, int32_t decimal_precision,
int32_t decimal_scale) {
- int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED));
if (decimal_precision <= 0) {
schema->release(schema);
@@ -228,7 +223,7 @@ ArrowErrorCode ArrowSchemaInitDecimal(struct ArrowSchema* schema,
buffer[n_chars] = '\0';
- result = ArrowSchemaSetFormat(schema, buffer);
+ int result = ArrowSchemaSetFormat(schema, buffer);
if (result != NANOARROW_OK) {
schema->release(schema);
return result;
@@ -419,13 +414,9 @@ ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema) {
}
int ArrowSchemaDeepCopy(struct ArrowSchema* schema, struct ArrowSchema* schema_out) {
- int result;
- result = ArrowSchemaInit(schema_out, NANOARROW_TYPE_NA);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(schema_out, NANOARROW_TYPE_NA));
- result = ArrowSchemaSetFormat(schema_out, schema->format);
+ int result = ArrowSchemaSetFormat(schema_out, schema->format);
if (result != NANOARROW_OK) {
schema_out->release(schema_out);
return result;
diff --git a/c/vendor/nanoarrow/schema_view.c b/c/vendor/nanoarrow/schema_view.c
index 7a3ca93..691f737 100644
--- a/c/vendor/nanoarrow/schema_view.c
+++ b/c/vendor/nanoarrow/schema_view.c
@@ -24,20 +24,12 @@ static void ArrowSchemaViewSetPrimitive(struct ArrowSchemaView* schema_view,
enum ArrowType data_type) {
schema_view->data_type = data_type;
schema_view->storage_data_type = data_type;
- schema_view->n_buffers = 2;
- schema_view->validity_buffer_id = 0;
- schema_view->data_buffer_id = 1;
}
static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
const char* format,
const char** format_end_out,
struct ArrowError* error) {
- schema_view->validity_buffer_id = -1;
- schema_view->offset_buffer_id = -1;
- schema_view->offset_buffer_id = -1;
- schema_view->data_buffer_id = -1;
- schema_view->type_id_buffer_id = -1;
*format_end_out = format;
// needed for decimal parsing
@@ -48,7 +40,6 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
case 'n':
schema_view->data_type = NANOARROW_TYPE_NA;
schema_view->storage_data_type = NANOARROW_TYPE_NA;
- schema_view->n_buffers = 0;
*format_end_out = format + 1;
return NANOARROW_OK;
case 'b':
@@ -155,9 +146,6 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
return EINVAL;
}
- schema_view->n_buffers = 2;
- schema_view->validity_buffer_id = 0;
- schema_view->data_buffer_id = 1;
schema_view->fixed_size = strtol(format + 2, (char**)format_end_out, 10);
return NANOARROW_OK;
@@ -165,19 +153,11 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
case 'z':
schema_view->data_type = NANOARROW_TYPE_BINARY;
schema_view->storage_data_type = NANOARROW_TYPE_BINARY;
- schema_view->n_buffers = 3;
- schema_view->validity_buffer_id = 0;
- schema_view->offset_buffer_id = 1;
- schema_view->data_buffer_id = 2;
*format_end_out = format + 1;
return NANOARROW_OK;
case 'u':
schema_view->data_type = NANOARROW_TYPE_STRING;
schema_view->storage_data_type = NANOARROW_TYPE_STRING;
- schema_view->n_buffers = 3;
- schema_view->validity_buffer_id = 0;
- schema_view->offset_buffer_id = 1;
- schema_view->data_buffer_id = 2;
*format_end_out = format + 1;
return NANOARROW_OK;
@@ -185,19 +165,11 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
case 'Z':
schema_view->data_type = NANOARROW_TYPE_LARGE_BINARY;
schema_view->storage_data_type = NANOARROW_TYPE_LARGE_BINARY;
- schema_view->n_buffers = 3;
- schema_view->validity_buffer_id = 0;
- schema_view->offset_buffer_id = 1;
- schema_view->data_buffer_id = 2;
*format_end_out = format + 1;
return NANOARROW_OK;
case 'U':
schema_view->data_type = NANOARROW_TYPE_LARGE_STRING;
schema_view->storage_data_type = NANOARROW_TYPE_LARGE_STRING;
- schema_view->n_buffers = 3;
- schema_view->validity_buffer_id = 0;
- schema_view->offset_buffer_id = 1;
- schema_view->data_buffer_id = 2;
*format_end_out = format + 1;
return NANOARROW_OK;
@@ -208,9 +180,6 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
case 'l':
schema_view->storage_data_type = NANOARROW_TYPE_LIST;
schema_view->data_type = NANOARROW_TYPE_LIST;
- schema_view->n_buffers = 2;
- schema_view->validity_buffer_id = 0;
- schema_view->offset_buffer_id = 1;
*format_end_out = format + 2;
return NANOARROW_OK;
@@ -218,9 +187,6 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
case 'L':
schema_view->storage_data_type = NANOARROW_TYPE_LARGE_LIST;
schema_view->data_type = NANOARROW_TYPE_LARGE_LIST;
- schema_view->n_buffers = 2;
- schema_view->validity_buffer_id = 0;
- schema_view->offset_buffer_id = 1;
*format_end_out = format + 2;
return NANOARROW_OK;
@@ -233,22 +199,16 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
schema_view->storage_data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
schema_view->data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
- schema_view->n_buffers = 1;
- schema_view->validity_buffer_id = 0;
schema_view->fixed_size = strtol(format + 3, (char**)format_end_out, 10);
return NANOARROW_OK;
case 's':
schema_view->storage_data_type = NANOARROW_TYPE_STRUCT;
schema_view->data_type = NANOARROW_TYPE_STRUCT;
- schema_view->n_buffers = 1;
- schema_view->validity_buffer_id = 0;
*format_end_out = format + 2;
return NANOARROW_OK;
case 'm':
schema_view->storage_data_type = NANOARROW_TYPE_MAP;
schema_view->data_type = NANOARROW_TYPE_MAP;
- schema_view->n_buffers = 1;
- schema_view->validity_buffer_id = 0;
*format_end_out = format + 2;
return NANOARROW_OK;
@@ -258,15 +218,10 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
case 'd':
schema_view->storage_data_type = NANOARROW_TYPE_DENSE_UNION;
schema_view->data_type = NANOARROW_TYPE_DENSE_UNION;
- schema_view->n_buffers = 2;
- schema_view->type_id_buffer_id = 0;
- schema_view->offset_buffer_id = 1;
break;
case 's':
schema_view->storage_data_type = NANOARROW_TYPE_SPARSE_UNION;
schema_view->data_type = NANOARROW_TYPE_SPARSE_UNION;
- schema_view->n_buffers = 1;
- schema_view->type_id_buffer_id = 0;
break;
default:
ArrowErrorSet(error,
@@ -492,10 +447,7 @@ static ArrowErrorCode ArrowSchemaViewValidateUnion(struct ArrowSchemaView* schem
static ArrowErrorCode ArrowSchemaViewValidateMap(struct ArrowSchemaView* schema_view,
struct ArrowError* error) {
- int result = ArrowSchemaViewValidateNChildren(schema_view, 1, error);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaViewValidateNChildren(schema_view, 1, error));
if (schema_view->schema->children[0]->n_children != 2) {
ArrowErrorSet(error, "Expected child of map type to have 2 children but found %d",
@@ -668,6 +620,13 @@ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
}
}
+ ArrowLayoutInit(&schema_view->layout, schema_view->storage_data_type);
+ if (schema_view->storage_data_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) {
+ schema_view->layout.element_size_bits[1] = schema_view->fixed_size * 8;
+ } else if (schema_view->storage_data_type == NANOARROW_TYPE_FIXED_SIZE_LIST) {
+ schema_view->layout.child_size_elements = schema_view->fixed_size;
+ }
+
schema_view->extension_name = ArrowCharView(NULL);
schema_view->extension_metadata = ArrowCharView(NULL);
ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:name"),
diff --git a/c/vendor/nanoarrow/typedefs_inline.h b/c/vendor/nanoarrow/typedefs_inline.h
index 5aca1ec..47e2892 100644
--- a/c/vendor/nanoarrow/typedefs_inline.h
+++ b/c/vendor/nanoarrow/typedefs_inline.h
@@ -166,6 +166,33 @@ enum ArrowType {
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
};
+/// \brief Functional types of buffers as described in the Arrow Columnar Specification
+enum ArrowBufferType {
+ NANOARROW_BUFFER_TYPE_NONE,
+ NANOARROW_BUFFER_TYPE_VALIDITY,
+ NANOARROW_BUFFER_TYPE_TYPE_ID,
+ NANOARROW_BUFFER_TYPE_UNION_OFFSET,
+ NANOARROW_BUFFER_TYPE_DATA_OFFSET,
+ NANOARROW_BUFFER_TYPE_DATA
+};
+
+/// \brief A description of an arrangement of buffers
+///
+/// Contains the minimum amount of information required to
+/// calculate the size of each buffer in an ArrowArray knowing only
+/// the length and offset of the array.
+struct ArrowLayout {
+ /// \brief The function of each buffer
+ enum ArrowBufferType buffer_type[3];
+
+ /// \brief The size of an element in each buffer or 0 if this size is variable or unknown
+ int64_t element_size_bits[3];
+
+ /// \brief The number of elements in the child array per element in this array for a
+ /// fixed-size list
+ int64_t child_size_elements;
+};
+
/// \brief A non-owning view of a string
struct ArrowStringView {
/// \brief A pointer to the start of the string
@@ -179,15 +206,33 @@ struct ArrowStringView {
int64_t n_bytes;
};
+/// \brief A non-owning view of a buffer
+struct ArrowBufferView {
+ /// \brief A pointer to the start of the buffer
+ ///
+ /// If n_bytes is 0, this value may be NULL.
+ union {
+ const void* data;
+ const int8_t* as_int8;
+ const uint8_t* as_uint8;
+ const int16_t* as_int16;
+ const uint16_t* as_uint16;
+ const int32_t* as_int32;
+ const uint32_t* as_uint32;
+ const int64_t* as_int64;
+ const uint64_t* as_uint64;
+ } data;
+
+ /// \brief The size of the buffer in bytes
+ int64_t n_bytes;
+};
+
/// \brief Array buffer allocation and deallocation
///
/// Container for allocate, reallocate, and free methods that can be used
/// to customize allocation and deallocation of buffers when constructing
/// an ArrowArray.
struct ArrowBufferAllocator {
- /// \brief Allocate a buffer or return NULL if it cannot be allocated
- uint8_t* (*allocate)(struct ArrowBufferAllocator* allocator, int64_t size);
-
/// \brief Reallocate a buffer or return NULL if it cannot be reallocated
uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
int64_t old_size, int64_t new_size);
@@ -213,7 +258,7 @@ struct ArrowBuffer {
int64_t capacity_bytes;
/// \brief The allocator that will be used to reallocate and/or free the buffer
- struct ArrowBufferAllocator* allocator;
+ struct ArrowBufferAllocator allocator;
};
/// \brief An owning mutable view of a bitmap
@@ -242,6 +287,18 @@ struct ArrowArrayPrivateData {
// The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
enum ArrowType storage_type;
+
+ // The buffer arrangement for the storage type
+ struct ArrowLayout layout;
+};
+
+struct ArrowArrayView {
+ struct ArrowArray* array;
+ enum ArrowType storage_type;
+ struct ArrowLayout layout;
+ struct ArrowBufferView buffer_views[3];
+ int64_t n_children;
+ struct ArrowArrayView** children;
};
/// }@
diff --git a/c/vendor/nanoarrow/utils.c b/c/vendor/nanoarrow/utils.c
new file mode 100644
index 0000000..74e4560
--- /dev/null
+++ b/c/vendor/nanoarrow/utils.c
@@ -0,0 +1,126 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "nanoarrow.h"
+
+void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE;
+
+ layout->element_size_bits[0] = 1;
+ layout->element_size_bits[1] = 0;
+ layout->element_size_bits[2] = 0;
+
+ layout->child_size_elements = 0;
+
+ switch (storage_type) {
+ case NANOARROW_TYPE_UNINITIALIZED:
+ case NANOARROW_TYPE_NA:
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->element_size_bits[0] = 0;
+ break;
+
+ case NANOARROW_TYPE_LIST:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 32;
+ break;
+
+ case NANOARROW_TYPE_LARGE_LIST:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 64;
+ break;
+
+ case NANOARROW_TYPE_BOOL:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 1;
+ break;
+
+ case NANOARROW_TYPE_UINT8:
+ case NANOARROW_TYPE_INT8:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 8;
+ break;
+
+ case NANOARROW_TYPE_UINT16:
+ case NANOARROW_TYPE_INT16:
+ case NANOARROW_TYPE_HALF_FLOAT:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 16;
+ break;
+
+ case NANOARROW_TYPE_UINT32:
+ case NANOARROW_TYPE_INT32:
+ case NANOARROW_TYPE_FLOAT:
+ case NANOARROW_TYPE_INTERVAL_MONTHS:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 32;
+ break;
+
+ case NANOARROW_TYPE_UINT64:
+ case NANOARROW_TYPE_INT64:
+ case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 64;
+ break;
+
+ case NANOARROW_TYPE_DECIMAL128:
+ case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 128;
+ break;
+
+ case NANOARROW_TYPE_DECIMAL256:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 256;
+ break;
+
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ break;
+
+ case NANOARROW_TYPE_DENSE_UNION:
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+ layout->element_size_bits[0] = 8;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET;
+ layout->element_size_bits[1] = 32;
+ break;
+
+ case NANOARROW_TYPE_SPARSE_UNION:
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+ layout->element_size_bits[0] = 8;
+ break;
+
+ case NANOARROW_TYPE_STRING:
+ case NANOARROW_TYPE_BINARY:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 32;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+ break;
+
+ case NANOARROW_TYPE_LARGE_STRING:
+ case NANOARROW_TYPE_LARGE_BINARY:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 64;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+ break;
+
+ default:
+ break;
+ }
+}
diff --git a/c/vendor/nanoarrow/utils_inline.h b/c/vendor/nanoarrow/utils_inline.h
index 4c61555..3083339 100644
--- a/c/vendor/nanoarrow/utils_inline.h
+++ b/c/vendor/nanoarrow/utils_inline.h
@@ -18,6 +18,7 @@
#ifndef NANOARROW_UTILS_INLINE_H_INCLUDED
#define NANOARROW_UTILS_INLINE_H_INCLUDED
+#include <errno.h>
#include <string.h>
#include "typedefs_inline.h"
@@ -26,6 +27,21 @@
extern "C" {
#endif
+#define _NANOARROW_CONCAT(x, y) x##y
+#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y)
+
+#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \
+ do { \
+ const int NAME = (EXPR); \
+ if (NAME) return NAME; \
+ } while (0)
+
+#define NANOARROW_RETURN_NOT_OK(EXPR) \
+ _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR)
+
+#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \
+ NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL)
+
static inline struct ArrowStringView ArrowCharView(const char* value) {
struct ArrowStringView out;