You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/08/24 20:36:26 UTC
[arrow-adbc] branch main updated: [C] Update vendored nanoarrow, use amalgamated build (#75)
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 26235d9 [C] Update vendored nanoarrow, use amalgamated build (#75)
26235d9 is described below
commit 26235d9cf63b98cd7b09fa142354dd09a2af1f12
Author: David Li <li...@gmail.com>
AuthorDate: Wed Aug 24 16:36:07 2022 -0400
[C] Update vendored nanoarrow, use amalgamated build (#75)
---
c/vendor/nanoarrow/allocator.c | 72 --
c/vendor/nanoarrow/array.c | 432 -------
c/vendor/nanoarrow/array_inline.h | 452 -------
c/vendor/nanoarrow/array_view.c | 288 -----
c/vendor/nanoarrow/bitmap_inline.h | 320 -----
c/vendor/nanoarrow/buffer_inline.h | 193 ---
c/vendor/nanoarrow/build-and-test.yaml | 137 --
c/vendor/nanoarrow/error.c | 46 -
c/vendor/nanoarrow/metadata.c | 234 ----
c/vendor/nanoarrow/nanoarrow.c | 2194 +++++++++++++++++++++++++++++++-
c/vendor/nanoarrow/nanoarrow.h | 1512 +++++++++++++++++++++-
c/vendor/nanoarrow/schema.c | 466 -------
c/vendor/nanoarrow/schema_view.c | 638 ----------
c/vendor/nanoarrow/typedefs_inline.h | 310 -----
c/vendor/nanoarrow/utils.c | 126 --
c/vendor/nanoarrow/utils_inline.h | 62 -
c/vendor/vendor_nanoarrow.sh | 19 +-
17 files changed, 3706 insertions(+), 3795 deletions(-)
diff --git a/c/vendor/nanoarrow/allocator.c b/c/vendor/nanoarrow/allocator.c
deleted file mode 100644
index a016bce..0000000
--- a/c/vendor/nanoarrow/allocator.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <stddef.h>
-#include <stdlib.h>
-
-#include "nanoarrow.h"
-
-void* ArrowMalloc(int64_t size) { return malloc(size); }
-
-void* ArrowRealloc(void* ptr, int64_t size) { return realloc(ptr, size); }
-
-void ArrowFree(void* ptr) { free(ptr); }
-
-static uint8_t* ArrowBufferAllocatorMallocAllocate(struct ArrowBufferAllocator* allocator,
- int64_t size) {
- return ArrowMalloc(size);
-}
-
-static uint8_t* ArrowBufferAllocatorMallocReallocate(
- struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
- int64_t new_size) {
- return ArrowRealloc(ptr, new_size);
-}
-
-static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator* allocator,
- uint8_t* ptr, int64_t size) {
- ArrowFree(ptr);
-}
-
-static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = {
- &ArrowBufferAllocatorMallocReallocate, &ArrowBufferAllocatorMallocFree, NULL};
-
-struct ArrowBufferAllocator ArrowBufferAllocatorDefault() {
- return ArrowBufferAllocatorMalloc;
-}
-
-static uint8_t* ArrowBufferAllocatorNeverAllocate(struct ArrowBufferAllocator* allocator,
- int64_t size) {
- return NULL;
-}
-
-static uint8_t* ArrowBufferAllocatorNeverReallocate(
- struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
- int64_t new_size) {
- return NULL;
-}
-
-struct ArrowBufferAllocator ArrowBufferDeallocator(
- void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
- int64_t size),
- void* private_data) {
- struct ArrowBufferAllocator allocator;
- allocator.reallocate = &ArrowBufferAllocatorNeverReallocate;
- allocator.free = custom_free;
- allocator.private_data = private_data;
- return allocator;
-}
diff --git a/c/vendor/nanoarrow/array.c b/c/vendor/nanoarrow/array.c
deleted file mode 100644
index 7d09130..0000000
--- a/c/vendor/nanoarrow/array.c
+++ /dev/null
@@ -1,432 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "nanoarrow.h"
-
-static void ArrowArrayRelease(struct ArrowArray* array) {
- // Release buffers held by this array
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
- if (private_data != NULL) {
- ArrowBitmapReset(&private_data->bitmap);
- ArrowBufferReset(&private_data->buffers[0]);
- ArrowBufferReset(&private_data->buffers[1]);
- ArrowFree(private_data);
- }
-
- // This object owns the memory for all the children, but those
- // children may have been generated elsewhere and might have
- // their own release() callback.
- if (array->children != NULL) {
- for (int64_t i = 0; i < array->n_children; i++) {
- if (array->children[i] != NULL) {
- if (array->children[i]->release != NULL) {
- array->children[i]->release(array->children[i]);
- }
-
- ArrowFree(array->children[i]);
- }
- }
-
- ArrowFree(array->children);
- }
-
- // This object owns the memory for the dictionary but it
- // may have been generated somewhere else and have its own
- // release() callback.
- if (array->dictionary != NULL) {
- if (array->dictionary->release != NULL) {
- array->dictionary->release(array->dictionary);
- }
-
- ArrowFree(array->dictionary);
- }
-
- // Mark released
- array->release = NULL;
-}
-
-ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
- enum ArrowType storage_type) {
- switch (storage_type) {
- case NANOARROW_TYPE_UNINITIALIZED:
- case NANOARROW_TYPE_NA:
- array->n_buffers = 0;
- break;
-
- case NANOARROW_TYPE_FIXED_SIZE_LIST:
- case NANOARROW_TYPE_STRUCT:
- case NANOARROW_TYPE_MAP:
- case NANOARROW_TYPE_SPARSE_UNION:
- array->n_buffers = 1;
- break;
-
- case NANOARROW_TYPE_LIST:
- case NANOARROW_TYPE_LARGE_LIST:
- case NANOARROW_TYPE_BOOL:
- case NANOARROW_TYPE_UINT8:
- case NANOARROW_TYPE_INT8:
- case NANOARROW_TYPE_UINT16:
- case NANOARROW_TYPE_INT16:
- case NANOARROW_TYPE_UINT32:
- case NANOARROW_TYPE_INT32:
- case NANOARROW_TYPE_UINT64:
- case NANOARROW_TYPE_INT64:
- case NANOARROW_TYPE_HALF_FLOAT:
- case NANOARROW_TYPE_FLOAT:
- case NANOARROW_TYPE_DOUBLE:
- case NANOARROW_TYPE_DECIMAL128:
- case NANOARROW_TYPE_DECIMAL256:
- case NANOARROW_TYPE_INTERVAL_MONTHS:
- case NANOARROW_TYPE_INTERVAL_DAY_TIME:
- case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
- case NANOARROW_TYPE_FIXED_SIZE_BINARY:
- case NANOARROW_TYPE_DENSE_UNION:
- array->n_buffers = 2;
- break;
-
- case NANOARROW_TYPE_STRING:
- case NANOARROW_TYPE_LARGE_STRING:
- case NANOARROW_TYPE_BINARY:
- case NANOARROW_TYPE_LARGE_BINARY:
- array->n_buffers = 3;
- break;
-
- default:
- return EINVAL;
-
- return NANOARROW_OK;
- }
-
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
- private_data->storage_type = storage_type;
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType storage_type) {
- array->length = 0;
- array->null_count = 0;
- array->offset = 0;
- array->n_buffers = 0;
- array->n_children = 0;
- array->buffers = NULL;
- array->children = NULL;
- array->dictionary = NULL;
- array->release = &ArrowArrayRelease;
- array->private_data = NULL;
-
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)ArrowMalloc(sizeof(struct ArrowArrayPrivateData));
- if (private_data == NULL) {
- array->release = NULL;
- return ENOMEM;
- }
-
- ArrowBitmapInit(&private_data->bitmap);
- ArrowBufferInit(&private_data->buffers[0]);
- ArrowBufferInit(&private_data->buffers[1]);
- private_data->buffer_data[0] = NULL;
- private_data->buffer_data[1] = NULL;
- private_data->buffer_data[2] = NULL;
-
- array->private_data = private_data;
- array->buffers = (const void**)(&private_data->buffer_data);
-
- int result = ArrowArraySetStorageType(array, storage_type);
- if (result != NANOARROW_OK) {
- array->release(array);
- return result;
- }
-
- ArrowLayoutInit(&private_data->layout, storage_type);
- return NANOARROW_OK;
-}
-
-static ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
- struct ArrowArrayView* array_view,
- struct ArrowError* error) {
- ArrowArrayInit(array, array_view->storage_type);
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- int result = ArrowArrayAllocateChildren(array, array_view->n_children);
- if (result != NANOARROW_OK) {
- array->release(array);
- return result;
- }
-
- private_data->layout = array_view->layout;
-
- for (int64_t i = 0; i < array_view->n_children; i++) {
- int result =
- ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
- if (result != NANOARROW_OK) {
- array->release(array);
- return result;
- }
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
- struct ArrowSchema* schema,
- struct ArrowError* error) {
- struct ArrowArrayView array_view;
- NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema, error));
- NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromArrayView(array, &array_view, error));
- ArrowArrayViewReset(&array_view);
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children) {
- if (array->children != NULL) {
- return EINVAL;
- }
-
- if (n_children == 0) {
- return NANOARROW_OK;
- }
-
- array->children =
- (struct ArrowArray**)ArrowMalloc(n_children * sizeof(struct ArrowArray*));
- if (array->children == NULL) {
- return ENOMEM;
- }
-
- for (int64_t i = 0; i < n_children; i++) {
- array->children[i] = NULL;
- }
-
- for (int64_t i = 0; i < n_children; i++) {
- array->children[i] = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray));
- if (array->children[i] == NULL) {
- return ENOMEM;
- }
- array->children[i]->release = NULL;
- }
-
- array->n_children = n_children;
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array) {
- if (array->dictionary != NULL) {
- return EINVAL;
- }
-
- array->dictionary = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray));
- if (array->dictionary == NULL) {
- return ENOMEM;
- }
-
- array->dictionary->release = NULL;
- return NANOARROW_OK;
-}
-
-void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
- ArrowBufferMove(&bitmap->buffer, &private_data->bitmap.buffer);
- private_data->bitmap.size_bits = bitmap->size_bits;
- bitmap->size_bits = 0;
- private_data->buffer_data[0] = private_data->bitmap.buffer.data;
- array->null_count = -1;
-}
-
-ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
- struct ArrowBuffer* buffer) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- switch (i) {
- case 0:
- ArrowBufferMove(buffer, &private_data->bitmap.buffer);
- private_data->buffer_data[i] = private_data->bitmap.buffer.data;
- break;
- case 1:
- case 2:
- ArrowBufferMove(buffer, &private_data->buffers[i - 1]);
- private_data->buffer_data[i] = private_data->buffers[i - 1].data;
- break;
- default:
- return EINVAL;
- }
-
- return NANOARROW_OK;
-}
-
-static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_view,
- struct ArrowArray* array) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- ArrowArrayViewInit(array_view, private_data->storage_type);
- array_view->layout = private_data->layout;
- array_view->array = array;
-
- int result = ArrowArrayViewAllocateChildren(array_view, array->n_children);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(array_view);
- return result;
- }
-
- for (int64_t i = 0; i < array->n_children; i++) {
- result = ArrowArrayViewInitFromArray(array_view->children[i], array->children[i]);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(array_view);
- return result;
- }
- }
-
- return NANOARROW_OK;
-}
-
-static ArrowErrorCode ArrowArrayReserveInternal(struct ArrowArray* array,
- struct ArrowArrayView* array_view) {
- // Loop through buffers and reserve the extra space that we know about
- for (int64_t i = 0; i < array->n_buffers; i++) {
- // Don't reserve on a validity buffer that hasn't been allocated yet
- if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
- ArrowArrayBuffer(array, i)->data == NULL) {
- continue;
- }
-
- int64_t additional_size_bytes =
- array_view->buffer_views[i].n_bytes - ArrowArrayBuffer(array, i)->size_bytes;
-
- if (additional_size_bytes > 0) {
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferReserve(ArrowArrayBuffer(array, i), additional_size_bytes));
- }
- }
-
- // Recursively reserve children
- for (int64_t i = 0; i < array->n_children; i++) {
- NANOARROW_RETURN_NOT_OK(
- ArrowArrayReserveInternal(array->children[i], array_view->children[i]));
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array,
- int64_t additional_size_elements) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- struct ArrowArrayView array_view;
- NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array));
-
- // Calculate theoretical buffer sizes (recursively)
- ArrowArrayViewSetLength(&array_view, array->length + additional_size_elements);
-
- // Walk the structure (recursively)
- int result = ArrowArrayReserveInternal(array, &array_view);
- ArrowArrayViewReset(&array_view);
- if (result != NANOARROW_OK) {
- return result;
- }
-
- return NANOARROW_OK;
-}
-
-static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- for (int64_t i = 0; i < 3; i++) {
- private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
- }
-
- for (int64_t i = 0; i < array->n_children; i++) {
- ArrowArrayFlushInternalPointers(array->children[i]);
- }
-}
-
-static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
- struct ArrowArray* array, struct ArrowArrayView* array_view,
- char set_length, struct ArrowError* error) {
- if (set_length) {
- ArrowArrayViewSetLength(array_view, array->offset + array->length);
- }
-
- for (int64_t i = 0; i < array->n_buffers; i++) {
- if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
- array->null_count == 0 && array->buffers[i] == NULL) {
- continue;
- }
-
- int64_t expected_size = array_view->buffer_views[i].n_bytes;
- int64_t actual_size = ArrowArrayBuffer(array, i)->size_bytes;
-
- if (actual_size < expected_size) {
- ArrowErrorSet(
- error,
- "Expected buffer %d to size >= %ld bytes but found buffer with %ld bytes", i,
- (long)expected_size, (long)actual_size);
- return EINVAL;
- }
- }
-
- for (int64_t i = 0; i < array->n_children; i++) {
- NANOARROW_RETURN_NOT_OK(ArrowArrayCheckInternalBufferSizes(
- array->children[i], array_view->children[i], set_length, error));
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
- struct ArrowError* error) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- // Make sure the value we get with array->buffers[i] is set to the actual
- // pointer (which may have changed from the original due to reallocation)
- ArrowArrayFlushInternalPointers(array);
-
- // Check buffer sizes to make sure we are not sending an ArrowArray
- // into the wild that is going to segfault
- struct ArrowArrayView array_view;
-
- NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array));
-
- // Check buffer sizes once without using internal buffer data since
- // ArrowArrayViewSetArray() assumes that all the buffers are long enough
- // and issues invalid reads on offset buffers if they are not
- int result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 1, error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(&array_view);
- return result;
- }
-
- result = ArrowArrayViewSetArray(&array_view, array, error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(&array_view);
- return result;
- }
-
- result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 0, error);
- ArrowArrayViewReset(&array_view);
- return result;
-}
diff --git a/c/vendor/nanoarrow/array_inline.h b/c/vendor/nanoarrow/array_inline.h
deleted file mode 100644
index 14335a8..0000000
--- a/c/vendor/nanoarrow/array_inline.h
+++ /dev/null
@@ -1,452 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef NANOARROW_ARRAY_INLINE_H_INCLUDED
-#define NANOARROW_ARRAY_INLINE_H_INCLUDED
-
-#include <errno.h>
-#include <float.h>
-#include <limits.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "bitmap_inline.h"
-#include "buffer_inline.h"
-#include "typedefs_inline.h"
-#include "utils_inline.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
- return &private_data->bitmap;
-}
-
-static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
- switch (i) {
- case 0:
- return &private_data->bitmap.buffer;
- default:
- return private_data->buffers + i - 1;
- }
-}
-
-static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- if (private_data->storage_type == NANOARROW_TYPE_UNINITIALIZED) {
- return EINVAL;
- }
-
- // Initialize any data offset buffer with a single zero
- for (int i = 0; i < 3; i++) {
- if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET &&
- private_data->layout.element_size_bits[i] == 64) {
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0));
- } else if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET &&
- private_data->layout.element_size_bits[i] == 32) {
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0));
- }
- }
-
- // Start building any child arrays
- for (int64_t i = 0; i < array->n_children; i++) {
- NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i]));
- }
-
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- for (int64_t i = 0; i < 3; i++) {
- struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
- NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1));
- }
-
- for (int64_t i = 0; i < array->n_children; i++) {
- NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i]));
- }
-
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* array,
- int64_t buffer_i, uint8_t value,
- int64_t n) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
- struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
- int64_t bytes_required =
- _ArrowRoundUpToMultipleOf8(private_data->layout.element_size_bits[buffer_i] *
- (array->length + 1)) /
- 8;
- if (bytes_required > buffer->size_bytes) {
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes));
- }
-
- ArrowBitsSetTo(buffer->data, array->length, n, value);
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- if (n == 0) {
- return NANOARROW_OK;
- }
-
- if (private_data->storage_type == NANOARROW_TYPE_NA) {
- array->null_count += n;
- array->length += n;
- return NANOARROW_OK;
- }
-
- // Append n 0 bits to the validity bitmap. If we haven't allocated a bitmap yet, do it
- // now
- if (private_data->bitmap.buffer.data == NULL) {
- NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n));
- ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length);
- ArrowBitmapAppendUnsafe(&private_data->bitmap, 0, n);
- } else {
- NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n));
- ArrowBitmapAppendUnsafe(&private_data->bitmap, 0, n);
- }
-
- // Add appropriate buffer fill
- struct ArrowBuffer* buffer;
- int64_t size_bytes;
-
- for (int i = 0; i < 3; i++) {
- buffer = ArrowArrayBuffer(array, i);
- size_bytes = private_data->layout.element_size_bits[i] / 8;
-
- switch (private_data->layout.buffer_type[i]) {
- case NANOARROW_BUFFER_TYPE_NONE:
- case NANOARROW_BUFFER_TYPE_VALIDITY:
- continue;
- case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
- // Append the current value at the end of the offset buffer for each element
- NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n));
-
- for (int64_t j = 0; j < n; j++) {
- ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j),
- size_bytes);
- }
-
- // Skip the data buffer
- i++;
- continue;
- case NANOARROW_BUFFER_TYPE_DATA:
- // Zero out the next bit of memory
- if (private_data->layout.element_size_bits[i] % 8 == 0) {
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n));
- } else {
- NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n));
- }
- continue;
-
- case NANOARROW_BUFFER_TYPE_TYPE_ID:
- case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
- // Not supported
- return EINVAL;
- }
- }
-
- // For fixed-size list and struct we need to append some nulls to
- // children for the lengths to line up properly
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_FIXED_SIZE_LIST:
- NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(
- array->children[0], n * private_data->layout.child_size_elements));
- break;
- case NANOARROW_TYPE_STRUCT:
- for (int64_t i = 0; i < array->n_children; i++) {
- NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(array->children[i], n));
- }
- default:
- break;
- }
-
- array->length += n;
- array->null_count += n;
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array,
- int64_t value) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
-
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_INT64:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(int64_t)));
- break;
- case NANOARROW_TYPE_INT32:
- _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value));
- break;
- case NANOARROW_TYPE_INT16:
- _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(data_buffer, value));
- break;
- case NANOARROW_TYPE_INT8:
- _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(data_buffer, value));
- break;
- case NANOARROW_TYPE_UINT64:
- case NANOARROW_TYPE_UINT32:
- case NANOARROW_TYPE_UINT16:
- case NANOARROW_TYPE_UINT8:
- _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
- return ArrowArrayAppendUInt(array, value);
- case NANOARROW_TYPE_DOUBLE:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, value));
- break;
- case NANOARROW_TYPE_FLOAT:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value));
- break;
- case NANOARROW_TYPE_BOOL:
- NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
- break;
- default:
- return EINVAL;
- }
-
- if (private_data->bitmap.buffer.data != NULL) {
- NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
- }
-
- array->length++;
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
- uint64_t value) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
-
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_UINT64:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(uint64_t)));
- break;
- case NANOARROW_TYPE_UINT32:
- _NANOARROW_CHECK_RANGE(value, 0, UINT32_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(data_buffer, value));
- break;
- case NANOARROW_TYPE_UINT16:
- _NANOARROW_CHECK_RANGE(value, 0, UINT16_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(data_buffer, value));
- break;
- case NANOARROW_TYPE_UINT8:
- _NANOARROW_CHECK_RANGE(value, 0, UINT8_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(data_buffer, value));
- break;
- case NANOARROW_TYPE_INT64:
- case NANOARROW_TYPE_INT32:
- case NANOARROW_TYPE_INT16:
- case NANOARROW_TYPE_INT8:
- _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
- return ArrowArrayAppendInt(array, value);
- case NANOARROW_TYPE_DOUBLE:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, value));
- break;
- case NANOARROW_TYPE_FLOAT:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value));
- break;
- case NANOARROW_TYPE_BOOL:
- NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
- break;
- default:
- return EINVAL;
- }
-
- if (private_data->bitmap.buffer.data != NULL) {
- NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
- }
-
- array->length++;
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
- double value) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
-
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_DOUBLE:
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(double)));
- break;
- case NANOARROW_TYPE_FLOAT:
- _NANOARROW_CHECK_RANGE(value, FLT_MIN, FLT_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value));
- break;
- default:
- return EINVAL;
- }
-
- if (private_data->bitmap.buffer.data != NULL) {
- NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
- }
-
- array->length++;
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
- struct ArrowBufferView value) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
- struct ArrowBuffer* data_buffer = ArrowArrayBuffer(
- array, 1 + (private_data->storage_type != NANOARROW_TYPE_FIXED_SIZE_BINARY));
- int32_t offset;
- int64_t large_offset;
- int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8;
-
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_STRING:
- case NANOARROW_TYPE_BINARY:
- offset = ((int32_t*)offset_buffer->data)[array->length];
- if ((offset + value.n_bytes) > INT32_MAX) {
- return EINVAL;
- }
-
- offset += value.n_bytes;
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t)));
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppend(data_buffer, value.data.data, value.n_bytes));
- break;
-
- case NANOARROW_TYPE_LARGE_STRING:
- case NANOARROW_TYPE_LARGE_BINARY:
- large_offset = ((int64_t*)offset_buffer->data)[array->length];
- large_offset += value.n_bytes;
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppend(offset_buffer, &large_offset, sizeof(int64_t)));
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppend(data_buffer, value.data.data, value.n_bytes));
- break;
-
- case NANOARROW_TYPE_FIXED_SIZE_BINARY:
- if (value.n_bytes != fixed_size_bytes) {
- return EINVAL;
- }
-
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppend(data_buffer, value.data.data, value.n_bytes));
- break;
- default:
- return EINVAL;
- }
-
- if (private_data->bitmap.buffer.data != NULL) {
- NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
- }
-
- array->length++;
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
- struct ArrowStringView value) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- struct ArrowBufferView buffer_view;
- buffer_view.data.data = value.data;
- buffer_view.n_bytes = value.n_bytes;
-
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_STRING:
- case NANOARROW_TYPE_LARGE_STRING:
- return ArrowArrayAppendBytes(array, buffer_view);
- default:
- return EINVAL;
- }
-}
-
-static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
- struct ArrowArrayPrivateData* private_data =
- (struct ArrowArrayPrivateData*)array->private_data;
-
- int64_t child_length;
-
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_LIST:
- child_length = array->children[0]->length;
- if (child_length > INT32_MAX) {
- return EINVAL;
- }
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), child_length));
- break;
- case NANOARROW_TYPE_LARGE_LIST:
- child_length = array->children[0]->length;
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), child_length));
- break;
- case NANOARROW_TYPE_FIXED_SIZE_LIST:
- child_length = array->children[0]->length;
- if (child_length !=
- ((array->length + 1) * private_data->layout.child_size_elements)) {
- return EINVAL;
- }
- break;
- case NANOARROW_TYPE_STRUCT:
- for (int64_t i = 0; i < array->n_children; i++) {
- child_length = array->children[i]->length;
- if (child_length != (array->length + 1)) {
- return EINVAL;
- }
- }
- break;
- default:
- return EINVAL;
- }
-
- if (private_data->bitmap.buffer.data != NULL) {
- NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
- }
-
- array->length++;
- return NANOARROW_OK;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/c/vendor/nanoarrow/array_view.c b/c/vendor/nanoarrow/array_view.c
deleted file mode 100644
index 84ccb07..0000000
--- a/c/vendor/nanoarrow/array_view.c
+++ /dev/null
@@ -1,288 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "nanoarrow.h"
-
-void ArrowArrayViewInit(struct ArrowArrayView* array_view, enum ArrowType storage_type) {
- memset(array_view, 0, sizeof(struct ArrowArrayView));
- array_view->storage_type = storage_type;
- ArrowLayoutInit(&array_view->layout, storage_type);
-}
-
-ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
- int64_t n_children) {
- if (array_view->children != NULL) {
- return EINVAL;
- }
-
- array_view->children =
- (struct ArrowArrayView**)ArrowMalloc(n_children * sizeof(struct ArrowArrayView*));
- if (array_view->children == NULL) {
- return ENOMEM;
- }
-
- for (int64_t i = 0; i < n_children; i++) {
- array_view->children[i] = NULL;
- }
-
- array_view->n_children = n_children;
-
- for (int64_t i = 0; i < n_children; i++) {
- array_view->children[i] =
- (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
- if (array_view->children[i] == NULL) {
- return ENOMEM;
- }
- ArrowArrayViewInit(array_view->children[i], NANOARROW_TYPE_UNINITIALIZED);
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
- struct ArrowSchema* schema,
- struct ArrowError* error) {
- struct ArrowSchemaView schema_view;
- int result = ArrowSchemaViewInit(&schema_view, schema, error);
- if (result != NANOARROW_OK) {
- return result;
- }
-
- ArrowArrayViewInit(array_view, schema_view.storage_data_type);
- array_view->layout = schema_view.layout;
-
- result = ArrowArrayViewAllocateChildren(array_view, schema->n_children);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(array_view);
- return result;
- }
-
- for (int64_t i = 0; i < schema->n_children; i++) {
- result =
- ArrowArrayViewInitFromSchema(array_view->children[i], schema->children[i], error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(array_view);
- return result;
- }
- }
-
- return NANOARROW_OK;
-}
-
-void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
- if (array_view->children != NULL) {
- for (int64_t i = 0; i < array_view->n_children; i++) {
- if (array_view->children[i] != NULL) {
- ArrowArrayViewReset(array_view->children[i]);
- ArrowFree(array_view->children[i]);
- }
- }
-
- ArrowFree(array_view->children);
- }
-
- ArrowArrayViewInit(array_view, NANOARROW_TYPE_UNINITIALIZED);
-}
-
-void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) {
- for (int i = 0; i < 3; i++) {
- int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
- array_view->buffer_views[i].data.data = NULL;
-
- switch (array_view->layout.buffer_type[i]) {
- case NANOARROW_BUFFER_TYPE_VALIDITY:
- array_view->buffer_views[i].n_bytes = _ArrowBytesForBits(length);
- continue;
- case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
- // Probably don't want/need to rely on the producer to have allocated an
- // offsets buffer of length 1 for a zero-size array
- array_view->buffer_views[i].n_bytes =
- (length != 0) * element_size_bytes * (length + 1);
- continue;
- case NANOARROW_BUFFER_TYPE_DATA:
- array_view->buffer_views[i].n_bytes =
- _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * length) /
- 8;
- continue;
- case NANOARROW_BUFFER_TYPE_TYPE_ID:
- case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
- array_view->buffer_views[i].n_bytes = element_size_bytes * length;
- continue;
- case NANOARROW_BUFFER_TYPE_NONE:
- array_view->buffer_views[i].n_bytes = 0;
- continue;
- }
- }
-
- switch (array_view->storage_type) {
- case NANOARROW_TYPE_STRUCT:
- case NANOARROW_TYPE_SPARSE_UNION:
- for (int64_t i = 0; i < array_view->n_children; i++) {
- ArrowArrayViewSetLength(array_view->children[i], length);
- }
- break;
- case NANOARROW_TYPE_FIXED_SIZE_LIST:
- if (array_view->n_children >= 1) {
- ArrowArrayViewSetLength(array_view->children[0],
- length * array_view->layout.child_size_elements);
- }
- default:
- break;
- }
-}
-
-ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
- struct ArrowArray* array,
- struct ArrowError* error) {
- array_view->array = array;
- ArrowArrayViewSetLength(array_view, array->offset + array->length);
-
- int64_t buffers_required = 0;
- for (int i = 0; i < 3; i++) {
- if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
- break;
- }
-
- buffers_required++;
-
- // If the null_count is 0, the validity buffer can be NULL
- if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
- array->null_count == 0 && array->buffers[i] == NULL) {
- array_view->buffer_views[i].n_bytes = 0;
- }
-
- array_view->buffer_views[i].data.data = array->buffers[i];
- }
-
- if (buffers_required != array->n_buffers) {
- ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)",
- (int)buffers_required, (int)array->n_buffers);
- return EINVAL;
- }
-
- if (array_view->n_children != array->n_children) {
- return EINVAL;
- }
-
- // Check child sizes and calculate sizes that depend on data in the array buffers
- int64_t last_offset;
- switch (array_view->storage_type) {
- case NANOARROW_TYPE_STRING:
- case NANOARROW_TYPE_BINARY:
- if (array_view->buffer_views[1].n_bytes != 0) {
- last_offset =
- array_view->buffer_views[1].data.as_int32[array->offset + array->length];
- array_view->buffer_views[2].n_bytes = last_offset;
- }
- break;
- case NANOARROW_TYPE_LARGE_STRING:
- case NANOARROW_TYPE_LARGE_BINARY:
- if (array_view->buffer_views[1].n_bytes != 0) {
- last_offset =
- array_view->buffer_views[1].data.as_int64[array->offset + array->length];
- array_view->buffer_views[2].n_bytes = last_offset;
- }
- break;
- case NANOARROW_TYPE_STRUCT:
- for (int64_t i = 0; i < array_view->n_children; i++) {
- if (array->children[i]->length < (array->offset + array->length)) {
- ArrowErrorSet(
- error,
- "Expected struct child %d to have length >= %ld but found child with "
- "length %ld",
- (int)(i + 1), (long)(array->offset + array->length),
- (long)array->children[i]->length);
- return EINVAL;
- }
- }
- break;
- case NANOARROW_TYPE_LIST:
- if (array->n_children != 1) {
- ArrowErrorSet(error,
- "Expected 1 child of list array but found %d child arrays",
- (int)array->n_children);
- return EINVAL;
- }
-
- if (array_view->buffer_views[1].n_bytes != 0) {
- last_offset =
- array_view->buffer_views[1].data.as_int32[array->offset + array->length];
- if (array->children[0]->length < last_offset) {
- ArrowErrorSet(
- error,
- "Expected child of list array with length >= %ld but found array with "
- "length %ld",
- (long)last_offset, (long)array->children[0]->length);
- return EINVAL;
- }
- }
- break;
- case NANOARROW_TYPE_LARGE_LIST:
- if (array->n_children != 1) {
- ArrowErrorSet(error,
- "Expected 1 child of large list array but found %d child arrays",
- (int)array->n_children);
- return EINVAL;
- }
-
- if (array_view->buffer_views[1].n_bytes != 0) {
- last_offset =
- array_view->buffer_views[1].data.as_int64[array->offset + array->length];
- if (array->children[0]->length < last_offset) {
- ArrowErrorSet(
- error,
- "Expected child of large list array with length >= %ld but found array "
- "with length %ld",
- (long)last_offset, (long)array->children[0]->length);
- return EINVAL;
- }
- }
- break;
- case NANOARROW_TYPE_FIXED_SIZE_LIST:
- if (array->n_children != 1) {
- ArrowErrorSet(error,
- "Expected 1 child of fixed-size array but found %d child arrays",
- (int)array->n_children);
- return EINVAL;
- }
-
- last_offset =
- (array->offset + array->length) * array_view->layout.child_size_elements;
- if (array->children[0]->length < last_offset) {
- ArrowErrorSet(
- error,
- "Expected child of fixed-size list array with length >= %ld but found array "
- "with length %ld",
- (long)last_offset, (long)array->children[0]->length);
- return EINVAL;
- }
- break;
- default:
- break;
- }
-
- for (int64_t i = 0; i < array_view->n_children; i++) {
- NANOARROW_RETURN_NOT_OK(
- ArrowArrayViewSetArray(array_view->children[i], array->children[i], error));
- }
-
- return NANOARROW_OK;
-}
diff --git a/c/vendor/nanoarrow/bitmap_inline.h b/c/vendor/nanoarrow/bitmap_inline.h
deleted file mode 100644
index fc80e80..0000000
--- a/c/vendor/nanoarrow/bitmap_inline.h
+++ /dev/null
@@ -1,320 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef NANOARROW_BITMAP_INLINE_H_INCLUDED
-#define NANOARROW_BITMAP_INLINE_H_INCLUDED
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "buffer_inline.h"
-#include "typedefs_inline.h"
-#include "utils_inline.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static const uint8_t _ArrowkBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
-static const uint8_t _ArrowkFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
-static const uint8_t _ArrowkPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
-static const uint8_t _ArrowkTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
-
-static const uint8_t _ArrowkBytePopcount[] = {
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3,
- 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4,
- 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4,
- 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,
- 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2,
- 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,
- 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4,
- 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6,
- 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
-
-static inline int64_t _ArrowRoundUpToMultipleOf8(int64_t value) {
- return (value + 7) & ~((int64_t)7);
-}
-
-static inline int64_t _ArrowRoundDownToMultipleOf8(int64_t value) {
- return (value / 8) * 8;
-}
-
-static inline int64_t _ArrowBytesForBits(int64_t bits) {
- return (bits >> 3) + ((bits & 7) != 0);
-}
-
-static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
- *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 |
- values[5] << 5 | values[6] << 6 | values[7] << 7);
-}
-
-static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) {
- *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 |
- values[5] << 5 | values[6] << 6 | values[7] << 7);
-}
-
-static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) {
- return (bits[i >> 3] >> (i & 0x07)) & 1;
-}
-
-static inline void ArrowBitSet(uint8_t* bits, int64_t i) {
- bits[i / 8] |= _ArrowkBitmask[i % 8];
-}
-
-static inline void ArrowBitClear(uint8_t* bits, int64_t i) {
- bits[i / 8] &= _ArrowkFlippedBitmask[i % 8];
-}
-
-static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set) {
- bits[i / 8] ^=
- ((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i / 8])) & _ArrowkBitmask[i % 8];
-}
-
-static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length,
- uint8_t bits_are_set) {
- const int64_t i_begin = start_offset;
- const int64_t i_end = start_offset + length;
- const uint8_t fill_byte = (uint8_t)(-bits_are_set);
-
- const int64_t bytes_begin = i_begin / 8;
- const int64_t bytes_end = i_end / 8 + 1;
-
- const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
- const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];
-
- if (bytes_end == bytes_begin + 1) {
- // set bits within a single byte
- const uint8_t only_byte_mask =
- i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
- bits[bytes_begin] &= only_byte_mask;
- bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask);
- return;
- }
-
- // set/clear trailing bits of first byte
- bits[bytes_begin] &= first_byte_mask;
- bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask);
-
- if (bytes_end - bytes_begin > 2) {
- // set/clear whole bytes
- memset(bits + bytes_begin + 1, fill_byte, (size_t)(bytes_end - bytes_begin - 2));
- }
-
- if (i_end % 8 == 0) {
- return;
- }
-
- // set/clear leading bits of last byte
- bits[bytes_end - 1] &= last_byte_mask;
- bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask);
-}
-
-static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset,
- int64_t length) {
- if (length == 0) {
- return 0;
- }
-
- const int64_t i_begin = start_offset;
- const int64_t i_end = start_offset + length;
-
- const int64_t bytes_begin = i_begin / 8;
- const int64_t bytes_end = i_end / 8 + 1;
-
- const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
- const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];
-
- if (bytes_end == bytes_begin + 1) {
- // count bits within a single byte
- const uint8_t only_byte_mask =
- i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
- const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask;
- return _ArrowkBytePopcount[byte_masked];
- }
-
- int64_t count = 0;
-
- // first byte
- count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask];
-
- // middle bytes
- for (int64_t i = bytes_begin + 1; i < (bytes_end - 1); i++) {
- count += _ArrowkBytePopcount[bits[i]];
- }
-
- // last byte
- count += _ArrowkBytePopcount[bits[bytes_end - 1] & ~last_byte_mask];
-
- return count;
-}
-
-static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap) {
- ArrowBufferInit(&bitmap->buffer);
- bitmap->size_bits = 0;
-}
-
-static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
- int64_t additional_size_bits) {
- int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits;
- if (min_capacity_bits <= (bitmap->buffer.capacity_bytes * 8)) {
- return NANOARROW_OK;
- }
-
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferReserve(&bitmap->buffer, _ArrowBytesForBits(additional_size_bits)));
-
- bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0;
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
- int64_t new_capacity_bits,
- char shrink_to_fit) {
- if (new_capacity_bits < 0) {
- return EINVAL;
- }
-
- int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits);
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit));
-
- if (new_capacity_bits < bitmap->size_bits) {
- bitmap->size_bits = new_capacity_bits;
- }
-
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
- uint8_t bits_are_set, int64_t length) {
- NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length));
-
- ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length);
- return NANOARROW_OK;
-}
-
-static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
- uint8_t bits_are_set, int64_t length) {
- ArrowBitsSetTo(bitmap->buffer.data, bitmap->size_bits, length, bits_are_set);
- bitmap->size_bits += length;
- bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits);
-}
-
-static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
- const int8_t* values, int64_t n_values) {
- if (n_values == 0) {
- return;
- }
-
- const int8_t* values_cursor = values;
- int64_t n_remaining = n_values;
- int64_t out_i_cursor = bitmap->size_bits;
- uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8;
-
- // First byte
- if ((out_i_cursor % 8) != 0) {
- int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor;
- for (int i = 0; i < n_partial_bits; i++) {
- ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]);
- }
-
- out_cursor++;
- values_cursor += n_partial_bits;
- n_remaining -= n_partial_bits;
- }
-
- // Middle bytes
- int64_t n_full_bytes = n_remaining / 8;
- for (int64_t i = 0; i < n_full_bytes; i++) {
- _ArrowBitmapPackInt8(values_cursor, out_cursor);
- values_cursor += 8;
- out_cursor++;
- }
-
- // Last byte
- out_i_cursor += n_full_bytes * 8;
- n_remaining -= n_full_bytes * 8;
- if (n_remaining > 0) {
- // Zero out the last byte
- *out_cursor = 0x00;
- for (int i = 0; i < n_remaining; i++) {
- ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
- }
- out_cursor++;
- }
-
- bitmap->size_bits += n_values;
- bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data;
-}
-
-static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap,
- const int32_t* values, int64_t n_values) {
- if (n_values == 0) {
- return;
- }
-
- const int32_t* values_cursor = values;
- int64_t n_remaining = n_values;
- int64_t out_i_cursor = bitmap->size_bits;
- uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8;
-
- // First byte
- if ((out_i_cursor % 8) != 0) {
- int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor;
- for (int i = 0; i < n_partial_bits; i++) {
- ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]);
- }
-
- out_cursor++;
- values_cursor += n_partial_bits;
- n_remaining -= n_partial_bits;
- }
-
- // Middle bytes
- int64_t n_full_bytes = n_remaining / 8;
- for (int64_t i = 0; i < n_full_bytes; i++) {
- _ArrowBitmapPackInt32(values_cursor, out_cursor);
- values_cursor += 8;
- out_cursor++;
- }
-
- // Last byte
- out_i_cursor += n_full_bytes * 8;
- n_remaining -= n_full_bytes * 8;
- if (n_remaining > 0) {
- // Zero out the last byte
- *out_cursor = 0x00;
- for (int i = 0; i < n_remaining; i++) {
- ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
- }
- out_cursor++;
- }
-
- bitmap->size_bits += n_values;
- bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data;
-}
-
-static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) {
- ArrowBufferReset(&bitmap->buffer);
- bitmap->size_bits = 0;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/c/vendor/nanoarrow/buffer_inline.h b/c/vendor/nanoarrow/buffer_inline.h
deleted file mode 100644
index 1b6c43b..0000000
--- a/c/vendor/nanoarrow/buffer_inline.h
+++ /dev/null
@@ -1,193 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef NANOARROW_BUFFER_INLINE_H_INCLUDED
-#define NANOARROW_BUFFER_INLINE_H_INCLUDED
-
-#include <errno.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "typedefs_inline.h"
-#include "utils_inline.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static inline int64_t _ArrowGrowByFactor(int64_t current_capacity, int64_t new_capacity) {
- int64_t doubled_capacity = current_capacity * 2;
- if (doubled_capacity > new_capacity) {
- return doubled_capacity;
- } else {
- return new_capacity;
- }
-}
-
-static inline void ArrowBufferInit(struct ArrowBuffer* buffer) {
- buffer->data = NULL;
- buffer->size_bytes = 0;
- buffer->capacity_bytes = 0;
- buffer->allocator = ArrowBufferAllocatorDefault();
-}
-
-static inline ArrowErrorCode ArrowBufferSetAllocator(
- struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator) {
- if (buffer->data == NULL) {
- buffer->allocator = allocator;
- return NANOARROW_OK;
- } else {
- return EINVAL;
- }
-}
-
-static inline void ArrowBufferReset(struct ArrowBuffer* buffer) {
- if (buffer->data != NULL) {
- buffer->allocator.free(&buffer->allocator, (uint8_t*)buffer->data,
- buffer->capacity_bytes);
- buffer->data = NULL;
- }
-
- buffer->capacity_bytes = 0;
- buffer->size_bytes = 0;
-}
-
-static inline void ArrowBufferMove(struct ArrowBuffer* buffer,
- struct ArrowBuffer* buffer_out) {
- memcpy(buffer_out, buffer, sizeof(struct ArrowBuffer));
- buffer->data = NULL;
- ArrowBufferReset(buffer);
-}
-
-static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
- int64_t new_capacity_bytes,
- char shrink_to_fit) {
- if (new_capacity_bytes < 0) {
- return EINVAL;
- }
-
- if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) {
- buffer->data = buffer->allocator.reallocate(
- &buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes);
- if (buffer->data == NULL && new_capacity_bytes > 0) {
- buffer->capacity_bytes = 0;
- buffer->size_bytes = 0;
- return ENOMEM;
- }
-
- buffer->capacity_bytes = new_capacity_bytes;
- }
-
- // Ensures that when shrinking that size <= capacity
- if (new_capacity_bytes < buffer->size_bytes) {
- buffer->size_bytes = new_capacity_bytes;
- }
-
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer,
- int64_t additional_size_bytes) {
- int64_t min_capacity_bytes = buffer->size_bytes + additional_size_bytes;
- if (min_capacity_bytes <= buffer->capacity_bytes) {
- return NANOARROW_OK;
- }
-
- return ArrowBufferResize(
- buffer, _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes), 0);
-}
-
-static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data,
- int64_t size_bytes) {
- if (size_bytes > 0) {
- memcpy(buffer->data + buffer->size_bytes, data, size_bytes);
- buffer->size_bytes += size_bytes;
- }
-}
-
-static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
- const void* data, int64_t size_bytes) {
- NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes));
-
- ArrowBufferAppendUnsafe(buffer, data, size_bytes);
- return NANOARROW_OK;
-}
-
-static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer,
- int8_t value) {
- return ArrowBufferAppend(buffer, &value, sizeof(int8_t));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer,
- uint8_t value) {
- return ArrowBufferAppend(buffer, &value, sizeof(uint8_t));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer,
- int16_t value) {
- return ArrowBufferAppend(buffer, &value, sizeof(int16_t));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer,
- uint16_t value) {
- return ArrowBufferAppend(buffer, &value, sizeof(uint16_t));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer,
- int32_t value) {
- return ArrowBufferAppend(buffer, &value, sizeof(int32_t));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer,
- uint32_t value) {
- return ArrowBufferAppend(buffer, &value, sizeof(uint32_t));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer,
- int64_t value) {
- return ArrowBufferAppend(buffer, &value, sizeof(int64_t));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer,
- uint64_t value) {
- return ArrowBufferAppend(buffer, &value, sizeof(uint64_t));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer,
- double value) {
- return ArrowBufferAppend(buffer, &value, sizeof(double));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer,
- float value) {
- return ArrowBufferAppend(buffer, &value, sizeof(float));
-}
-
-static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer,
- uint8_t value, int64_t size_bytes) {
- NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes));
-
- memset(buffer->data + buffer->size_bytes, value, size_bytes);
- buffer->size_bytes += size_bytes;
- return NANOARROW_OK;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/c/vendor/nanoarrow/build-and-test.yaml b/c/vendor/nanoarrow/build-and-test.yaml
deleted file mode 100644
index 13e4806..0000000
--- a/c/vendor/nanoarrow/build-and-test.yaml
+++ /dev/null
@@ -1,137 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-on:
- push:
- branches: [main, master]
- pull_request:
- branches: [main, master]
-
-name: Build and Test
-
-jobs:
- build-and-test:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout repo
- uses: actions/checkout@v3
- with:
- fetch-depth: 0
-
- - name: Install dependencies
- run: |
- sudo apt-get install -y cmake valgrind
-
- - name: Cache Dependency Builds
- id: cache-deps-build
- uses: actions/cache@v3
- with:
- path: build-deps
- key: ${{ runner.os }}-5
-
- - name: Init build dir
- if: steps.cache-deps-build.outputs.cache-hit != 'true'
- run: mkdir build-deps
-
- # There seems to be an error passing -DGTest_DIR into Arrow's build
- # so we just build the same version of it and install
- - name: Fetch googletest
- if: steps.cache-deps-build.outputs.cache-hit != 'true'
- uses: actions/checkout@v3
- with:
- repository: google/googletest
- ref: release-1.11.0
- path: build-deps/googletest
- fetch-depth: 0
-
- - name: Build googletest
- if: steps.cache-deps-build.outputs.cache-hit != 'true'
- run: |
- cd build-deps/googletest
- cmake . -DCMAKE_CXX_FLAGS=-fPIC
- cmake --build .
- cmake --install . --prefix ../../dist
-
- - name: Install googletest
- run: |
- cd build-deps/googletest
- cmake --install . --prefix ../../dist
-
- - name: Fetch Arrow
- if: steps.cache-deps-build.outputs.cache-hit != 'true'
- uses: actions/checkout@v3
- with:
- repository: apache/arrow
- ref: apache-arrow-8.0.0
- path: build-deps/arrow
- fetch-depth: 0
-
- - name: Build Arrow
- if: steps.cache-deps-build.outputs.cache-hit != 'true'
- run: |
- mkdir build-deps/arrow-build
- cd build-deps/arrow-build
- cmake ../arrow/cpp -DARROW_JSON=ON -DARROW_TESTING=ON -DBoost_SOURCE=BUNDLED -DGTest_DIR=`pwd`/../../dist/lib/cmake/GTest
- cmake --build .
- cmake --install . --prefix ../../dist
-
- - name: Install arrow
- run: |
- cd build-deps/arrow-build
- cmake --install . --prefix ../../dist
-
- - name: Build nanoarrow
- run: |
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
- sudo ldconfig
- mkdir build
- cd build
- cmake .. -DCMAKE_BUILD_TYPE=Debug -DGTest_DIR=`pwd`/../dist/lib/cmake/GTest -DArrow_DIR=`pwd`/../dist/lib/cmake/arrow -DArrowTesting_DIR=`pwd`/../dist/lib/cmake/arrow -DNANOARROW_CODE_COVERAGE=ON -DNANOARROW_BUILD_TESTS=ON
- cmake --build .
-
- - name: Run tests
- run: |
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
- sudo ldconfig
- cd build
- ctest -T test --output-on-failure .
-
- - name: Run tests with valgrind
- run: |
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
- sudo ldconfig
- cd build
- ctest -T memcheck .
-
- - name: Upload memcheck results
- if: failure()
- uses: actions/upload-artifact@main
- with:
- name: nanoarrow-memcheck
- path: build/Testing/Temporary/MemoryChecker.*.log
-
- - name: Calculate coverage
- run: |
- SOURCE_PREFIX=`pwd`
- mkdir build/cov
- cd build/cov
- gcov -abcfu --source-prefix=$SOURCE_PREFIX `find ../CMakeFiles/nanoarrow.dir/ -name "*.gcno"`
-
- - name: Upload coverage
- uses: codecov/codecov-action@v2
- with:
- directory: build/cov
diff --git a/c/vendor/nanoarrow/error.c b/c/vendor/nanoarrow/error.c
deleted file mode 100644
index 9af6267..0000000
--- a/c/vendor/nanoarrow/error.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <errno.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "nanoarrow.h"
-
-int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
- if (error == NULL) {
- return NANOARROW_OK;
- }
-
- memset(error->message, 0, sizeof(error->message));
-
- va_list args;
- va_start(args, fmt);
- int chars_needed = vsnprintf(error->message, sizeof(error->message), fmt, args);
- va_end(args);
-
- if (chars_needed < 0) {
- return EINVAL;
- } else if (chars_needed >= sizeof(error->message)) {
- return ERANGE;
- } else {
- return NANOARROW_OK;
- }
-}
-
-const char* ArrowErrorMessage(struct ArrowError* error) { return error->message; }
diff --git a/c/vendor/nanoarrow/metadata.c b/c/vendor/nanoarrow/metadata.c
deleted file mode 100644
index 7cf452c..0000000
--- a/c/vendor/nanoarrow/metadata.c
+++ /dev/null
@@ -1,234 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "nanoarrow.h"
-
-ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader,
- const char* metadata) {
- reader->metadata = metadata;
-
- if (reader->metadata == NULL) {
- reader->offset = 0;
- reader->remaining_keys = 0;
- } else {
- memcpy(&reader->remaining_keys, reader->metadata, sizeof(int32_t));
- reader->offset = sizeof(int32_t);
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader,
- struct ArrowStringView* key_out,
- struct ArrowStringView* value_out) {
- if (reader->remaining_keys <= 0) {
- return EINVAL;
- }
-
- int64_t pos = 0;
-
- int32_t key_size;
- memcpy(&key_size, reader->metadata + reader->offset + pos, sizeof(int32_t));
- pos += sizeof(int32_t);
-
- key_out->data = reader->metadata + reader->offset + pos;
- key_out->n_bytes = key_size;
- pos += key_size;
-
- int32_t value_size;
- memcpy(&value_size, reader->metadata + reader->offset + pos, sizeof(int32_t));
- pos += sizeof(int32_t);
-
- value_out->data = reader->metadata + reader->offset + pos;
- value_out->n_bytes = value_size;
- pos += value_size;
-
- reader->offset += pos;
- reader->remaining_keys--;
- return NANOARROW_OK;
-}
-
-int64_t ArrowMetadataSizeOf(const char* metadata) {
- if (metadata == NULL) {
- return 0;
- }
-
- struct ArrowMetadataReader reader;
- struct ArrowStringView key;
- struct ArrowStringView value;
- ArrowMetadataReaderInit(&reader, metadata);
-
- int64_t size = sizeof(int32_t);
- while (ArrowMetadataReaderRead(&reader, &key, &value) == NANOARROW_OK) {
- size += sizeof(int32_t) + key.n_bytes + sizeof(int32_t) + value.n_bytes;
- }
-
- return size;
-}
-
-static ArrowErrorCode ArrowMetadataGetValueInternal(const char* metadata,
- struct ArrowStringView* key,
- struct ArrowStringView* value_out) {
- struct ArrowMetadataReader reader;
- struct ArrowStringView existing_key;
- struct ArrowStringView existing_value;
- ArrowMetadataReaderInit(&reader, metadata);
-
- int64_t size = sizeof(int32_t);
- while (ArrowMetadataReaderRead(&reader, &existing_key, &existing_value) ==
- NANOARROW_OK) {
- int key_equal = key->n_bytes == existing_key.n_bytes &&
- strncmp(key->data, existing_key.data, existing_key.n_bytes) == 0;
- if (key_equal) {
- value_out->data = existing_value.data;
- value_out->n_bytes = existing_value.n_bytes;
- break;
- }
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key,
- struct ArrowStringView* value_out) {
- if (value_out == NULL) {
- return EINVAL;
- }
-
- return ArrowMetadataGetValueInternal(metadata, &key, value_out);
-}
-
-char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key) {
- struct ArrowStringView value = ArrowCharView(NULL);
- ArrowMetadataGetValue(metadata, key, &value);
- return value.data != NULL;
-}
-
-ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer,
- const char* metadata) {
- ArrowBufferInit(buffer);
- return ArrowBufferAppend(buffer, metadata, ArrowMetadataSizeOf(metadata));
-}
-
-static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buffer,
- struct ArrowStringView* key,
- struct ArrowStringView* value) {
- if (value == NULL) {
- return NANOARROW_OK;
- }
-
- if (buffer->capacity_bytes == 0) {
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(buffer, 0));
- }
-
- if (buffer->capacity_bytes < sizeof(int32_t)) {
- return EINVAL;
- }
-
- int32_t n_keys;
- memcpy(&n_keys, buffer->data, sizeof(int32_t));
-
- int32_t key_size = key->n_bytes;
- int32_t value_size = value->n_bytes;
- NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(
- buffer, sizeof(int32_t) + key_size + sizeof(int32_t) + value_size));
-
- ArrowBufferAppendUnsafe(buffer, &key_size, sizeof(int32_t));
- ArrowBufferAppendUnsafe(buffer, key->data, key_size);
- ArrowBufferAppendUnsafe(buffer, &value_size, sizeof(int32_t));
- ArrowBufferAppendUnsafe(buffer, value->data, value_size);
-
- n_keys++;
- memcpy(buffer->data, &n_keys, sizeof(int32_t));
-
- return NANOARROW_OK;
-}
-
-static ArrowErrorCode ArrowMetadataBuilderSetInternal(struct ArrowBuffer* buffer,
- struct ArrowStringView* key,
- struct ArrowStringView* value) {
- // Inspect the current value to see if we can avoid copying the buffer
- struct ArrowStringView current_value = ArrowCharView(NULL);
- NANOARROW_RETURN_NOT_OK(
- ArrowMetadataGetValueInternal((const char*)buffer->data, key, &current_value));
-
- // The key should be removed but no key exists
- if (value == NULL && current_value.data == NULL) {
- return NANOARROW_OK;
- }
-
- // The key/value can be appended because no key exists
- if (value != NULL && current_value.data == NULL) {
- return ArrowMetadataBuilderAppendInternal(buffer, key, value);
- }
-
- struct ArrowMetadataReader reader;
- struct ArrowStringView existing_key;
- struct ArrowStringView existing_value;
- NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, (const char*)buffer->data));
-
- struct ArrowBuffer new_buffer;
- NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&new_buffer, NULL));
-
- while (reader.remaining_keys > 0) {
- int result = ArrowMetadataReaderRead(&reader, &existing_key, &existing_value);
- if (result != NANOARROW_OK) {
- ArrowBufferReset(&new_buffer);
- return result;
- }
-
- if (key->n_bytes == existing_key.n_bytes &&
- strncmp((const char*)key->data, (const char*)existing_key.data,
- existing_key.n_bytes) == 0) {
- result = ArrowMetadataBuilderAppendInternal(&new_buffer, key, value);
- value = NULL;
- } else {
- result =
- ArrowMetadataBuilderAppendInternal(&new_buffer, &existing_key, &existing_value);
- }
-
- if (result != NANOARROW_OK) {
- ArrowBufferReset(&new_buffer);
- return result;
- }
- }
-
- ArrowBufferReset(buffer);
- ArrowBufferMove(&new_buffer, buffer);
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer,
- struct ArrowStringView key,
- struct ArrowStringView value) {
- return ArrowMetadataBuilderAppendInternal(buffer, &key, &value);
-}
-
-ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer,
- struct ArrowStringView key,
- struct ArrowStringView value) {
- return ArrowMetadataBuilderSetInternal(buffer, &key, &value);
-}
-
-ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer,
- struct ArrowStringView key) {
- return ArrowMetadataBuilderSetInternal(buffer, &key, NULL);
-}
diff --git a/c/vendor/nanoarrow/nanoarrow.c b/c/vendor/nanoarrow/nanoarrow.c
index f7504e8..693fbf7 100644
--- a/c/vendor/nanoarrow/nanoarrow.c
+++ b/c/vendor/nanoarrow/nanoarrow.c
@@ -15,11 +15,2189 @@
// specific language governing permissions and limitations
// under the License.
-#include "allocator.c"
-#include "array.c"
-#include "array_view.c"
-#include "error.c"
-#include "metadata.c"
-#include "schema.c"
-#include "schema_view.c"
-#include "utils.c"
+#include <errno.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+const char* ArrowNanoarrowBuildId() { return NANOARROW_BUILD_ID; }
+
+int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
+ if (error == NULL) {
+ return NANOARROW_OK;
+ }
+
+ memset(error->message, 0, sizeof(error->message));
+
+ va_list args;
+ va_start(args, fmt);
+ int chars_needed = vsnprintf(error->message, sizeof(error->message), fmt, args);
+ va_end(args);
+
+ if (chars_needed < 0) {
+ return EINVAL;
+ } else if (chars_needed >= sizeof(error->message)) {
+ return ERANGE;
+ } else {
+ return NANOARROW_OK;
+ }
+}
+
+const char* ArrowErrorMessage(struct ArrowError* error) { return error->message; }
+
+void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE;
+
+ layout->element_size_bits[0] = 1;
+ layout->element_size_bits[1] = 0;
+ layout->element_size_bits[2] = 0;
+
+ layout->child_size_elements = 0;
+
+ switch (storage_type) {
+ case NANOARROW_TYPE_UNINITIALIZED:
+ case NANOARROW_TYPE_NA:
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->element_size_bits[0] = 0;
+ break;
+
+ case NANOARROW_TYPE_LIST:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 32;
+ break;
+
+ case NANOARROW_TYPE_LARGE_LIST:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 64;
+ break;
+
+ case NANOARROW_TYPE_BOOL:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 1;
+ break;
+
+ case NANOARROW_TYPE_UINT8:
+ case NANOARROW_TYPE_INT8:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 8;
+ break;
+
+ case NANOARROW_TYPE_UINT16:
+ case NANOARROW_TYPE_INT16:
+ case NANOARROW_TYPE_HALF_FLOAT:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 16;
+ break;
+
+ case NANOARROW_TYPE_UINT32:
+ case NANOARROW_TYPE_INT32:
+ case NANOARROW_TYPE_FLOAT:
+ case NANOARROW_TYPE_INTERVAL_MONTHS:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 32;
+ break;
+
+ case NANOARROW_TYPE_UINT64:
+ case NANOARROW_TYPE_INT64:
+ case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 64;
+ break;
+
+ case NANOARROW_TYPE_DECIMAL128:
+ case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 128;
+ break;
+
+ case NANOARROW_TYPE_DECIMAL256:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 256;
+ break;
+
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ break;
+
+ case NANOARROW_TYPE_DENSE_UNION:
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+ layout->element_size_bits[0] = 8;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET;
+ layout->element_size_bits[1] = 32;
+ break;
+
+ case NANOARROW_TYPE_SPARSE_UNION:
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+ layout->element_size_bits[0] = 8;
+ break;
+
+ case NANOARROW_TYPE_STRING:
+ case NANOARROW_TYPE_BINARY:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 32;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+ break;
+
+ case NANOARROW_TYPE_LARGE_STRING:
+ case NANOARROW_TYPE_LARGE_BINARY:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 64;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+ break;
+
+ default:
+ break;
+ }
+}
+
+void* ArrowMalloc(int64_t size) { return malloc(size); }
+
+void* ArrowRealloc(void* ptr, int64_t size) { return realloc(ptr, size); }
+
+void ArrowFree(void* ptr) { free(ptr); }
+
+static uint8_t* ArrowBufferAllocatorMallocAllocate(struct ArrowBufferAllocator* allocator,
+ int64_t size) {
+ return ArrowMalloc(size);
+}
+
+static uint8_t* ArrowBufferAllocatorMallocReallocate(
+ struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
+ int64_t new_size) {
+ return ArrowRealloc(ptr, new_size);
+}
+
+static void ArrowBufferAllocatorMallocFree(struct ArrowBufferAllocator* allocator,
+ uint8_t* ptr, int64_t size) {
+ ArrowFree(ptr);
+}
+
+static struct ArrowBufferAllocator ArrowBufferAllocatorMalloc = {
+ &ArrowBufferAllocatorMallocReallocate, &ArrowBufferAllocatorMallocFree, NULL};
+
+struct ArrowBufferAllocator ArrowBufferAllocatorDefault() {
+ return ArrowBufferAllocatorMalloc;
+}
+
+static uint8_t* ArrowBufferAllocatorNeverAllocate(struct ArrowBufferAllocator* allocator,
+ int64_t size) {
+ return NULL;
+}
+
+static uint8_t* ArrowBufferAllocatorNeverReallocate(
+ struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t old_size,
+ int64_t new_size) {
+ return NULL;
+}
+
+struct ArrowBufferAllocator ArrowBufferDeallocator(
+ void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
+ int64_t size),
+ void* private_data) {
+ struct ArrowBufferAllocator allocator;
+ allocator.reallocate = &ArrowBufferAllocatorNeverReallocate;
+ allocator.free = custom_free;
+ allocator.private_data = private_data;
+ return allocator;
+}
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+void ArrowSchemaRelease(struct ArrowSchema* schema) {
+ if (schema->format != NULL) ArrowFree((void*)schema->format);
+ if (schema->name != NULL) ArrowFree((void*)schema->name);
+ if (schema->metadata != NULL) ArrowFree((void*)schema->metadata);
+
+ // This object owns the memory for all the children, but those
+ // children may have been generated elsewhere and might have
+ // their own release() callback.
+ if (schema->children != NULL) {
+ for (int64_t i = 0; i < schema->n_children; i++) {
+ if (schema->children[i] != NULL) {
+ if (schema->children[i]->release != NULL) {
+ schema->children[i]->release(schema->children[i]);
+ }
+
+ ArrowFree(schema->children[i]);
+ }
+ }
+
+ ArrowFree(schema->children);
+ }
+
+ // This object owns the memory for the dictionary but it
+ // may have been generated somewhere else and have its own
+ // release() callback.
+ if (schema->dictionary != NULL) {
+ if (schema->dictionary->release != NULL) {
+ schema->dictionary->release(schema->dictionary);
+ }
+
+ ArrowFree(schema->dictionary);
+ }
+
+ // private data not currently used
+ if (schema->private_data != NULL) {
+ ArrowFree(schema->private_data);
+ }
+
+ schema->release = NULL;
+}
+
+const char* ArrowSchemaFormatTemplate(enum ArrowType data_type) {
+ switch (data_type) {
+ case NANOARROW_TYPE_UNINITIALIZED:
+ return NULL;
+ case NANOARROW_TYPE_NA:
+ return "n";
+ case NANOARROW_TYPE_BOOL:
+ return "b";
+
+ case NANOARROW_TYPE_UINT8:
+ return "C";
+ case NANOARROW_TYPE_INT8:
+ return "c";
+ case NANOARROW_TYPE_UINT16:
+ return "S";
+ case NANOARROW_TYPE_INT16:
+ return "s";
+ case NANOARROW_TYPE_UINT32:
+ return "I";
+ case NANOARROW_TYPE_INT32:
+ return "i";
+ case NANOARROW_TYPE_UINT64:
+ return "L";
+ case NANOARROW_TYPE_INT64:
+ return "l";
+
+ case NANOARROW_TYPE_HALF_FLOAT:
+ return "e";
+ case NANOARROW_TYPE_FLOAT:
+ return "f";
+ case NANOARROW_TYPE_DOUBLE:
+ return "g";
+
+ case NANOARROW_TYPE_STRING:
+ return "u";
+ case NANOARROW_TYPE_LARGE_STRING:
+ return "U";
+ case NANOARROW_TYPE_BINARY:
+ return "z";
+ case NANOARROW_TYPE_LARGE_BINARY:
+ return "Z";
+
+ case NANOARROW_TYPE_DATE32:
+ return "tdD";
+ case NANOARROW_TYPE_DATE64:
+ return "tdm";
+ case NANOARROW_TYPE_INTERVAL_MONTHS:
+ return "tiM";
+ case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+ return "tiD";
+ case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+ return "tin";
+
+ case NANOARROW_TYPE_LIST:
+ return "+l";
+ case NANOARROW_TYPE_LARGE_LIST:
+ return "+L";
+ case NANOARROW_TYPE_STRUCT:
+ return "+s";
+ case NANOARROW_TYPE_MAP:
+ return "+m";
+
+ default:
+ return NULL;
+ }
+}
+
+ArrowErrorCode ArrowSchemaInit(struct ArrowSchema* schema, enum ArrowType data_type) {
+ schema->format = NULL;
+ schema->name = NULL;
+ schema->metadata = NULL;
+ schema->flags = ARROW_FLAG_NULLABLE;
+ schema->n_children = 0;
+ schema->children = NULL;
+ schema->dictionary = NULL;
+ schema->private_data = NULL;
+ schema->release = &ArrowSchemaRelease;
+
+ // We don't allocate the dictionary because it has to be nullptr
+ // for non-dictionary-encoded arrays.
+
+ // Set the format to a valid format string for data_type
+ const char* template_format = ArrowSchemaFormatTemplate(data_type);
+
+ // If data_type isn't recognized and not explicitly unset
+ if (template_format == NULL && data_type != NANOARROW_TYPE_UNINITIALIZED) {
+ schema->release(schema);
+ return EINVAL;
+ }
+
+ int result = ArrowSchemaSetFormat(schema, template_format);
+ if (result != NANOARROW_OK) {
+ schema->release(schema);
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowSchemaInitFixedSize(struct ArrowSchema* schema,
+ enum ArrowType data_type, int32_t fixed_size) {
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED));
+
+ if (fixed_size <= 0) {
+ schema->release(schema);
+ return EINVAL;
+ }
+
+ char buffer[64];
+ int n_chars;
+ switch (data_type) {
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ n_chars = snprintf(buffer, sizeof(buffer), "w:%d", (int)fixed_size);
+ break;
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ n_chars = snprintf(buffer, sizeof(buffer), "+w:%d", (int)fixed_size);
+ break;
+ default:
+ schema->release(schema);
+ return EINVAL;
+ }
+
+ buffer[n_chars] = '\0';
+ int result = ArrowSchemaSetFormat(schema, buffer);
+ if (result != NANOARROW_OK) {
+ schema->release(schema);
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowSchemaInitDecimal(struct ArrowSchema* schema,
+ enum ArrowType data_type, int32_t decimal_precision,
+ int32_t decimal_scale) {
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED));
+
+ if (decimal_precision <= 0) {
+ schema->release(schema);
+ return EINVAL;
+ }
+
+ char buffer[64];
+ int n_chars;
+ switch (data_type) {
+ case NANOARROW_TYPE_DECIMAL128:
+ n_chars =
+ snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale);
+ break;
+ case NANOARROW_TYPE_DECIMAL256:
+ n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,256", decimal_precision,
+ decimal_scale);
+ break;
+ default:
+ schema->release(schema);
+ return EINVAL;
+ }
+
+ buffer[n_chars] = '\0';
+
+ int result = ArrowSchemaSetFormat(schema, buffer);
+ if (result != NANOARROW_OK) {
+ schema->release(schema);
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+static const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
+ switch (time_unit) {
+ case NANOARROW_TIME_UNIT_SECOND:
+ return "s";
+ case NANOARROW_TIME_UNIT_MILLI:
+ return "m";
+ case NANOARROW_TIME_UNIT_MICRO:
+ return "u";
+ case NANOARROW_TIME_UNIT_NANO:
+ return "n";
+ default:
+ return NULL;
+ }
+}
+
+ArrowErrorCode ArrowSchemaInitDateTime(struct ArrowSchema* schema,
+ enum ArrowType data_type,
+ enum ArrowTimeUnit time_unit,
+ const char* timezone) {
+ int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
+ if (result != NANOARROW_OK) {
+ return result;
+ }
+
+ const char* time_unit_str = ArrowTimeUnitString(time_unit);
+ if (time_unit_str == NULL) {
+ schema->release(schema);
+ return EINVAL;
+ }
+
+ char buffer[128];
+ int n_chars;
+ switch (data_type) {
+ case NANOARROW_TYPE_TIME32:
+ case NANOARROW_TYPE_TIME64:
+ if (timezone != NULL) {
+ schema->release(schema);
+ return EINVAL;
+ }
+ n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str);
+ break;
+ case NANOARROW_TYPE_TIMESTAMP:
+ if (timezone == NULL) {
+ timezone = "";
+ }
+ n_chars = snprintf(buffer, sizeof(buffer), "ts%s:%s", time_unit_str, timezone);
+ break;
+ case NANOARROW_TYPE_DURATION:
+ if (timezone != NULL) {
+ schema->release(schema);
+ return EINVAL;
+ }
+ n_chars = snprintf(buffer, sizeof(buffer), "tD%s", time_unit_str);
+ break;
+ default:
+ schema->release(schema);
+ return EINVAL;
+ }
+
+ if (n_chars >= sizeof(buffer)) {
+ schema->release(schema);
+ return ERANGE;
+ }
+
+ buffer[n_chars] = '\0';
+
+ result = ArrowSchemaSetFormat(schema, buffer);
+ if (result != NANOARROW_OK) {
+ schema->release(schema);
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format) {
+ if (schema->format != NULL) {
+ ArrowFree((void*)schema->format);
+ }
+
+ if (format != NULL) {
+ size_t format_size = strlen(format) + 1;
+ schema->format = (const char*)ArrowMalloc(format_size);
+ if (schema->format == NULL) {
+ return ENOMEM;
+ }
+
+ memcpy((void*)schema->format, format, format_size);
+ } else {
+ schema->format = NULL;
+ }
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name) {
+ if (schema->name != NULL) {
+ ArrowFree((void*)schema->name);
+ }
+
+ if (name != NULL) {
+ size_t name_size = strlen(name) + 1;
+ schema->name = (const char*)ArrowMalloc(name_size);
+ if (schema->name == NULL) {
+ return ENOMEM;
+ }
+
+ memcpy((void*)schema->name, name, name_size);
+ } else {
+ schema->name = NULL;
+ }
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata) {
+ if (schema->metadata != NULL) {
+ ArrowFree((void*)schema->metadata);
+ }
+
+ if (metadata != NULL) {
+ size_t metadata_size = ArrowMetadataSizeOf(metadata);
+ schema->metadata = (const char*)ArrowMalloc(metadata_size);
+ if (schema->metadata == NULL) {
+ return ENOMEM;
+ }
+
+ memcpy((void*)schema->metadata, metadata, metadata_size);
+ } else {
+ schema->metadata = NULL;
+ }
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema,
+ int64_t n_children) {
+ if (schema->children != NULL) {
+ return EEXIST;
+ }
+
+ if (n_children > 0) {
+ schema->children =
+ (struct ArrowSchema**)ArrowMalloc(n_children * sizeof(struct ArrowSchema*));
+
+ if (schema->children == NULL) {
+ return ENOMEM;
+ }
+
+ schema->n_children = n_children;
+
+ memset(schema->children, 0, n_children * sizeof(struct ArrowSchema*));
+
+ for (int64_t i = 0; i < n_children; i++) {
+ schema->children[i] = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema));
+
+ if (schema->children[i] == NULL) {
+ return ENOMEM;
+ }
+
+ schema->children[i]->release = NULL;
+ }
+ }
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema) {
+ if (schema->dictionary != NULL) {
+ return EEXIST;
+ }
+
+ schema->dictionary = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema));
+ if (schema->dictionary == NULL) {
+ return ENOMEM;
+ }
+
+ schema->dictionary->release = NULL;
+ return NANOARROW_OK;
+}
+
+int ArrowSchemaDeepCopy(struct ArrowSchema* schema, struct ArrowSchema* schema_out) {
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(schema_out, NANOARROW_TYPE_NA));
+
+ int result = ArrowSchemaSetFormat(schema_out, schema->format);
+ if (result != NANOARROW_OK) {
+ schema_out->release(schema_out);
+ return result;
+ }
+
+ result = ArrowSchemaSetName(schema_out, schema->name);
+ if (result != NANOARROW_OK) {
+ schema_out->release(schema_out);
+ return result;
+ }
+
+ result = ArrowSchemaSetMetadata(schema_out, schema->metadata);
+ if (result != NANOARROW_OK) {
+ schema_out->release(schema_out);
+ return result;
+ }
+
+ result = ArrowSchemaAllocateChildren(schema_out, schema->n_children);
+ if (result != NANOARROW_OK) {
+ schema_out->release(schema_out);
+ return result;
+ }
+
+ for (int64_t i = 0; i < schema->n_children; i++) {
+ result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]);
+ if (result != NANOARROW_OK) {
+ schema_out->release(schema_out);
+ return result;
+ }
+ }
+
+ if (schema->dictionary != NULL) {
+ result = ArrowSchemaAllocateDictionary(schema_out);
+ if (result != NANOARROW_OK) {
+ schema_out->release(schema_out);
+ return result;
+ }
+
+ result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary);
+ if (result != NANOARROW_OK) {
+ schema_out->release(schema_out);
+ return result;
+ }
+ }
+
+ return NANOARROW_OK;
+}
+
+static void ArrowSchemaViewSetPrimitive(struct ArrowSchemaView* schema_view,
+ enum ArrowType data_type) {
+ schema_view->data_type = data_type;
+ schema_view->storage_data_type = data_type;
+}
+
+static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
+ const char* format,
+ const char** format_end_out,
+ struct ArrowError* error) {
+ *format_end_out = format;
+
+ // needed for decimal parsing
+ const char* parse_start;
+ char* parse_end;
+
+ switch (format[0]) {
+ case 'n':
+ schema_view->data_type = NANOARROW_TYPE_NA;
+ schema_view->storage_data_type = NANOARROW_TYPE_NA;
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'b':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BOOL);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'c':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT8);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'C':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT8);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 's':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT16);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'S':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT16);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'i':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'I':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT32);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'l':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'L':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT64);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'e':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_HALF_FLOAT);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'f':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_FLOAT);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'g':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DOUBLE);
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+
+ // decimal
+ case 'd':
+ if (format[1] != ':' || format[2] == '\0') {
+ ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'",
+ format + 3);
+ return EINVAL;
+ }
+
+ parse_start = format + 2;
+ schema_view->decimal_precision = strtol(parse_start, &parse_end, 10);
+ if (parse_end == parse_start || parse_end[0] != ',') {
+ ArrowErrorSet(error, "Expected 'precision,scale[,bitwidth]' following 'd:'");
+ return EINVAL;
+ }
+
+ parse_start = parse_end + 1;
+ schema_view->decimal_scale = strtol(parse_start, &parse_end, 10);
+ if (parse_end == parse_start) {
+ ArrowErrorSet(error, "Expected 'scale[,bitwidth]' following 'd:precision,'");
+ return EINVAL;
+ } else if (parse_end[0] != ',') {
+ schema_view->decimal_bitwidth = 128;
+ } else {
+ parse_start = parse_end + 1;
+ schema_view->decimal_bitwidth = strtol(parse_start, &parse_end, 10);
+ if (parse_start == parse_end) {
+ ArrowErrorSet(error, "Expected precision following 'd:precision,scale,'");
+ return EINVAL;
+ }
+ }
+
+ *format_end_out = parse_end;
+
+ switch (schema_view->decimal_bitwidth) {
+ case 128:
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128);
+ return NANOARROW_OK;
+ case 256:
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL256);
+ return NANOARROW_OK;
+ default:
+ ArrowErrorSet(error, "Expected decimal bitwidth of 128 or 256 but found %d",
+ (int)schema_view->decimal_bitwidth);
+ return EINVAL;
+ }
+
+ // validity + data
+ case 'w':
+ schema_view->data_type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
+ schema_view->storage_data_type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
+ if (format[1] != ':' || format[2] == '\0') {
+ ArrowErrorSet(error, "Expected ':<width>' following 'w'");
+ return EINVAL;
+ }
+
+ schema_view->fixed_size = strtol(format + 2, (char**)format_end_out, 10);
+ return NANOARROW_OK;
+
+ // validity + offset + data
+ case 'z':
+ schema_view->data_type = NANOARROW_TYPE_BINARY;
+ schema_view->storage_data_type = NANOARROW_TYPE_BINARY;
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'u':
+ schema_view->data_type = NANOARROW_TYPE_STRING;
+ schema_view->storage_data_type = NANOARROW_TYPE_STRING;
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+
+ // validity + large_offset + data
+ case 'Z':
+ schema_view->data_type = NANOARROW_TYPE_LARGE_BINARY;
+ schema_view->storage_data_type = NANOARROW_TYPE_LARGE_BINARY;
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+ case 'U':
+ schema_view->data_type = NANOARROW_TYPE_LARGE_STRING;
+ schema_view->storage_data_type = NANOARROW_TYPE_LARGE_STRING;
+ *format_end_out = format + 1;
+ return NANOARROW_OK;
+
+ // nested types
+ case '+':
+ switch (format[1]) {
+ // list has validity + offset or offset
+ case 'l':
+ schema_view->storage_data_type = NANOARROW_TYPE_LIST;
+ schema_view->data_type = NANOARROW_TYPE_LIST;
+ *format_end_out = format + 2;
+ return NANOARROW_OK;
+
+ // large list has validity + large_offset or large_offset
+ case 'L':
+ schema_view->storage_data_type = NANOARROW_TYPE_LARGE_LIST;
+ schema_view->data_type = NANOARROW_TYPE_LARGE_LIST;
+ *format_end_out = format + 2;
+ return NANOARROW_OK;
+
+ // just validity buffer
+ case 'w':
+ if (format[2] != ':' || format[3] == '\0') {
+ ArrowErrorSet(error, "Expected ':<width>' following '+w'");
+ return EINVAL;
+ }
+
+ schema_view->storage_data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
+ schema_view->data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
+ schema_view->fixed_size = strtol(format + 3, (char**)format_end_out, 10);
+ return NANOARROW_OK;
+ case 's':
+ schema_view->storage_data_type = NANOARROW_TYPE_STRUCT;
+ schema_view->data_type = NANOARROW_TYPE_STRUCT;
+ *format_end_out = format + 2;
+ return NANOARROW_OK;
+ case 'm':
+ schema_view->storage_data_type = NANOARROW_TYPE_MAP;
+ schema_view->data_type = NANOARROW_TYPE_MAP;
+ *format_end_out = format + 2;
+ return NANOARROW_OK;
+
+ // unions
+ case 'u':
+ switch (format[2]) {
+ case 'd':
+ schema_view->storage_data_type = NANOARROW_TYPE_DENSE_UNION;
+ schema_view->data_type = NANOARROW_TYPE_DENSE_UNION;
+ break;
+ case 's':
+ schema_view->storage_data_type = NANOARROW_TYPE_SPARSE_UNION;
+ schema_view->data_type = NANOARROW_TYPE_SPARSE_UNION;
+ break;
+ default:
+ ArrowErrorSet(error,
+ "Expected union format string +us:<type_ids> or "
+ "+ud:<type_ids> but found '%s'",
+ format);
+ return EINVAL;
+ }
+
+ if (format[3] == ':') {
+ schema_view->union_type_ids.data = format + 4;
+ schema_view->union_type_ids.n_bytes = strlen(format + 4);
+ *format_end_out = format + strlen(format);
+ return NANOARROW_OK;
+ } else {
+ ArrowErrorSet(error,
+ "Expected union format string +us:<type_ids> or +ud:<type_ids> "
+ "but found '%s'",
+ format);
+ return EINVAL;
+ }
+ }
+
+ // date/time types
+ case 't':
+ switch (format[1]) {
+ // date
+ case 'd':
+ switch (format[2]) {
+ case 'D':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+ schema_view->data_type = NANOARROW_TYPE_DATE32;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'm':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+ schema_view->data_type = NANOARROW_TYPE_DATE64;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ default:
+ ArrowErrorSet(error, "Expected 'D' or 'm' following 'td' but found '%s'",
+ format + 2);
+ return EINVAL;
+ }
+
+ // time of day
+ case 't':
+ switch (format[2]) {
+ case 's':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+ schema_view->data_type = NANOARROW_TYPE_TIME32;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'm':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+ schema_view->data_type = NANOARROW_TYPE_TIME32;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'u':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+ schema_view->data_type = NANOARROW_TYPE_TIME64;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'n':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+ schema_view->data_type = NANOARROW_TYPE_TIME64;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ default:
+ ArrowErrorSet(
+ error, "Expected 's', 'm', 'u', or 'n' following 'tt' but found '%s'",
+ format + 2);
+ return EINVAL;
+ }
+
+ // timestamp
+ case 's':
+ switch (format[2]) {
+ case 's':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+ schema_view->data_type = NANOARROW_TYPE_TIMESTAMP;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
+ break;
+ case 'm':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+ schema_view->data_type = NANOARROW_TYPE_TIMESTAMP;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
+ break;
+ case 'u':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+ schema_view->data_type = NANOARROW_TYPE_TIMESTAMP;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
+ break;
+ case 'n':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+ schema_view->data_type = NANOARROW_TYPE_TIMESTAMP;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
+ break;
+ default:
+ ArrowErrorSet(
+ error, "Expected 's', 'm', 'u', or 'n' following 'ts' but found '%s'",
+ format + 2);
+ return EINVAL;
+ }
+
+ if (format[3] != ':') {
+ ArrowErrorSet(error, "Expected ':' following '%.3s' but found '%s'", format,
+ format + 3);
+ return EINVAL;
+ }
+
+ schema_view->timezone.data = format + 4;
+ schema_view->timezone.n_bytes = strlen(format + 4);
+ *format_end_out = format + strlen(format);
+ return NANOARROW_OK;
+
+ // duration
+ case 'D':
+ switch (format[2]) {
+ case 's':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+ schema_view->data_type = NANOARROW_TYPE_DURATION;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'm':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
+ schema_view->data_type = NANOARROW_TYPE_DURATION;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'u':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+ schema_view->data_type = NANOARROW_TYPE_DURATION;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'n':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
+ schema_view->data_type = NANOARROW_TYPE_DURATION;
+ schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ default:
+ ArrowErrorSet(error,
+ "Expected 's', 'm', u', or 'n' following 'tD' but found '%s'",
+ format + 2);
+ return EINVAL;
+ }
+
+ // interval
+ case 'i':
+ switch (format[2]) {
+ case 'M':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_MONTHS);
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'D':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_DAY_TIME);
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'n':
+ ArrowSchemaViewSetPrimitive(schema_view,
+ NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO);
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ default:
+ ArrowErrorSet(error,
+ "Expected 'M', 'D', or 'n' following 'ti' but found '%s'",
+ format + 2);
+ return EINVAL;
+ }
+
+ default:
+ ArrowErrorSet(
+ error, "Expected 'd', 't', 's', 'D', or 'i' following 't' but found '%s'",
+ format + 1);
+ return EINVAL;
+ }
+
+ default:
+ ArrowErrorSet(error, "Unknown format: '%s'", format);
+ return EINVAL;
+ }
+}
+
+ // Checks that the schema wrapped by schema_view has the expected number of
+ // children and that each child pointer can be inspected without crashing.
+ //
+ // n_children: the required number of children, or -1 to accept any count.
+ // Returns NANOARROW_OK on success; otherwise sets error and returns EINVAL.
+ static ArrowErrorCode ArrowSchemaViewValidateNChildren(
+     struct ArrowSchemaView* schema_view, int64_t n_children, struct ArrowError* error) {
+   if (n_children != -1 && schema_view->schema->n_children != n_children) {
+     ArrowErrorSet(error, "Expected schema with %d children but found %d children",
+                   (int)n_children, (int)schema_view->schema->n_children);
+     return EINVAL;
+   }
+
+   // A positive child count with no children array would be dereferenced below
+   if (schema_view->schema->n_children > 0 && schema_view->schema->children == NULL) {
+     ArrowErrorSet(error,
+                   "Expected schema with %d children but found NULL schema->children",
+                   (int)schema_view->schema->n_children);
+     return EINVAL;
+   }
+
+   // Don't do a full validation of children but do check that they won't
+   // segfault if inspected
+   struct ArrowSchema* child;
+   for (int64_t i = 0; i < schema_view->schema->n_children; i++) {
+     child = schema_view->schema->children[i];
+     if (child == NULL) {
+       // The index is an int64_t: cast it so that it matches the %d conversion
+       ArrowErrorSet(error, "Expected valid schema at schema->children[%d] but found NULL",
+                     (int)i);
+       return EINVAL;
+     } else if (child->release == NULL) {
+       ArrowErrorSet(
+           error,
+           "Expected valid schema at schema->children[%d] but found a released schema",
+           (int)i);
+       return EINVAL;
+     }
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Validates a (dense or sparse) union schema: any number of children is
+ // accepted, but each child must be safe to inspect.
+ static ArrowErrorCode ArrowSchemaViewValidateUnion(struct ArrowSchemaView* schema_view,
+                                                    struct ArrowError* error) {
+   const int64_t any_number_of_children = -1;
+   ArrowErrorCode status =
+       ArrowSchemaViewValidateNChildren(schema_view, any_number_of_children, error);
+   return status;
+ }
+
+ // Validates a map schema: it must have exactly one child, and that child must
+ // be a struct ('+s') with exactly two children.
+ //
+ // Returns NANOARROW_OK on success; otherwise sets error and returns EINVAL.
+ static ArrowErrorCode ArrowSchemaViewValidateMap(struct ArrowSchemaView* schema_view,
+                                                  struct ArrowError* error) {
+   NANOARROW_RETURN_NOT_OK(ArrowSchemaViewValidateNChildren(schema_view, 1, error));
+
+   struct ArrowSchema* entries = schema_view->schema->children[0];
+
+   if (entries->n_children != 2) {
+     ArrowErrorSet(error, "Expected child of map type to have 2 children but found %d",
+                   (int)entries->n_children);
+     return EINVAL;
+   }
+
+   // The child has only been checked for a non-NULL release callback so far;
+   // a NULL format must not reach strcmp() or a %s conversion.
+   if (entries->format == NULL) {
+     ArrowErrorSet(error,
+                   "Expected format of child of map type to be '+s' but found NULL");
+     return EINVAL;
+   }
+
+   if (strcmp(entries->format, "+s") != 0) {
+     ArrowErrorSet(error, "Expected format of child of map type to be '+s' but found '%s'",
+                   entries->format);
+     return EINVAL;
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Validates a dictionary-encoded schema: the storage (index) type must be one
+ // of the integer types, and schema->dictionary must itself initialize a
+ // schema view successfully.
+ static ArrowErrorCode ArrowSchemaViewValidateDictionary(
+     struct ArrowSchemaView* schema_view, struct ArrowError* error) {
+   struct ArrowSchemaView values_view;
+
+   switch (schema_view->storage_data_type) {
+     case NANOARROW_TYPE_INT8:
+     case NANOARROW_TYPE_UINT8:
+     case NANOARROW_TYPE_INT16:
+     case NANOARROW_TYPE_UINT16:
+     case NANOARROW_TYPE_INT32:
+     case NANOARROW_TYPE_UINT32:
+     case NANOARROW_TYPE_INT64:
+     case NANOARROW_TYPE_UINT64:
+       // Integer index type: the remaining check is the dictionary itself
+       return ArrowSchemaViewInit(&values_view, schema_view->schema->dictionary, error);
+
+     default:
+       break;
+   }
+
+   ArrowErrorSet(
+       error,
+       "Expected dictionary schema index type to be an integral type but found '%s'",
+       schema_view->schema->format);
+   return EINVAL;
+ }
+
+ // Validates the schema wrapped by schema_view against the structural
+ // requirements of data_type (number of children, fixed size, map layout,
+ // dictionary index type).
+ //
+ // data_type is passed explicitly because the caller validates the storage
+ // type and the logical type separately.
+ // Returns NANOARROW_OK on success; otherwise sets error and returns EINVAL.
+ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_view,
+                                               enum ArrowType data_type,
+                                               struct ArrowError* error) {
+   switch (data_type) {
+     // Types that never have children
+     case NANOARROW_TYPE_NA:
+     case NANOARROW_TYPE_BOOL:
+     case NANOARROW_TYPE_UINT8:
+     case NANOARROW_TYPE_INT8:
+     case NANOARROW_TYPE_UINT16:
+     case NANOARROW_TYPE_INT16:
+     case NANOARROW_TYPE_UINT32:
+     case NANOARROW_TYPE_INT32:
+     case NANOARROW_TYPE_UINT64:
+     case NANOARROW_TYPE_INT64:
+     case NANOARROW_TYPE_HALF_FLOAT:
+     case NANOARROW_TYPE_FLOAT:
+     case NANOARROW_TYPE_DOUBLE:
+     case NANOARROW_TYPE_DECIMAL128:
+     case NANOARROW_TYPE_DECIMAL256:
+     case NANOARROW_TYPE_STRING:
+     case NANOARROW_TYPE_LARGE_STRING:
+     case NANOARROW_TYPE_BINARY:
+     case NANOARROW_TYPE_LARGE_BINARY:
+     case NANOARROW_TYPE_DATE32:
+     case NANOARROW_TYPE_DATE64:
+     case NANOARROW_TYPE_INTERVAL_MONTHS:
+     case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+     case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+     case NANOARROW_TYPE_TIMESTAMP:
+     case NANOARROW_TYPE_TIME32:
+     case NANOARROW_TYPE_TIME64:
+     case NANOARROW_TYPE_DURATION:
+       return ArrowSchemaViewValidateNChildren(schema_view, 0, error);
+
+     case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+       if (schema_view->fixed_size <= 0) {
+         ArrowErrorSet(error, "Expected size > 0 for fixed size binary but found size %d",
+                       schema_view->fixed_size);
+         return EINVAL;
+       }
+       return ArrowSchemaViewValidateNChildren(schema_view, 0, error);
+
+     // List-like types have exactly one child
+     case NANOARROW_TYPE_LIST:
+     case NANOARROW_TYPE_LARGE_LIST:
+     case NANOARROW_TYPE_FIXED_SIZE_LIST:
+       return ArrowSchemaViewValidateNChildren(schema_view, 1, error);
+
+     // Structs may have any number of children
+     case NANOARROW_TYPE_STRUCT:
+       return ArrowSchemaViewValidateNChildren(schema_view, -1, error);
+
+     case NANOARROW_TYPE_SPARSE_UNION:
+     case NANOARROW_TYPE_DENSE_UNION:
+       return ArrowSchemaViewValidateUnion(schema_view, error);
+
+     case NANOARROW_TYPE_MAP:
+       return ArrowSchemaViewValidateMap(schema_view, error);
+
+     case NANOARROW_TYPE_DICTIONARY:
+       return ArrowSchemaViewValidateDictionary(schema_view, error);
+
+     default:
+       // Report the value that was actually validated: when the storage type
+       // is being checked it can differ from schema_view->data_type.
+       ArrowErrorSet(error, "Expected a valid enum ArrowType value but found %d",
+                     (int)data_type);
+       return EINVAL;
+   }
+ }
+
+ // Populates schema_view from schema: parses schema->format, validates the
+ // storage and logical types, initializes the buffer layout and looks up the
+ // extension name/metadata in schema->metadata.
+ //
+ // Returns NANOARROW_OK on success; otherwise sets error and returns the
+ // failing error code (EINVAL for the checks performed directly here).
+ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
+                                    struct ArrowSchema* schema, struct ArrowError* error) {
+   if (schema == NULL) {
+     ArrowErrorSet(error, "Expected non-NULL schema");
+     return EINVAL;
+   }
+
+   if (schema->release == NULL) {
+     ArrowErrorSet(error, "Expected non-released schema");
+     return EINVAL;
+   }
+
+   schema_view->schema = schema;
+
+   const char* format = schema->format;
+   if (format == NULL) {
+     ArrowErrorSet(
+         error,
+         "Error parsing schema->format: Expected a null-terminated string but found NULL");
+     return EINVAL;
+   }
+
+   int format_len = strlen(format);
+   if (format_len == 0) {
+     ArrowErrorSet(error, "Error parsing schema->format: Expected a string with size > 0");
+     return EINVAL;
+   }
+
+   // Parse the format string; the parser reports where it stopped reading
+   const char* parsed_end;
+   ArrowErrorCode status = ArrowSchemaViewParse(schema_view, format, &parsed_end, error);
+   if (status != NANOARROW_OK) {
+     // Copy the parser's message first so it can be embedded in the new one
+     char parse_message[1024];
+     memcpy(parse_message, ArrowErrorMessage(error), 1024);
+     ArrowErrorSet(error, "Error parsing schema->format: %s", parse_message);
+     return status;
+   }
+
+   // Trailing characters that the parser did not consume are an error
+   if (parsed_end != (format + format_len)) {
+     ArrowErrorSet(error, "Error parsing schema->format '%s': parsed %d/%d characters",
+                   format, (int)(parsed_end - format), (int)(format_len));
+     return EINVAL;
+   }
+
+   if (schema->dictionary != NULL) {
+     schema_view->data_type = NANOARROW_TYPE_DICTIONARY;
+   }
+
+   // Validate the storage type, then the logical type when it differs
+   status = ArrowSchemaViewValidate(schema_view, schema_view->storage_data_type, error);
+   if (status == NANOARROW_OK &&
+       schema_view->storage_data_type != schema_view->data_type) {
+     status = ArrowSchemaViewValidate(schema_view, schema_view->data_type, error);
+   }
+   if (status != NANOARROW_OK) {
+     return status;
+   }
+
+   // Fixed-size types carry their size in the layout
+   ArrowLayoutInit(&schema_view->layout, schema_view->storage_data_type);
+   switch (schema_view->storage_data_type) {
+     case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+       schema_view->layout.element_size_bits[1] = schema_view->fixed_size * 8;
+       break;
+     case NANOARROW_TYPE_FIXED_SIZE_LIST:
+       schema_view->layout.child_size_elements = schema_view->fixed_size;
+       break;
+     default:
+       break;
+   }
+
+   // Extension fields stay as the NULL view when the keys are absent
+   schema_view->extension_name = ArrowCharView(NULL);
+   ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:name"),
+                         &schema_view->extension_name);
+
+   schema_view->extension_metadata = ArrowCharView(NULL);
+   ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:metadata"),
+                         &schema_view->extension_metadata);
+
+   return NANOARROW_OK;
+ }
+
+ // Points reader at metadata and positions it before the first key/value
+ // pair. A NULL metadata pointer yields a reader with zero remaining keys.
+ // Always returns NANOARROW_OK.
+ ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader,
+                                        const char* metadata) {
+   reader->metadata = metadata;
+
+   if (metadata != NULL) {
+     // The buffer starts with an int32 key count; pairs follow it
+     memcpy(&reader->remaining_keys, metadata, sizeof(int32_t));
+     reader->offset = sizeof(int32_t);
+     return NANOARROW_OK;
+   }
+
+   reader->offset = 0;
+   reader->remaining_keys = 0;
+   return NANOARROW_OK;
+ }
+
+ // Reads the next key/value pair from reader into key_out and value_out (as
+ // views into the metadata buffer, not copies) and advances the reader.
+ // Returns EINVAL when no keys remain, otherwise NANOARROW_OK.
+ ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader,
+                                        struct ArrowStringView* key_out,
+                                        struct ArrowStringView* value_out) {
+   if (reader->remaining_keys <= 0) {
+     return EINVAL;
+   }
+
+   const char* entry_start = reader->metadata + reader->offset;
+   const char* cursor = entry_start;
+   int32_t field_size;
+
+   // int32 key length followed by the key bytes
+   memcpy(&field_size, cursor, sizeof(int32_t));
+   cursor += sizeof(int32_t);
+   key_out->data = cursor;
+   key_out->n_bytes = field_size;
+   cursor += field_size;
+
+   // int32 value length followed by the value bytes
+   memcpy(&field_size, cursor, sizeof(int32_t));
+   cursor += sizeof(int32_t);
+   value_out->data = cursor;
+   value_out->n_bytes = field_size;
+   cursor += field_size;
+
+   reader->offset += cursor - entry_start;
+   reader->remaining_keys--;
+   return NANOARROW_OK;
+ }
+
+ // Returns the number of bytes occupied by a metadata buffer: the int32 key
+ // count plus, for every pair, two int32 lengths and the key and value bytes.
+ // A NULL metadata pointer has size 0.
+ int64_t ArrowMetadataSizeOf(const char* metadata) {
+   if (metadata == NULL) {
+     return 0;
+   }
+
+   struct ArrowMetadataReader reader;
+   ArrowMetadataReaderInit(&reader, metadata);
+
+   struct ArrowStringView key;
+   struct ArrowStringView value;
+   int64_t total_bytes = sizeof(int32_t);
+
+   for (;;) {
+     if (ArrowMetadataReaderRead(&reader, &key, &value) != NANOARROW_OK) {
+       break;
+     }
+
+     total_bytes += sizeof(int32_t) + key.n_bytes + sizeof(int32_t) + value.n_bytes;
+   }
+
+   return total_bytes;
+ }
+
+ // Scans metadata for the first pair whose key equals *key and, if found,
+ // points value_out at its value (a view into metadata, not a copy).
+ // value_out is left untouched when the key is absent.
+ // Always returns NANOARROW_OK.
+ static ArrowErrorCode ArrowMetadataGetValueInternal(const char* metadata,
+                                                     struct ArrowStringView* key,
+                                                     struct ArrowStringView* value_out) {
+   struct ArrowMetadataReader reader;
+   struct ArrowStringView existing_key;
+   struct ArrowStringView existing_value;
+   ArrowMetadataReaderInit(&reader, metadata);
+
+   while (ArrowMetadataReaderRead(&reader, &existing_key, &existing_value) ==
+          NANOARROW_OK) {
+     // Keys are length-delimited: compare the lengths before the bytes
+     int key_equal = key->n_bytes == existing_key.n_bytes &&
+                     strncmp(key->data, existing_key.data, existing_key.n_bytes) == 0;
+     if (key_equal) {
+       value_out->data = existing_value.data;
+       value_out->n_bytes = existing_value.n_bytes;
+       break;
+     }
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Looks up key in metadata and, when present, points value_out at its
+ // value. Returns EINVAL when value_out is NULL.
+ ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key,
+                                      struct ArrowStringView* value_out) {
+   return (value_out == NULL)
+              ? EINVAL
+              : ArrowMetadataGetValueInternal(metadata, &key, value_out);
+ }
+
+ // Returns non-zero when the lookup of key in metadata yields a value with a
+ // non-NULL data pointer.
+ char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key) {
+   // Start from the NULL view so that an absent key stays detectable
+   struct ArrowStringView found = ArrowCharView(NULL);
+   ArrowMetadataGetValue(metadata, key, &found);
+
+   char has_key = found.data != NULL;
+   return has_key;
+ }
+
+ // Initializes buffer and appends the bytes of metadata (whose size is
+ // computed with ArrowMetadataSizeOf()) to it.
+ // Returns the result of the append.
+ ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer,
+                                         const char* metadata) {
+   ArrowBufferInit(buffer);
+
+   int64_t metadata_size_bytes = ArrowMetadataSizeOf(metadata);
+   return ArrowBufferAppend(buffer, metadata, metadata_size_bytes);
+ }
+
+ // Appends one key/value pair to the metadata held in buffer and increments
+ // the int32 key count stored at the start of the buffer. A NULL value is a
+ // no-op. A buffer with zero capacity is first given a key count of 0.
+ //
+ // Returns NANOARROW_OK on success, EINVAL when the buffer capacity cannot
+ // hold the key count, or the error from growing the buffer.
+ static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buffer,
+                                                          struct ArrowStringView* key,
+                                                          struct ArrowStringView* value) {
+   // Nothing to append
+   if (value == NULL) {
+     return NANOARROW_OK;
+   }
+
+   // Write the initial key count for a buffer with no capacity
+   if (buffer->capacity_bytes == 0) {
+     NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(buffer, 0));
+   }
+
+   if (buffer->capacity_bytes < sizeof(int32_t)) {
+     return EINVAL;
+   }
+
+   int32_t pair_count;
+   memcpy(&pair_count, buffer->data, sizeof(int32_t));
+
+   int32_t key_length = key->n_bytes;
+   int32_t value_length = value->n_bytes;
+
+   // Reserve space for the whole pair up front so the appends cannot fail
+   NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(
+       buffer, sizeof(int32_t) + key_length + sizeof(int32_t) + value_length));
+
+   ArrowBufferAppendUnsafe(buffer, &key_length, sizeof(int32_t));
+   ArrowBufferAppendUnsafe(buffer, key->data, key_length);
+   ArrowBufferAppendUnsafe(buffer, &value_length, sizeof(int32_t));
+   ArrowBufferAppendUnsafe(buffer, value->data, value_length);
+
+   // buffer->data is re-read here because reserving may have moved it
+   pair_count++;
+   memcpy(buffer->data, &pair_count, sizeof(int32_t));
+
+   return NANOARROW_OK;
+ }
+
+ // Sets, replaces or removes a key in the metadata held in buffer.
+ //
+ // value == NULL removes the key. When the key is absent the pair is appended
+ // in place; otherwise the metadata is rebuilt into a new buffer with the
+ // first matching pair replaced and later pairs with the same key dropped.
+ //
+ // Returns NANOARROW_OK on success or the error of the failing read/append;
+ // buffer is left unmodified on failure.
+ static ArrowErrorCode ArrowMetadataBuilderSetInternal(struct ArrowBuffer* buffer,
+                                                       struct ArrowStringView* key,
+                                                       struct ArrowStringView* value) {
+   // Inspect the current value to see if we can avoid copying the buffer
+   struct ArrowStringView current_value = ArrowCharView(NULL);
+   NANOARROW_RETURN_NOT_OK(
+       ArrowMetadataGetValueInternal((const char*)buffer->data, key, &current_value));
+
+   // The key should be removed but no key exists
+   if (value == NULL && current_value.data == NULL) {
+     return NANOARROW_OK;
+   }
+
+   // The key/value can be appended because no key exists
+   if (value != NULL && current_value.data == NULL) {
+     return ArrowMetadataBuilderAppendInternal(buffer, key, value);
+   }
+
+   struct ArrowMetadataReader reader;
+   struct ArrowStringView existing_key;
+   struct ArrowStringView existing_value;
+   NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, (const char*)buffer->data));
+
+   struct ArrowBuffer new_buffer;
+   NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&new_buffer, NULL));
+
+   while (reader.remaining_keys > 0) {
+     int result = ArrowMetadataReaderRead(&reader, &existing_key, &existing_value);
+     if (result != NANOARROW_OK) {
+       ArrowBufferReset(&new_buffer);
+       return result;
+     }
+
+     if (key->n_bytes == existing_key.n_bytes &&
+         strncmp((const char*)key->data, (const char*)existing_key.data,
+                 existing_key.n_bytes) == 0) {
+       // Appending a NULL value is a no-op, which is how removal works; after
+       // the first match value is cleared so later duplicates are dropped too
+       result = ArrowMetadataBuilderAppendInternal(&new_buffer, key, value);
+       value = NULL;
+     } else {
+       result =
+           ArrowMetadataBuilderAppendInternal(&new_buffer, &existing_key, &existing_value);
+     }
+
+     if (result != NANOARROW_OK) {
+       ArrowBufferReset(&new_buffer);
+       return result;
+     }
+   }
+
+   // Swap the rebuilt metadata in for the old contents
+   ArrowBufferReset(buffer);
+   ArrowBufferMove(&new_buffer, buffer);
+   return NANOARROW_OK;
+ }
+
+ // Appends a key/value pair to the metadata in buffer without checking
+ // whether the key already exists.
+ ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer,
+                                           struct ArrowStringView key,
+                                           struct ArrowStringView value) {
+   struct ArrowStringView* key_ptr = &key;
+   struct ArrowStringView* value_ptr = &value;
+   return ArrowMetadataBuilderAppendInternal(buffer, key_ptr, value_ptr);
+ }
+
+ // Sets key to value in the metadata in buffer, replacing an existing entry
+ // for the key or appending a new one.
+ ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer,
+                                        struct ArrowStringView key,
+                                        struct ArrowStringView value) {
+   struct ArrowStringView* key_ptr = &key;
+   struct ArrowStringView* value_ptr = &value;
+   return ArrowMetadataBuilderSetInternal(buffer, key_ptr, value_ptr);
+ }
+
+ // Removes key from the metadata in buffer; a no-op when the key is absent.
+ ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer,
+                                           struct ArrowStringView key) {
+   // A NULL value asks the setter to drop the key
+   struct ArrowStringView* no_value = NULL;
+   return ArrowMetadataBuilderSetInternal(buffer, &key, no_value);
+ }
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+ // Release callback installed by ArrowArrayInit(): frees the private data
+ // (bitmap and buffers), releases and frees every child and the dictionary,
+ // then marks the array as released.
+ static void ArrowArrayRelease(struct ArrowArray* array) {
+   // Buffers owned through the private data
+   struct ArrowArrayPrivateData* owned =
+       (struct ArrowArrayPrivateData*)array->private_data;
+   if (owned != NULL) {
+     ArrowBitmapReset(&owned->bitmap);
+     ArrowBufferReset(&owned->buffers[0]);
+     ArrowBufferReset(&owned->buffers[1]);
+     ArrowFree(owned);
+   }
+
+   // The memory of each child struct belongs to this array, but a child may
+   // have been produced elsewhere and carry its own release() callback.
+   if (array->children != NULL) {
+     for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
+       struct ArrowArray* child = array->children[child_i];
+       if (child == NULL) {
+         continue;
+       }
+
+       if (child->release != NULL) {
+         child->release(child);
+       }
+
+       ArrowFree(child);
+     }
+
+     ArrowFree(array->children);
+   }
+
+   // Same ownership rule for the dictionary
+   struct ArrowArray* dictionary = array->dictionary;
+   if (dictionary != NULL) {
+     if (dictionary->release != NULL) {
+       dictionary->release(dictionary);
+     }
+
+     ArrowFree(dictionary);
+   }
+
+   // Mark released
+   array->release = NULL;
+ }
+
+ // Sets array->n_buffers according to storage_type and records the storage
+ // type in the array's private data.
+ //
+ // Returns EINVAL (leaving the private data untouched) for a storage type
+ // that is not handled here, otherwise NANOARROW_OK.
+ ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
+                                         enum ArrowType storage_type) {
+   switch (storage_type) {
+     case NANOARROW_TYPE_UNINITIALIZED:
+     case NANOARROW_TYPE_NA:
+       array->n_buffers = 0;
+       break;
+
+     case NANOARROW_TYPE_FIXED_SIZE_LIST:
+     case NANOARROW_TYPE_STRUCT:
+     case NANOARROW_TYPE_MAP:
+     case NANOARROW_TYPE_SPARSE_UNION:
+       array->n_buffers = 1;
+       break;
+
+     case NANOARROW_TYPE_LIST:
+     case NANOARROW_TYPE_LARGE_LIST:
+     case NANOARROW_TYPE_BOOL:
+     case NANOARROW_TYPE_UINT8:
+     case NANOARROW_TYPE_INT8:
+     case NANOARROW_TYPE_UINT16:
+     case NANOARROW_TYPE_INT16:
+     case NANOARROW_TYPE_UINT32:
+     case NANOARROW_TYPE_INT32:
+     case NANOARROW_TYPE_UINT64:
+     case NANOARROW_TYPE_INT64:
+     case NANOARROW_TYPE_HALF_FLOAT:
+     case NANOARROW_TYPE_FLOAT:
+     case NANOARROW_TYPE_DOUBLE:
+     case NANOARROW_TYPE_DECIMAL128:
+     case NANOARROW_TYPE_DECIMAL256:
+     case NANOARROW_TYPE_INTERVAL_MONTHS:
+     case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+     case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+     case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+     case NANOARROW_TYPE_DENSE_UNION:
+       array->n_buffers = 2;
+       break;
+
+     case NANOARROW_TYPE_STRING:
+     case NANOARROW_TYPE_LARGE_STRING:
+     case NANOARROW_TYPE_BINARY:
+     case NANOARROW_TYPE_LARGE_BINARY:
+       array->n_buffers = 3;
+       break;
+
+     default:
+       return EINVAL;
+   }
+
+   struct ArrowArrayPrivateData* private_data =
+       (struct ArrowArrayPrivateData*)array->private_data;
+   private_data->storage_type = storage_type;
+   return NANOARROW_OK;
+ }
+
+ // Initializes array as an empty, owning array of the given storage type:
+ // zero length, no children, no dictionary, and freshly allocated private
+ // data holding the validity bitmap and two data buffers.
+ //
+ // Returns ENOMEM when the private data cannot be allocated and EINVAL for an
+ // unsupported storage type; in both cases array is left released.
+ ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType storage_type) {
+   // Start from an empty array with the release callback installed
+   array->length = 0;
+   array->null_count = 0;
+   array->offset = 0;
+   array->n_buffers = 0;
+   array->n_children = 0;
+   array->buffers = NULL;
+   array->children = NULL;
+   array->dictionary = NULL;
+   array->release = &ArrowArrayRelease;
+   array->private_data = NULL;
+
+   struct ArrowArrayPrivateData* data =
+       (struct ArrowArrayPrivateData*)ArrowMalloc(sizeof(struct ArrowArrayPrivateData));
+   if (data == NULL) {
+     array->release = NULL;
+     return ENOMEM;
+   }
+
+   ArrowBitmapInit(&data->bitmap);
+   ArrowBufferInit(&data->buffers[0]);
+   ArrowBufferInit(&data->buffers[1]);
+   for (int slot = 0; slot < 3; slot++) {
+     data->buffer_data[slot] = NULL;
+   }
+
+   // array->buffers aliases the pointer table kept in the private data
+   array->private_data = data;
+   array->buffers = (const void**)(&data->buffer_data);
+
+   int status = ArrowArraySetStorageType(array, storage_type);
+   if (status != NANOARROW_OK) {
+     array->release(array);
+     return status;
+   }
+
+   ArrowLayoutInit(&data->layout, storage_type);
+   return NANOARROW_OK;
+ }
+
+ // Initializes array (recursively, including children) so that its storage
+ // type, layout and child structure mirror array_view.
+ //
+ // Returns NANOARROW_OK on success. On failure the error code of the failing
+ // step is returned and array is left released.
+ static ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
+                                                   struct ArrowArrayView* array_view,
+                                                   struct ArrowError* error) {
+   // ArrowArrayInit() leaves array released when it fails, so private_data
+   // and release must not be touched in that case.
+   int result = ArrowArrayInit(array, array_view->storage_type);
+   if (result != NANOARROW_OK) {
+     return result;
+   }
+
+   struct ArrowArrayPrivateData* private_data =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   result = ArrowArrayAllocateChildren(array, array_view->n_children);
+   if (result != NANOARROW_OK) {
+     array->release(array);
+     return result;
+   }
+
+   // The view's layout may carry type parameters (e.g. fixed sizes) that
+   // ArrowArrayInit() cannot know from the storage type alone
+   private_data->layout = array_view->layout;
+
+   for (int64_t i = 0; i < array_view->n_children; i++) {
+     result =
+         ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
+     if (result != NANOARROW_OK) {
+       array->release(array);
+       return result;
+     }
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Initializes array (including children) with the structure described by
+ // schema, using a temporary array view built from the schema.
+ //
+ // Returns NANOARROW_OK on success, otherwise the error of the failing step
+ // (with details in error where the callee sets them).
+ ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
+                                         struct ArrowSchema* schema,
+                                         struct ArrowError* error) {
+   struct ArrowArrayView array_view;
+   NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema, error));
+
+   // The temporary view must be reset on the failure path too, otherwise
+   // its child views are leaked
+   int result = ArrowArrayInitFromArrayView(array, &array_view, error);
+   ArrowArrayViewReset(&array_view);
+   return result;
+ }
+
+ // Allocates n_children released child arrays for array.
+ //
+ // Returns EINVAL when children were already allocated and ENOMEM when an
+ // allocation fails. After an ENOMEM failure the children allocated so far
+ // are owned by array and freed by its release callback.
+ ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children) {
+   if (array->children != NULL) {
+     return EINVAL;
+   }
+
+   if (n_children == 0) {
+     return NANOARROW_OK;
+   }
+
+   array->children =
+       (struct ArrowArray**)ArrowMalloc(n_children * sizeof(struct ArrowArray*));
+   if (array->children == NULL) {
+     return ENOMEM;
+   }
+
+   for (int64_t i = 0; i < n_children; i++) {
+     array->children[i] = NULL;
+   }
+
+   // Record the count before allocating the children so that a partial
+   // failure below does not leak the children that were already allocated:
+   // the release callback walks n_children entries and skips NULL ones.
+   array->n_children = n_children;
+
+   for (int64_t i = 0; i < n_children; i++) {
+     array->children[i] = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray));
+     if (array->children[i] == NULL) {
+       return ENOMEM;
+     }
+     array->children[i]->release = NULL;
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Allocates a released dictionary array for array.
+ // Returns EINVAL when a dictionary already exists and ENOMEM when the
+ // allocation fails.
+ ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array) {
+   if (array->dictionary != NULL) {
+     return EINVAL;
+   }
+
+   struct ArrowArray* dictionary =
+       (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray));
+   array->dictionary = dictionary;
+   if (dictionary == NULL) {
+     return ENOMEM;
+   }
+
+   // A NULL release callback marks the dictionary as not yet initialized
+   dictionary->release = NULL;
+   return NANOARROW_OK;
+ }
+
+ // Moves bitmap into array as its validity bitmap. The source bitmap is left
+ // with a size of 0 bits, and array->null_count is set to -1.
+ void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap) {
+   struct ArrowArrayPrivateData* data =
+       (struct ArrowArrayPrivateData*)array->private_data;
+   struct ArrowBitmap* validity = &data->bitmap;
+
+   // Transfer the buffer, then the bit count
+   ArrowBufferMove(&bitmap->buffer, &validity->buffer);
+   validity->size_bits = bitmap->size_bits;
+   bitmap->size_bits = 0;
+
+   // Keep the exported buffer pointer in sync with the new bitmap
+   data->buffer_data[0] = validity->buffer.data;
+   array->null_count = -1;
+ }
+
+ // Moves buffer into slot i of array: slot 0 is the validity bitmap buffer,
+ // slots 1 and 2 are the data buffers. Returns EINVAL for any other slot.
+ ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
+                                    struct ArrowBuffer* buffer) {
+   struct ArrowArrayPrivateData* data =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   struct ArrowBuffer* destination;
+   if (i == 0) {
+     destination = &data->bitmap.buffer;
+   } else if (i == 1 || i == 2) {
+     destination = &data->buffers[i - 1];
+   } else {
+     return EINVAL;
+   }
+
+   // Take ownership, then refresh the exported pointer for this slot
+   ArrowBufferMove(buffer, destination);
+   data->buffer_data[i] = destination->data;
+   return NANOARROW_OK;
+ }
+
+ // Builds array_view (recursively, including children) from the storage type
+ // and layout recorded in array's private data, and points the view at array.
+ // On failure the view is reset and the error code is returned.
+ static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_view,
+                                                   struct ArrowArray* array) {
+   struct ArrowArrayPrivateData* data =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   ArrowArrayViewInit(array_view, data->storage_type);
+   array_view->layout = data->layout;
+   array_view->array = array;
+
+   int status = ArrowArrayViewAllocateChildren(array_view, array->n_children);
+
+   // Stop at the first child that fails to initialize
+   for (int64_t child_i = 0; status == NANOARROW_OK && child_i < array->n_children;
+        child_i++) {
+     status =
+         ArrowArrayViewInitFromArray(array_view->children[child_i], array->children[child_i]);
+   }
+
+   if (status != NANOARROW_OK) {
+     ArrowArrayViewReset(array_view);
+     return status;
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Grows the buffers of array (recursively, including children) so that each
+ // can hold the byte count recorded in the matching buffer view of array_view.
+ // Returns the first error reported while reserving, otherwise NANOARROW_OK.
+ static ArrowErrorCode ArrowArrayReserveInternal(struct ArrowArray* array,
+                                                 struct ArrowArrayView* array_view) {
+   for (int64_t buffer_i = 0; buffer_i < array->n_buffers; buffer_i++) {
+     struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
+
+     // A validity buffer that has not been allocated yet stays unallocated
+     int is_validity =
+         array_view->layout.buffer_type[buffer_i] == NANOARROW_BUFFER_TYPE_VALIDITY;
+     if (is_validity && buffer->data == NULL) {
+       continue;
+     }
+
+     int64_t missing_bytes = array_view->buffer_views[buffer_i].n_bytes - buffer->size_bytes;
+     if (missing_bytes <= 0) {
+       continue;
+     }
+
+     NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, missing_bytes));
+   }
+
+   // Children are handled the same way, pairing each with its child view
+   for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
+     NANOARROW_RETURN_NOT_OK(
+         ArrowArrayReserveInternal(array->children[child_i], array_view->children[child_i]));
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Reserves space in the buffers of array (and, recursively, its children)
+ // for additional_size_elements more elements beyond array->length.
+ //
+ // Returns NANOARROW_OK on success or the error of the failing step.
+ ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array,
+                                  int64_t additional_size_elements) {
+   struct ArrowArrayView array_view;
+   NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array));
+
+   // Calculate theoretical buffer sizes (recursively)
+   ArrowArrayViewSetLength(&array_view, array->length + additional_size_elements);
+
+   // Walk the structure (recursively); the temporary view is released
+   // whether or not reserving succeeded
+   int result = ArrowArrayReserveInternal(array, &array_view);
+   ArrowArrayViewReset(&array_view);
+   return result;
+ }
+
+ // Refreshes the three exported buffer pointers of array from its internal
+ // buffers, then does the same for every child.
+ static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
+   struct ArrowArrayPrivateData* data =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   int64_t slot = 0;
+   while (slot < 3) {
+     data->buffer_data[slot] = ArrowArrayBuffer(array, slot)->data;
+     slot++;
+   }
+
+   int64_t child_i = 0;
+   while (child_i < array->n_children) {
+     ArrowArrayFlushInternalPointers(array->children[child_i]);
+     child_i++;
+   }
+ }
+
+ // Checks (recursively, including children) that every buffer of array is at
+ // least as large as the size recorded in the matching buffer view.
+ //
+ // set_length: when non-zero, the view's expected sizes are first recomputed
+ // for array->offset + array->length elements.
+ // A missing validity buffer is accepted when array->null_count is 0.
+ // Returns NANOARROW_OK, or EINVAL with error set for the first short buffer.
+ static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
+     struct ArrowArray* array, struct ArrowArrayView* array_view,
+     char set_length, struct ArrowError* error) {
+   if (set_length) {
+     ArrowArrayViewSetLength(array_view, array->offset + array->length);
+   }
+
+   for (int64_t i = 0; i < array->n_buffers; i++) {
+     if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
+         array->null_count == 0 && array->buffers[i] == NULL) {
+       continue;
+     }
+
+     int64_t expected_size = array_view->buffer_views[i].n_bytes;
+     int64_t actual_size = ArrowArrayBuffer(array, i)->size_bytes;
+
+     if (actual_size < expected_size) {
+       // The index is an int64_t: cast it so that it matches the %d conversion
+       ArrowErrorSet(
+           error,
+           "Expected buffer %d to size >= %ld bytes but found buffer with %ld bytes",
+           (int)i, (long)expected_size, (long)actual_size);
+       return EINVAL;
+     }
+   }
+
+   for (int64_t i = 0; i < array->n_children; i++) {
+     NANOARROW_RETURN_NOT_OK(ArrowArrayCheckInternalBufferSizes(
+         array->children[i], array_view->children[i], set_length, error));
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Finalizes array after building: refreshes the exported buffer pointers and
+ // verifies that every buffer is large enough for the array's length.
+ //
+ // Returns NANOARROW_OK when the array is consistent, otherwise the error of
+ // the failing check (with details in error).
+ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
+                                         struct ArrowError* error) {
+   // Make sure the value we get with array->buffers[i] is set to the actual
+   // pointer (which may have changed from the original due to reallocation)
+   ArrowArrayFlushInternalPointers(array);
+
+   // Check buffer sizes to make sure we are not sending an ArrowArray
+   // into the wild that is going to segfault
+   struct ArrowArrayView array_view;
+   NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array));
+
+   // Check buffer sizes once without using internal buffer data since
+   // ArrowArrayViewSetArray() assumes that all the buffers are long enough
+   // and issues invalid reads on offset buffers if they are not
+   int result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 1, error);
+
+   if (result == NANOARROW_OK) {
+     result = ArrowArrayViewSetArray(&array_view, array, error);
+   }
+
+   // Check again against the sizes now held by the view after SetArray()
+   if (result == NANOARROW_OK) {
+     result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 0, error);
+   }
+
+   ArrowArrayViewReset(&array_view);
+   return result;
+ }
+
+
+ // Zeroes array_view, then records storage_type and initializes the layout
+ // for that type.
+ void ArrowArrayViewInit(struct ArrowArrayView* array_view, enum ArrowType storage_type) {
+   // Every field, including the children pointer and count, starts at zero
+   memset(array_view, 0, sizeof(*array_view));
+
+   ArrowLayoutInit(&array_view->layout, storage_type);
+   array_view->storage_type = storage_type;
+ }
+
+ // Allocates n_children child views for array_view, each initialized as
+ // NANOARROW_TYPE_UNINITIALIZED.
+ //
+ // Returns EINVAL when children were already allocated and ENOMEM when an
+ // allocation fails. After an ENOMEM failure the children allocated so far
+ // remain attached to array_view so that ArrowArrayViewReset() frees them.
+ ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
+                                               int64_t n_children) {
+   if (array_view->children != NULL) {
+     return EINVAL;
+   }
+
+   // malloc(0) is allowed to return NULL, which would otherwise be reported
+   // as ENOMEM for a view that simply has no children
+   if (n_children == 0) {
+     array_view->n_children = 0;
+     return NANOARROW_OK;
+   }
+
+   array_view->children =
+       (struct ArrowArrayView**)ArrowMalloc(n_children * sizeof(struct ArrowArrayView*));
+   if (array_view->children == NULL) {
+     return ENOMEM;
+   }
+
+   for (int64_t i = 0; i < n_children; i++) {
+     array_view->children[i] = NULL;
+   }
+
+   array_view->n_children = n_children;
+
+   for (int64_t i = 0; i < n_children; i++) {
+     array_view->children[i] =
+         (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
+     if (array_view->children[i] == NULL) {
+       return ENOMEM;
+     }
+     ArrowArrayViewInit(array_view->children[i], NANOARROW_TYPE_UNINITIALIZED);
+   }
+
+   return NANOARROW_OK;
+ }
+
+// Initialize array_view, and recursively one child view per schema child,
+// from the storage type and layout that ArrowSchemaViewInit() reports.
+//
+// If child allocation or a child initialization fails, array_view is reset
+// before the failing code is returned.
+ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
+                                            struct ArrowSchema* schema,
+                                            struct ArrowError* error) {
+  struct ArrowSchemaView schema_view;
+  NANOARROW_RETURN_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, error));
+
+  ArrowArrayViewInit(array_view, schema_view.storage_data_type);
+  array_view->layout = schema_view.layout;
+
+  int status = ArrowArrayViewAllocateChildren(array_view, schema->n_children);
+
+  // Stop at the first child that fails to initialize
+  for (int64_t child = 0; status == NANOARROW_OK && child < schema->n_children;
+       child++) {
+    status = ArrowArrayViewInitFromSchema(array_view->children[child],
+                                          schema->children[child], error);
+  }
+
+  if (status != NANOARROW_OK) {
+    ArrowArrayViewReset(array_view);
+  }
+
+  return status;
+}
+
+// Release the child views owned by array_view (recursively) and return the
+// view to the NANOARROW_TYPE_UNINITIALIZED state.
+void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
+  struct ArrowArrayView** children = array_view->children;
+
+  if (children != NULL) {
+    for (int64_t i = 0; i < array_view->n_children; i++) {
+      struct ArrowArrayView* child = children[i];
+      // Slots may still be NULL if allocation of the children stopped part way
+      if (child == NULL) {
+        continue;
+      }
+
+      ArrowArrayViewReset(child);
+      ArrowFree(child);
+    }
+
+    ArrowFree(children);
+  }
+
+  ArrowArrayViewInit(array_view, NANOARROW_TYPE_UNINITIALIZED);
+}
+
+// Compute the expected size in bytes of each buffer view for an array of the
+// given length, using only the layout (buffer data pointers are set to NULL).
+// Struct and sparse union children receive the same length; the child of a
+// fixed-size list receives length * child_size_elements.
+void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) {
+  for (int i = 0; i < 3; i++) {
+    struct ArrowBufferView* view = &array_view->buffer_views[i];
+    const int64_t bits_per_element = array_view->layout.element_size_bits[i];
+    const int64_t bytes_per_element = bits_per_element / 8;
+
+    view->data.data = NULL;
+
+    switch (array_view->layout.buffer_type[i]) {
+      case NANOARROW_BUFFER_TYPE_VALIDITY:
+        view->n_bytes = _ArrowBytesForBits(length);
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+        // Probably don't want/need to rely on the producer to have allocated an
+        // offsets buffer of length 1 for a zero-size array
+        view->n_bytes = (length != 0) * bytes_per_element * (length + 1);
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA:
+        view->n_bytes = _ArrowRoundUpToMultipleOf8(bits_per_element * length) / 8;
+        break;
+      case NANOARROW_BUFFER_TYPE_TYPE_ID:
+      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+        view->n_bytes = bytes_per_element * length;
+        break;
+      case NANOARROW_BUFFER_TYPE_NONE:
+        view->n_bytes = 0;
+        break;
+      default:
+        break;
+    }
+  }
+
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_STRUCT:
+    case NANOARROW_TYPE_SPARSE_UNION:
+      for (int64_t child = 0; child < array_view->n_children; child++) {
+        ArrowArrayViewSetLength(array_view->children[child], length);
+      }
+      break;
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      if (array_view->n_children >= 1) {
+        ArrowArrayViewSetLength(array_view->children[0],
+                                length * array_view->layout.child_size_elements);
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+// Point array_view (and, recursively, its child views) at the buffers of array
+// and compute the buffer sizes that depend on the array's length and offsets.
+//
+// Returns EINVAL, with a message written through error, when the number of
+// buffers or children does not match what the view's layout expects or when a
+// child array is shorter than its parent requires. Errors from child views are
+// propagated unchanged.
+ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
+                                      struct ArrowArray* array,
+                                      struct ArrowError* error) {
+  array_view->array = array;
+  ArrowArrayViewSetLength(array_view, array->offset + array->length);
+
+  // Count the buffers required by the layout and validate that count before
+  // indexing array->buffers, so that an array with too few buffers is never
+  // read past the end of its buffers array
+  int64_t buffers_required = 0;
+  for (int i = 0; i < 3; i++) {
+    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
+      break;
+    }
+
+    buffers_required++;
+  }
+
+  if (buffers_required != array->n_buffers) {
+    ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)",
+                  (int)buffers_required, (int)array->n_buffers);
+    return EINVAL;
+  }
+
+  for (int64_t i = 0; i < buffers_required; i++) {
+    // If the null_count is 0, the validity buffer can be NULL
+    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
+        array->null_count == 0 && array->buffers[i] == NULL) {
+      array_view->buffer_views[i].n_bytes = 0;
+    }
+
+    array_view->buffer_views[i].data.data = array->buffers[i];
+  }
+
+  if (array_view->n_children != array->n_children) {
+    ArrowErrorSet(error, "Expected array with %d child(ren) but found %d child(ren)",
+                  (int)array_view->n_children, (int)array->n_children);
+    return EINVAL;
+  }
+
+  // Check child sizes and calculate sizes that depend on data in the array buffers
+  int64_t last_offset;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_BINARY:
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        last_offset =
+            array_view->buffer_views[1].data.as_int32[array->offset + array->length];
+        array_view->buffer_views[2].n_bytes = last_offset;
+      }
+      break;
+    case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_LARGE_BINARY:
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        last_offset =
+            array_view->buffer_views[1].data.as_int64[array->offset + array->length];
+        array_view->buffer_views[2].n_bytes = last_offset;
+      }
+      break;
+    case NANOARROW_TYPE_STRUCT:
+      for (int64_t i = 0; i < array_view->n_children; i++) {
+        if (array->children[i]->length < (array->offset + array->length)) {
+          ArrowErrorSet(
+              error,
+              "Expected struct child %d to have length >= %ld but found child with "
+              "length %ld",
+              (int)(i + 1), (long)(array->offset + array->length),
+              (long)array->children[i]->length);
+          return EINVAL;
+        }
+      }
+      break;
+    case NANOARROW_TYPE_LIST:
+      if (array->n_children != 1) {
+        ArrowErrorSet(error,
+                      "Expected 1 child of list array but found %d child arrays",
+                      (int)array->n_children);
+        return EINVAL;
+      }
+
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        last_offset =
+            array_view->buffer_views[1].data.as_int32[array->offset + array->length];
+        if (array->children[0]->length < last_offset) {
+          ArrowErrorSet(
+              error,
+              "Expected child of list array with length >= %ld but found array with "
+              "length %ld",
+              (long)last_offset, (long)array->children[0]->length);
+          return EINVAL;
+        }
+      }
+      break;
+    case NANOARROW_TYPE_LARGE_LIST:
+      if (array->n_children != 1) {
+        ArrowErrorSet(error,
+                      "Expected 1 child of large list array but found %d child arrays",
+                      (int)array->n_children);
+        return EINVAL;
+      }
+
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        last_offset =
+            array_view->buffer_views[1].data.as_int64[array->offset + array->length];
+        if (array->children[0]->length < last_offset) {
+          ArrowErrorSet(
+              error,
+              "Expected child of large list array with length >= %ld but found array "
+              "with length %ld",
+              (long)last_offset, (long)array->children[0]->length);
+          return EINVAL;
+        }
+      }
+      break;
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      if (array->n_children != 1) {
+        ArrowErrorSet(error,
+                      "Expected 1 child of fixed-size array but found %d child arrays",
+                      (int)array->n_children);
+        return EINVAL;
+      }
+
+      last_offset =
+          (array->offset + array->length) * array_view->layout.child_size_elements;
+      if (array->children[0]->length < last_offset) {
+        ArrowErrorSet(
+            error,
+            "Expected child of fixed-size list array with length >= %ld but found array "
+            "with length %ld",
+            (long)last_offset, (long)array->children[0]->length);
+        return EINVAL;
+      }
+      break;
+    default:
+      break;
+  }
+
+  for (int64_t i = 0; i < array_view->n_children; i++) {
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArray(array_view->children[i], array->children[i], error));
+  }
+
+  return NANOARROW_OK;
+}
diff --git a/c/vendor/nanoarrow/nanoarrow.h b/c/vendor/nanoarrow/nanoarrow.h
index 38b0efc..45bba3d 100644
--- a/c/vendor/nanoarrow/nanoarrow.h
+++ b/c/vendor/nanoarrow/nanoarrow.h
@@ -15,6 +15,372 @@
// specific language governing permissions and limitations
// under the License.
+#ifndef NANOARROW_NANOARROW_TYPES_H_INCLUDED
+#define NANOARROW_NANOARROW_TYPES_H_INCLUDED
+
+#include <stdint.h>
+#include <string.h>
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// \defgroup nanoarrow-inline-typedef Type definitions used in inlined implementations
+
+// Extra guard for versions of Arrow without the canonical guard
+#ifndef ARROW_FLAG_DICTIONARY_ORDERED
+
+#ifndef ARROW_C_DATA_INTERFACE
+#define ARROW_C_DATA_INTERFACE
+
+#define ARROW_FLAG_DICTIONARY_ORDERED 1
+#define ARROW_FLAG_NULLABLE 2
+#define ARROW_FLAG_MAP_KEYS_SORTED 4
+
+// Description of an array's type. This definition sits behind the
+// ARROW_C_DATA_INTERFACE guard so that it is only emitted when no other header
+// has already provided it; it therefore has to stay identical to those
+// definitions (do not reorder or retype members).
+struct ArrowSchema {
+ // Array type description
+ const char* format;
+ const char* name;
+ const char* metadata;
+ // NOTE(review): presumably a combination of the ARROW_FLAG_* values -- confirm
+ int64_t flags;
+ // Number of entries in children
+ int64_t n_children;
+ struct ArrowSchema** children;
+ struct ArrowSchema* dictionary;
+
+ // Release callback
+ void (*release)(struct ArrowSchema*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+// Description of an array's data. Like ArrowSchema, this definition is shared
+// through the ARROW_C_DATA_INTERFACE guard and has to stay identical to the
+// definitions provided by other headers.
+struct ArrowArray {
+ // Array data description
+ int64_t length;
+ int64_t null_count;
+ int64_t offset;
+ // Number of entries in buffers
+ int64_t n_buffers;
+ // Number of entries in children
+ int64_t n_children;
+ const void** buffers;
+ struct ArrowArray** children;
+ struct ArrowArray* dictionary;
+
+ // Release callback
+ void (*release)(struct ArrowArray*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+#endif // ARROW_C_DATA_INTERFACE
+
+#ifndef ARROW_C_STREAM_INTERFACE
+#define ARROW_C_STREAM_INTERFACE
+
+// A stream of arrays sharing one schema, expressed as a table of callbacks
+// plus producer-owned state. This definition is shared through the
+// ARROW_C_STREAM_INTERFACE guard and has to stay identical to the definitions
+// provided by other headers.
+struct ArrowArrayStream {
+ // Callback to get the stream type
+ // (will be the same for all arrays in the stream).
+ //
+ // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+ //
+ // If successful, the ArrowSchema must be released independently from the stream.
+ int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
+
+ // Callback to get the next array
+ // (if no error and the array is released, the stream has ended)
+ //
+ // Return value: 0 if successful, an `errno`-compatible error code otherwise.
+ //
+ // If successful, the ArrowArray must be released independently from the stream.
+ int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
+
+ // Callback to get optional detailed error information.
+ // This must only be called if the last stream operation failed
+ // with a non-0 return code.
+ //
+ // Return value: pointer to a null-terminated character array describing
+ // the last error, or NULL if no description is available.
+ //
+ // The returned pointer is only valid until the next operation on this stream
+ // (including release).
+ const char* (*get_last_error)(struct ArrowArrayStream*);
+
+ // Release callback: release the stream's own resources.
+ // Note that arrays returned by `get_next` must be individually released.
+ void (*release)(struct ArrowArrayStream*);
+
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+#endif // ARROW_C_STREAM_INTERFACE
+#endif // ARROW_FLAG_DICTIONARY_ORDERED
+
+/// \brief Return code for success.
+#define NANOARROW_OK 0
+
+/// \brief Represents an errno-compatible error code
+typedef int ArrowErrorCode;
+
+/// \brief Arrow type enumerator
+///
+/// These names are intended to map to the corresponding arrow::Type::type
+/// enumerator; however, the numeric values are specifically not equal
+/// (i.e., do not rely on numeric comparison).
+enum ArrowType {
+ // Explicitly zero, so zero-filled storage reads as "uninitialized"
+ NANOARROW_TYPE_UNINITIALIZED = 0,
+ // Remaining enumerators are numbered consecutively from 1
+ NANOARROW_TYPE_NA = 1,
+ NANOARROW_TYPE_BOOL,
+ NANOARROW_TYPE_UINT8,
+ NANOARROW_TYPE_INT8,
+ NANOARROW_TYPE_UINT16,
+ NANOARROW_TYPE_INT16,
+ NANOARROW_TYPE_UINT32,
+ NANOARROW_TYPE_INT32,
+ NANOARROW_TYPE_UINT64,
+ NANOARROW_TYPE_INT64,
+ NANOARROW_TYPE_HALF_FLOAT,
+ NANOARROW_TYPE_FLOAT,
+ NANOARROW_TYPE_DOUBLE,
+ NANOARROW_TYPE_STRING,
+ NANOARROW_TYPE_BINARY,
+ NANOARROW_TYPE_FIXED_SIZE_BINARY,
+ NANOARROW_TYPE_DATE32,
+ NANOARROW_TYPE_DATE64,
+ NANOARROW_TYPE_TIMESTAMP,
+ NANOARROW_TYPE_TIME32,
+ NANOARROW_TYPE_TIME64,
+ NANOARROW_TYPE_INTERVAL_MONTHS,
+ NANOARROW_TYPE_INTERVAL_DAY_TIME,
+ NANOARROW_TYPE_DECIMAL128,
+ NANOARROW_TYPE_DECIMAL256,
+ NANOARROW_TYPE_LIST,
+ NANOARROW_TYPE_STRUCT,
+ NANOARROW_TYPE_SPARSE_UNION,
+ NANOARROW_TYPE_DENSE_UNION,
+ NANOARROW_TYPE_DICTIONARY,
+ NANOARROW_TYPE_MAP,
+ NANOARROW_TYPE_EXTENSION,
+ NANOARROW_TYPE_FIXED_SIZE_LIST,
+ NANOARROW_TYPE_DURATION,
+ NANOARROW_TYPE_LARGE_STRING,
+ NANOARROW_TYPE_LARGE_BINARY,
+ NANOARROW_TYPE_LARGE_LIST,
+ NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
+};
+
+/// \brief Functional types of buffers as described in the Arrow Columnar Specification
+///
+/// NANOARROW_BUFFER_TYPE_NONE is the first enumerator and therefore has the
+/// value 0; it marks a buffer slot that a layout does not use.
+enum ArrowBufferType {
+ NANOARROW_BUFFER_TYPE_NONE,
+ NANOARROW_BUFFER_TYPE_VALIDITY,
+ NANOARROW_BUFFER_TYPE_TYPE_ID,
+ NANOARROW_BUFFER_TYPE_UNION_OFFSET,
+ NANOARROW_BUFFER_TYPE_DATA_OFFSET,
+ NANOARROW_BUFFER_TYPE_DATA
+};
+
+// Token pasting helpers. The extra level of indirection in
+// _NANOARROW_MAKE_NAME lets macro arguments (e.g. __COUNTER__) expand before
+// they are pasted.
+#define _NANOARROW_CONCAT(x, y) x##y
+#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y)
+
+// Evaluate EXPR exactly once, store it in a local named NAME and return it
+// from the enclosing function when it is non-zero.
+#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \
+ do { \
+ const int NAME = (EXPR); \
+ if (NAME) return NAME; \
+ } while (0)
+
+// Return early from the enclosing function if EXPR evaluates to a non-zero
+// error code. A fresh local name is generated per use via __COUNTER__.
+#define NANOARROW_RETURN_NOT_OK(EXPR) \
+ _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR)
+
+// Return EINVAL from the enclosing function unless min_ <= x_ <= max_.
+// Every argument is parenthesized so that expression arguments expand with the
+// intended precedence; note that x_ may be evaluated twice.
+#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \
+  NANOARROW_RETURN_NOT_OK(((x_) >= (min_) && (x_) <= (max_)) ? NANOARROW_OK : EINVAL)
+
+/// \brief A description of an arrangement of buffers
+///
+/// Contains the minimum amount of information required to
+/// calculate the size of each buffer in an ArrowArray knowing only
+/// the length and offset of the array.
+struct ArrowLayout {
+ /// \brief The function of each buffer
+ enum ArrowBufferType buffer_type[3];
+
+ /// \brief The size in bits of an element in each buffer, or 0 if this size is
+ /// variable or unknown
+ int64_t element_size_bits[3];
+
+ /// \brief The number of elements in the child array per element in this array for a
+ /// fixed-size list
+ int64_t child_size_elements;
+};
+
+/// \brief A non-owning view of a string
+struct ArrowStringView {
+ /// \brief A pointer to the start of the string
+ ///
+ /// If n_bytes is 0, this value may be NULL.
+ const char* data;
+
+ /// \brief The size of the string in bytes
+ ///
+ /// (Not including the null terminator.)
+ int64_t n_bytes;
+};
+
+/// \brief Create a string view of a null-terminated C string
+///
+/// The view borrows value; nothing is copied. A null value yields a view whose
+/// data is null and whose n_bytes is 0.
+static inline struct ArrowStringView ArrowCharView(const char* value) {
+  struct ArrowStringView view;
+
+  view.data = value;
+  view.n_bytes = value ? (int64_t)strlen(value) : 0;
+
+  return view;
+}
+
+/// \brief A non-owning view of a buffer
+struct ArrowBufferView {
+ /// \brief A pointer to the start of the buffer
+ ///
+ /// If n_bytes is 0, this value may be NULL.
+ ///
+ /// All union members alias the same address; pick the member matching the
+ /// element type stored in the buffer.
+ union {
+ const void* data;
+ const int8_t* as_int8;
+ const uint8_t* as_uint8;
+ const int16_t* as_int16;
+ const uint16_t* as_uint16;
+ const int32_t* as_int32;
+ const uint32_t* as_uint32;
+ const int64_t* as_int64;
+ const uint64_t* as_uint64;
+ const double* as_double;
+ const float* as_float;
+ const char* as_char;
+ } data;
+
+ /// \brief The size of the buffer in bytes
+ int64_t n_bytes;
+};
+
+/// \brief Array buffer allocation and deallocation
+///
+/// Container for reallocate and free methods that can be used
+/// to customize allocation and deallocation of buffers when constructing
+/// an ArrowArray. There is no separate allocate callback: ArrowBufferResize()
+/// calls reallocate even when the buffer does not hold any data yet.
+struct ArrowBufferAllocator {
+ /// \brief Reallocate a buffer or return NULL if it cannot be reallocated
+ uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
+ int64_t old_size, int64_t new_size);
+
+ /// \brief Deallocate a buffer allocated by this allocator
+ void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size);
+
+ /// \brief Opaque data specific to the allocator
+ void* private_data;
+};
+
+/// \brief An owning mutable view of a buffer
+///
+/// size_bytes counts the bytes appended so far; capacity_bytes counts the bytes
+/// currently allocated. The ArrowBuffer* helpers keep size_bytes <= capacity_bytes.
+struct ArrowBuffer {
+ /// \brief A pointer to the start of the buffer
+ ///
+ /// If capacity_bytes is 0, this value may be NULL.
+ uint8_t* data;
+
+ /// \brief The size of the buffer in bytes
+ int64_t size_bytes;
+
+ /// \brief The capacity of the buffer in bytes
+ int64_t capacity_bytes;
+
+ /// \brief The allocator that will be used to reallocate and/or free the buffer
+ struct ArrowBufferAllocator allocator;
+};
+
+/// \brief An owning mutable view of a bitmap
+///
+/// Bits are stored in buffer; size_bits tracks how many of them are in use,
+/// while buffer.size_bytes tracks the number of bytes needed to hold them.
+struct ArrowBitmap {
+ /// \brief An ArrowBuffer to hold the allocated memory
+ struct ArrowBuffer buffer;
+
+ /// \brief The number of bits that have been appended to the bitmap
+ int64_t size_bits;
+};
+
+// Used as the private data member for ArrowArrays allocated here and accessed
+// internally within inline ArrowArray* helpers.
+struct ArrowArrayPrivateData {
+ // Holder for the validity buffer (or first buffer for union types, which are
+ // the only type whose first buffer is not a validity buffer)
+ struct ArrowBitmap bitmap;
+
+ // Holder for additional buffers as required
+ struct ArrowBuffer buffers[2];
+
+ // The array of pointers to buffers. This must be updated after a sequence
+ // of appends to synchronize its values with the actual buffer addresses
+ // (which may have been reallocated during that time)
+ const void* buffer_data[3];
+
+ // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
+ enum ArrowType storage_type;
+
+ // The buffer arrangement for the storage type
+ struct ArrowLayout layout;
+};
+
+// A non-owning view of an ArrowArray together with the sizes of its buffers.
+// The child views are owned by the view: they are allocated by
+// ArrowArrayViewAllocateChildren() and released by ArrowArrayViewReset().
+struct ArrowArrayView {
+ // The array being viewed, as set by ArrowArrayViewSetArray()
+ struct ArrowArray* array;
+ // The storage type, or NANOARROW_TYPE_UNINITIALIZED after init/reset
+ enum ArrowType storage_type;
+ // The buffer arrangement for the storage type
+ struct ArrowLayout layout;
+ // One view per buffer slot described by layout
+ struct ArrowBufferView buffer_views[3];
+ // Number of entries in children
+ int64_t n_children;
+ struct ArrowArrayView** children;
+};
+
+/// }@
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_BUILD_ID_H_INCLUDED
+#define NANOARROW_BUILD_ID_H_INCLUDED
+
+#define NANOARROW_BUILD_ID "OFF"
+
+#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
#ifndef NANOARROW_H_INCLUDED
#define NANOARROW_H_INCLUDED
@@ -22,7 +388,7 @@
#include <stdint.h>
#include <stdlib.h>
-#include "typedefs_inline.h"
+
#ifdef __cplusplus
extern "C" {
@@ -98,6 +464,9 @@ const char* ArrowErrorMessage(struct ArrowError* error);
/// \defgroup nanoarrow-utils Utility data structures
+/// \brief Return the build id against which the library was compiled
+///
+/// Declared with an explicit (void) parameter list so that this is a real
+/// prototype in C (an empty list leaves the parameters unspecified).
+const char* ArrowNanoarrowBuildId(void);
+
/// \brief Initialize a description of buffer arrangements from a storage type
void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type);
@@ -689,13 +1058,1146 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
/// \brief Reset the contents of an ArrowArrayView and frees resources
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
+/// \brief Check for a null element in an ArrowArrayView
+static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get an element in an ArrowArrayView as an integer
+///
+/// This function does not check for null values, that values are actually integers, or
+/// that values are within a valid range for an int64.
+static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view,
+ int64_t i);
+
+/// \brief Get an element in an ArrowArrayView as an unsigned integer
+///
+/// This function does not check for null values, that values are actually integers, or
+/// that values are within a valid range for a uint64.
+static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view,
+ int64_t i);
+
+/// \brief Get an element in an ArrowArrayView as a double
+///
+/// This function does not check for null values, or
+/// that values are within a valid range for a double.
+static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view,
+ int64_t i);
+
+/// \brief Get an element in an ArrowArrayView as an ArrowStringView
+///
+/// This function does not check for null values.
+static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
+ struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get an element in an ArrowArrayView as an ArrowBufferView
+///
+/// This function does not check for null values.
+static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
+ struct ArrowArrayView* array_view, int64_t i);
+
/// }@
// Inline function definitions
-#include "array_inline.h"
-#include "bitmap_inline.h"
-#include "buffer_inline.h"
-#include "utils_inline.h"
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_BUFFER_INLINE_H_INCLUDED
+#define NANOARROW_BUFFER_INLINE_H_INCLUDED
+
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Pick the capacity to grow to: twice the current capacity, unless the
+// requested capacity is at least that large, in which case the request wins.
+static inline int64_t _ArrowGrowByFactor(int64_t current_capacity, int64_t new_capacity) {
+  const int64_t doubled = current_capacity * 2;
+  return (doubled > new_capacity) ? doubled : new_capacity;
+}
+
+// Initialize buffer as empty (no data, zero size and capacity) using the
+// allocator returned by ArrowBufferAllocatorDefault().
+static inline void ArrowBufferInit(struct ArrowBuffer* buffer) {
+  buffer->allocator = ArrowBufferAllocatorDefault();
+  buffer->capacity_bytes = 0;
+  buffer->size_bytes = 0;
+  buffer->data = NULL;
+}
+
+// Replace the allocator of a buffer that does not hold any data yet.
+// Returns EINVAL (leaving the allocator untouched) once data has been allocated.
+static inline ArrowErrorCode ArrowBufferSetAllocator(
+    struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator) {
+  if (buffer->data != NULL) {
+    return EINVAL;
+  }
+
+  buffer->allocator = allocator;
+  return NANOARROW_OK;
+}
+
+// Free the data held by buffer through its allocator (if any) and mark the
+// buffer as empty. The allocator itself is kept.
+static inline void ArrowBufferReset(struct ArrowBuffer* buffer) {
+  uint8_t* data = buffer->data;
+
+  if (data != NULL) {
+    buffer->allocator.free(&buffer->allocator, data, buffer->capacity_bytes);
+    buffer->data = NULL;
+  }
+
+  buffer->size_bytes = 0;
+  buffer->capacity_bytes = 0;
+}
+
+// Transfer ownership of the data in buffer to buffer_out. buffer_out is
+// overwritten byte for byte (including the allocator); buffer is left empty
+// without freeing anything, since the data now belongs to buffer_out.
+static inline void ArrowBufferMove(struct ArrowBuffer* buffer,
+                                   struct ArrowBuffer* buffer_out) {
+  memcpy(buffer_out, buffer, sizeof(*buffer_out));
+
+  buffer->data = NULL;
+  buffer->capacity_bytes = 0;
+  buffer->size_bytes = 0;
+}
+
+// Change the capacity of buffer to new_capacity_bytes. The buffer is only
+// reallocated when it has to grow or when shrink_to_fit is non-zero; size_bytes
+// is clamped so that it never exceeds the requested capacity.
+//
+// Returns EINVAL for a negative capacity and ENOMEM if the allocator returns
+// NULL for a non-zero request, in which case the buffer is marked as empty.
+// NOTE(review): on ENOMEM the previous data pointer is overwritten; whether
+// that leaks depends on the allocator's behavior on failure -- confirm.
+static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer,
+                                               int64_t new_capacity_bytes,
+                                               char shrink_to_fit) {
+  if (new_capacity_bytes < 0) {
+    return EINVAL;
+  }
+
+  const int must_reallocate =
+      shrink_to_fit || new_capacity_bytes > buffer->capacity_bytes;
+
+  if (must_reallocate) {
+    buffer->data = buffer->allocator.reallocate(
+        &buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes);
+
+    if (new_capacity_bytes > 0 && buffer->data == NULL) {
+      buffer->size_bytes = 0;
+      buffer->capacity_bytes = 0;
+      return ENOMEM;
+    }
+
+    buffer->capacity_bytes = new_capacity_bytes;
+  }
+
+  // Ensures that when shrinking that size <= capacity
+  if (buffer->size_bytes > new_capacity_bytes) {
+    buffer->size_bytes = new_capacity_bytes;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Make sure at least additional_size_bytes can be appended after the current
+// size. When the buffer must grow, the new capacity is chosen by
+// _ArrowGrowByFactor() and applied with ArrowBufferResize().
+static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer,
+                                                int64_t additional_size_bytes) {
+  const int64_t required_bytes = buffer->size_bytes + additional_size_bytes;
+
+  if (required_bytes > buffer->capacity_bytes) {
+    const int64_t grown_bytes =
+        _ArrowGrowByFactor(buffer->capacity_bytes, required_bytes);
+    return ArrowBufferResize(buffer, grown_bytes, 0);
+  }
+
+  return NANOARROW_OK;
+}
+
+// Copy size_bytes bytes from data to the end of buffer without checking the
+// capacity; the caller must have reserved enough space. Non-positive sizes are
+// ignored.
+static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data,
+                                           int64_t size_bytes) {
+  if (size_bytes <= 0) {
+    return;
+  }
+
+  uint8_t* destination = buffer->data + buffer->size_bytes;
+  memcpy(destination, data, size_bytes);
+  buffer->size_bytes += size_bytes;
+}
+
+// Append size_bytes bytes from data to buffer, growing the buffer first if
+// needed. Returns the error from ArrowBufferReserve() if growing fails.
+static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
+                                               const void* data, int64_t size_bytes) {
+  const ArrowErrorCode status = ArrowBufferReserve(buffer, size_bytes);
+  if (status != NANOARROW_OK) {
+    return status;
+  }
+
+  ArrowBufferAppendUnsafe(buffer, data, size_bytes);
+  return NANOARROW_OK;
+}
+
+// Append the bytes of a single int8_t value to buffer.
+static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer,
+                                                   int8_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append the bytes of a single uint8_t value to buffer.
+static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer,
+                                                    uint8_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append the bytes of a single int16_t value (in host byte order) to buffer.
+static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer,
+                                                    int16_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append the bytes of a single uint16_t value (in host byte order) to buffer.
+static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer,
+                                                     uint16_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append the bytes of a single int32_t value (in host byte order) to buffer.
+static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer,
+                                                    int32_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append the bytes of a single uint32_t value (in host byte order) to buffer.
+static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer,
+                                                     uint32_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append the bytes of a single int64_t value (in host byte order) to buffer.
+static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer,
+                                                    int64_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append the bytes of a single uint64_t value (in host byte order) to buffer.
+static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer,
+                                                     uint64_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append the bytes of a single double value (in host representation) to buffer.
+static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer,
+                                                     double value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append the bytes of a single float value (in host representation) to buffer.
+static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer,
+                                                    float value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));
+}
+
+// Append size_bytes copies of value to buffer, growing the buffer first if
+// needed. Returns the error from ArrowBufferReserve() if growing fails.
+//
+// Non-positive sizes append nothing, mirroring ArrowBufferAppendUnsafe(); this
+// also keeps memset() from being called on a buffer whose data is still NULL.
+static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer,
+                                                   uint8_t value, int64_t size_bytes) {
+  NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes));
+
+  if (size_bytes > 0) {
+    memset(buffer->data + buffer->size_bytes, value, size_bytes);
+    buffer->size_bytes += size_bytes;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Lookup tables indexed by a bit position i in [0, 7]:
+// - _ArrowkBitmask[i]: only bit i set
+// - _ArrowkFlippedBitmask[i]: every bit except bit i set
+// - _ArrowkPrecedingBitmask[i]: the bits below position i set
+// - _ArrowkTrailingBitmask[i]: the bits at position i and above set
+static const uint8_t _ArrowkBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
+static const uint8_t _ArrowkFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
+static const uint8_t _ArrowkPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
+static const uint8_t _ArrowkTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
+
+// Lookup table with 256 entries: _ArrowkBytePopcount[b] is the number of set
+// bits in the byte value b.
+static const uint8_t _ArrowkBytePopcount[] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3,
+ 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4,
+ 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4,
+ 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,
+ 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2,
+ 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,
+ 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4,
+ 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+
+// Round value up to the next multiple of 8 (multiples of 8 are unchanged).
+static inline int64_t _ArrowRoundUpToMultipleOf8(int64_t value) {
+  const int64_t low_bits = 7;
+  return (value + low_bits) & ~low_bits;
+}
+
+// Round value to a multiple of 8 using truncating integer division.
+static inline int64_t _ArrowRoundDownToMultipleOf8(int64_t value) {
+  const int64_t whole_groups = value / 8;
+  return whole_groups * 8;
+}
+
+// Number of bytes needed to hold the given number of bits: the whole bytes
+// plus one more when there is a partially used byte.
+static inline int64_t _ArrowBytesForBits(int64_t bits) {
+  const int64_t whole_bytes = bits >> 3;
+  const int has_partial_byte = (bits & 7) != 0;
+  return whole_bytes + has_partial_byte;
+}
+
+// Pack eight int8_t values into one byte: values[k] is shifted to bit k and
+// the results are OR-ed together.
+// NOTE(review): presumably each value is expected to be 0 or 1 -- confirm.
+static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
+  int packed = 0;
+  for (int bit = 0; bit < 8; bit++) {
+    packed |= values[bit] << bit;
+  }
+
+  *out = (uint8_t)packed;
+}
+
+// Pack eight int32_t values into one byte: values[k] is shifted to bit k and
+// the results are OR-ed together.
+// NOTE(review): presumably each value is expected to be 0 or 1 -- confirm.
+static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) {
+  int32_t packed = 0;
+  for (int bit = 0; bit < 8; bit++) {
+    packed |= values[bit] << bit;
+  }
+
+  *out = (uint8_t)packed;
+}
+
+// Return bit i of the bitmap (0 or 1); bit 0 is the least significant bit of
+// bits[0].
+static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) {
+  const uint8_t byte = bits[i >> 3];
+  return (byte >> (i & 0x07)) & 1;
+}
+
+// Set bit i of the bitmap to 1.
+static inline void ArrowBitSet(uint8_t* bits, int64_t i) {
+  const uint8_t mask = _ArrowkBitmask[i % 8];
+  bits[i / 8] |= mask;
+}
+
+// Set bit i of the bitmap to 0.
+static inline void ArrowBitClear(uint8_t* bits, int64_t i) {
+  const uint8_t keep_mask = _ArrowkFlippedBitmask[i % 8];
+  bits[i / 8] &= keep_mask;
+}
+
+// Set bit i of the bitmap to 1 if bit_is_set is non-zero and to 0 otherwise.
+// The other bits of the byte are left unchanged.
+static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set) {
+  uint8_t* byte = &bits[i / 8];
+  const uint8_t mask = _ArrowkBitmask[i % 8];
+
+  if (bit_is_set != 0) {
+    *byte |= mask;
+  } else {
+    *byte &= (uint8_t)~mask;
+  }
+}
+
+// Set the length bits starting at start_offset to 1 if bits_are_set is
+// non-zero and to 0 otherwise. Bits outside of that range are left unchanged.
+//
+// A non-positive length is a no-op: without the early return, an empty range
+// starting on a byte boundary would overwrite the whole byte at that position
+// (and dereference bits even when it is NULL for an empty bitmap).
+static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length,
+                                  uint8_t bits_are_set) {
+  if (length <= 0) {
+    return;
+  }
+
+  const int64_t i_begin = start_offset;
+  const int64_t i_end = start_offset + length;
+  // Normalize so that any non-zero flag fills with all ones, consistent with
+  // ArrowBitSetTo()
+  const uint8_t fill_byte = bits_are_set ? 0xFF : 0x00;
+
+  const int64_t bytes_begin = i_begin / 8;
+  const int64_t bytes_end = i_end / 8 + 1;
+
+  // Masks of the bits that must be preserved in the first and last byte
+  const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
+  const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];
+
+  if (bytes_end == bytes_begin + 1) {
+    // set bits within a single byte
+    const uint8_t only_byte_mask =
+        i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
+    bits[bytes_begin] &= only_byte_mask;
+    bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask);
+    return;
+  }
+
+  // set/clear trailing bits of first byte
+  bits[bytes_begin] &= first_byte_mask;
+  bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask);
+
+  if (bytes_end - bytes_begin > 2) {
+    // set/clear whole bytes
+    memset(bits + bytes_begin + 1, fill_byte, (size_t)(bytes_end - bytes_begin - 2));
+  }
+
+  // A byte-aligned end means there is no partially covered last byte
+  if (i_end % 8 == 0) {
+    return;
+  }
+
+  // set/clear leading bits of last byte
+  bits[bytes_end - 1] &= last_byte_mask;
+  bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask);
+}
+
+// Count the bits that are set among the length bits starting at start_offset.
+// Returns 0 for a non-positive length.
+//
+// Only bytes that overlap the requested range are read: when the range ends on
+// a byte boundary, the byte following the range is not touched.
+static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset,
+                                       int64_t length) {
+  if (length <= 0) {
+    return 0;
+  }
+
+  const int64_t i_begin = start_offset;
+  const int64_t i_end = start_offset + length;
+
+  const int64_t bytes_begin = i_begin / 8;
+  const int64_t bytes_end = i_end / 8 + 1;
+
+  // Masks of the bits that lie *outside* of the range in the first/last byte
+  const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
+  const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];
+
+  if (bytes_end == bytes_begin + 1) {
+    // count bits within a single byte: drop the bits before the start and the
+    // bits at or after the end, keeping only those inside the range
+    const uint8_t excluded_mask = (uint8_t)(first_byte_mask | last_byte_mask);
+    const uint8_t byte_masked = (uint8_t)(bits[bytes_begin] & ~excluded_mask);
+    return _ArrowkBytePopcount[byte_masked];
+  }
+
+  int64_t count = 0;
+
+  // first byte
+  count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask];
+
+  // middle bytes
+  for (int64_t i = bytes_begin + 1; i < (bytes_end - 1); i++) {
+    count += _ArrowkBytePopcount[bits[i]];
+  }
+
+  // last byte (only present when the range does not end on a byte boundary)
+  if (i_end % 8 != 0) {
+    count += _ArrowkBytePopcount[bits[bytes_end - 1] & ~last_byte_mask];
+  }
+
+  return count;
+}
+
+ // Put bitmap into its initial state: zero bits recorded and a freshly
+ // initialized underlying buffer.
+ static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap) {
+   bitmap->size_bits = 0;
+   ArrowBufferInit(&bitmap->buffer);
+ }
+
+ // Make sure bitmap can hold additional_size_bits more bits than it currently
+ // records.
+ //
+ // Nothing happens when the existing capacity is already sufficient. Otherwise
+ // the underlying buffer is grown and the last byte of its new capacity is
+ // zeroed. Returns NANOARROW_OK or the error from ArrowBufferReserve().
+ static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap,
+                                                 int64_t additional_size_bits) {
+   struct ArrowBuffer* storage = &bitmap->buffer;
+   const int64_t needed_bits = bitmap->size_bits + additional_size_bits;
+
+   if (needed_bits > (storage->capacity_bytes * 8)) {
+     NANOARROW_RETURN_NOT_OK(
+         ArrowBufferReserve(storage, _ArrowBytesForBits(additional_size_bits)));
+
+     storage->data[storage->capacity_bytes - 1] = 0;
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Resize the storage of bitmap so that it spans new_capacity_bits bits.
+ //
+ // A negative capacity is rejected with EINVAL. shrink_to_fit is forwarded to
+ // ArrowBufferResize(). If the bitmap recorded more bits than the new capacity,
+ // its bit count is reduced to the new capacity.
+ static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap,
+                                                int64_t new_capacity_bits,
+                                                char shrink_to_fit) {
+   if (new_capacity_bits < 0) {
+     return EINVAL;
+   }
+
+   NANOARROW_RETURN_NOT_OK(ArrowBufferResize(
+       &bitmap->buffer, _ArrowBytesForBits(new_capacity_bits), shrink_to_fit));
+
+   if (bitmap->size_bits > new_capacity_bits) {
+     bitmap->size_bits = new_capacity_bits;
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Append length bits to bitmap, reserving space first.
+ //
+ // bits_are_set is forwarded to ArrowBitmapAppendUnsafe(). Returns NANOARROW_OK,
+ // or the error from ArrowBitmapReserve() in which case nothing is appended.
+ static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
+                                                uint8_t bits_are_set, int64_t length) {
+   const ArrowErrorCode reserve_status = ArrowBitmapReserve(bitmap, length);
+   NANOARROW_RETURN_NOT_OK(reserve_status);
+
+   ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length);
+   return NANOARROW_OK;
+ }
+
+ // Append length bits to bitmap without reserving space: the caller must have
+ // made room beforehand, no allocation happens here.
+ //
+ // The new bits are written with ArrowBitsSetTo() using bits_are_set, then the
+ // bit count and the byte size of the underlying buffer are updated.
+ static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
+                                            uint8_t bits_are_set, int64_t length) {
+   const int64_t first_new_bit = bitmap->size_bits;
+
+   ArrowBitsSetTo(bitmap->buffer.data, first_new_bit, length, bits_are_set);
+
+   bitmap->size_bits = first_new_bit + length;
+   bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits);
+ }
+
+ // Append n_values bits to bitmap, one per element of values, without
+ // reserving space: the caller must have made room beforehand.
+ //
+ // How an element maps to a bit is decided by ArrowBitSetTo() and
+ // _ArrowBitmapPackInt8() (presumably non-zero means set; confirm against those
+ // helpers). Exactly n_values elements of values are read, even when the bitmap
+ // does not currently end on a byte boundary.
+ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
+                                                const int8_t* values, int64_t n_values) {
+   if (n_values == 0) {
+     return;
+   }
+
+   const int8_t* values_cursor = values;
+   int64_t n_remaining = n_values;
+   int64_t out_i_cursor = bitmap->size_bits;
+   uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8;
+
+   // First byte
+   if ((out_i_cursor % 8) != 0) {
+     int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor;
+     // Never consume more input than was provided: without this clamp a short
+     // append into a partially filled byte would read past the end of values
+     if (n_partial_bits > n_remaining) {
+       n_partial_bits = n_remaining;
+     }
+
+     for (int64_t i = 0; i < n_partial_bits; i++) {
+       ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]);
+     }
+
+     out_cursor++;
+     values_cursor += n_partial_bits;
+     n_remaining -= n_partial_bits;
+   }
+
+   // Middle bytes
+   int64_t n_full_bytes = n_remaining / 8;
+   for (int64_t i = 0; i < n_full_bytes; i++) {
+     _ArrowBitmapPackInt8(values_cursor, out_cursor);
+     values_cursor += 8;
+     out_cursor++;
+   }
+
+   // Last byte
+   out_i_cursor += n_full_bytes * 8;
+   n_remaining -= n_full_bytes * 8;
+   if (n_remaining > 0) {
+     // Zero out the last byte
+     *out_cursor = 0x00;
+     for (int64_t i = 0; i < n_remaining; i++) {
+       ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
+     }
+     out_cursor++;
+   }
+
+   bitmap->size_bits += n_values;
+   bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data;
+ }
+
+ // Append n_values bits to bitmap, one per element of values, without
+ // reserving space: the caller must have made room beforehand.
+ //
+ // How an element maps to a bit is decided by ArrowBitSetTo() and
+ // _ArrowBitmapPackInt32() (presumably non-zero means set; confirm against
+ // those helpers). Exactly n_values elements of values are read, even when the
+ // bitmap does not currently end on a byte boundary.
+ static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap,
+                                                 const int32_t* values, int64_t n_values) {
+   if (n_values == 0) {
+     return;
+   }
+
+   const int32_t* values_cursor = values;
+   int64_t n_remaining = n_values;
+   int64_t out_i_cursor = bitmap->size_bits;
+   uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8;
+
+   // First byte
+   if ((out_i_cursor % 8) != 0) {
+     int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor;
+     // Never consume more input than was provided: without this clamp a short
+     // append into a partially filled byte would read past the end of values
+     if (n_partial_bits > n_remaining) {
+       n_partial_bits = n_remaining;
+     }
+
+     for (int64_t i = 0; i < n_partial_bits; i++) {
+       ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]);
+     }
+
+     out_cursor++;
+     values_cursor += n_partial_bits;
+     n_remaining -= n_partial_bits;
+   }
+
+   // Middle bytes
+   int64_t n_full_bytes = n_remaining / 8;
+   for (int64_t i = 0; i < n_full_bytes; i++) {
+     _ArrowBitmapPackInt32(values_cursor, out_cursor);
+     values_cursor += 8;
+     out_cursor++;
+   }
+
+   // Last byte
+   out_i_cursor += n_full_bytes * 8;
+   n_remaining -= n_full_bytes * 8;
+   if (n_remaining > 0) {
+     // Zero out the last byte
+     *out_cursor = 0x00;
+     for (int64_t i = 0; i < n_remaining; i++) {
+       ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
+     }
+     out_cursor++;
+   }
+
+   bitmap->size_bits += n_values;
+   bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data;
+ }
+
+ // Reset bitmap: its bit count goes back to zero and the underlying buffer is
+ // handed to ArrowBufferReset().
+ static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) {
+   bitmap->size_bits = 0;
+   ArrowBufferReset(&bitmap->buffer);
+ }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_ARRAY_INLINE_H_INCLUDED
+#define NANOARROW_ARRAY_INLINE_H_INCLUDED
+
+#include <errno.h>
+#include <float.h>
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ // Return the validity bitmap stored in the private data of array.
+ static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array) {
+   return &((struct ArrowArrayPrivateData*)array->private_data)->bitmap;
+ }
+
+ // Return buffer number i of array.
+ //
+ // Index 0 is the buffer backing the validity bitmap; any other index i maps to
+ // entry i - 1 of the buffers kept in the private data. i is not range checked.
+ static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i) {
+   struct ArrowArrayPrivateData* priv =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   if (i == 0) {
+     return &priv->bitmap.buffer;
+   }
+
+   return &priv->buffers[i - 1];
+ }
+
+ // Prepare array, and recursively all of its children, for appending.
+ //
+ // Returns EINVAL when the storage type is NANOARROW_TYPE_UNINITIALIZED. Every
+ // 32- or 64-bit data offset buffer of the layout receives a leading zero.
+ // Errors from the buffer appends or from a child are returned as-is.
+ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) {
+   struct ArrowArrayPrivateData* priv =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   if (priv->storage_type == NANOARROW_TYPE_UNINITIALIZED) {
+     return EINVAL;
+   }
+
+   // Seed each data offset buffer with a single zero
+   for (int i = 0; i < 3; i++) {
+     if (priv->layout.buffer_type[i] != NANOARROW_BUFFER_TYPE_DATA_OFFSET) {
+       continue;
+     }
+
+     switch (priv->layout.element_size_bits[i]) {
+       case 64:
+         NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0));
+         break;
+       case 32:
+         NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0));
+         break;
+       default:
+         break;
+     }
+   }
+
+   // Children are started the same way
+   for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
+     NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[child_i]));
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Shrink the three buffers of array, and recursively those of its children,
+ // to their current size.
+ //
+ // Each buffer is resized to its own size_bytes with the shrink_to_fit flag
+ // set. Returns NANOARROW_OK, or the first error reported by ArrowBufferResize()
+ // or by a child.
+ static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
+   for (int64_t i = 0; i < 3; i++) {
+     struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
+     NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1));
+   }
+
+   for (int64_t i = 0; i < array->n_children; i++) {
+     NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i]));
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Write n bits of the given value into buffer number buffer_i of array,
+ // starting at bit position array->length.
+ //
+ // The buffer is first zero-extended so that it is large enough for
+ // array->length + n elements of the layout's element size; sizing it for a
+ // single extra element would let ArrowBitsSetTo() write past the end of the
+ // buffer whenever n > 1. array->length itself is not modified. Returns
+ // NANOARROW_OK or the error from ArrowBufferAppendFill().
+ static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* array,
+                                                    int64_t buffer_i, uint8_t value,
+                                                    int64_t n) {
+   struct ArrowArrayPrivateData* private_data =
+       (struct ArrowArrayPrivateData*)array->private_data;
+   struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
+   int64_t bytes_required =
+       _ArrowRoundUpToMultipleOf8(private_data->layout.element_size_bits[buffer_i] *
+                                  (array->length + n)) /
+       8;
+   if (bytes_required > buffer->size_bytes) {
+     NANOARROW_RETURN_NOT_OK(
+         ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes));
+   }
+
+   ArrowBitsSetTo(buffer->data, array->length, n, value);
+   return NANOARROW_OK;
+ }
+
+ // Append n null elements to array.
+ //
+ // For NANOARROW_TYPE_NA only the length and the null count are updated.
+ // Otherwise n zero bits are appended to the validity bitmap (allocated on
+ // first use, with the existing elements marked as valid), every buffer of the
+ // layout receives filler for n elements, and the children of fixed-size list
+ // and struct arrays are extended with nulls so their lengths stay in step.
+ //
+ // Returns NANOARROW_OK on success, EINVAL for layouts that contain a type id
+ // or union offset buffer, or the error of the first failing buffer operation.
+ static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) {
+   struct ArrowArrayPrivateData* private_data =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   if (n == 0) {
+     return NANOARROW_OK;
+   }
+
+   if (private_data->storage_type == NANOARROW_TYPE_NA) {
+     array->null_count += n;
+     array->length += n;
+     return NANOARROW_OK;
+   }
+
+   // Append n 0 bits to the validity bitmap. If we haven't allocated a bitmap yet, do it
+   // now
+   if (private_data->bitmap.buffer.data == NULL) {
+     NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n));
+     ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length);
+     ArrowBitmapAppendUnsafe(&private_data->bitmap, 0, n);
+   } else {
+     NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n));
+     ArrowBitmapAppendUnsafe(&private_data->bitmap, 0, n);
+   }
+
+   // Add appropriate buffer fill
+   struct ArrowBuffer* buffer;
+   int64_t size_bytes;
+
+   for (int i = 0; i < 3; i++) {
+     buffer = ArrowArrayBuffer(array, i);
+     size_bytes = private_data->layout.element_size_bits[i] / 8;
+
+     switch (private_data->layout.buffer_type[i]) {
+       case NANOARROW_BUFFER_TYPE_NONE:
+       case NANOARROW_BUFFER_TYPE_VALIDITY:
+         continue;
+       case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+         // Append the current value at the end of the offset buffer for each element
+         NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n));
+
+         for (int64_t j = 0; j < n; j++) {
+           ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j),
+                                   size_bytes);
+         }
+
+         // Skip the data buffer
+         i++;
+         continue;
+       case NANOARROW_BUFFER_TYPE_DATA:
+         // Zero out the next bit of memory
+         if (private_data->layout.element_size_bits[i] % 8 == 0) {
+           NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n));
+         } else {
+           NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n));
+         }
+         continue;
+
+       case NANOARROW_BUFFER_TYPE_TYPE_ID:
+       case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+         // Not supported
+         return EINVAL;
+     }
+   }
+
+   // For fixed-size list and struct we need to append some nulls to
+   // children for the lengths to line up properly
+   switch (private_data->storage_type) {
+     case NANOARROW_TYPE_FIXED_SIZE_LIST:
+       NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(
+           array->children[0], n * private_data->layout.child_size_elements));
+       break;
+     case NANOARROW_TYPE_STRUCT:
+       for (int64_t i = 0; i < array->n_children; i++) {
+         NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(array->children[i], n));
+       }
+       break;
+     default:
+       break;
+   }
+
+   array->length += n;
+   array->null_count += n;
+   return NANOARROW_OK;
+ }
+
+ // Append one signed integer to array.
+ //
+ // Signed integer storage types go through _NANOARROW_CHECK_RANGE for their
+ // width before the value is appended; unsigned storage types require a
+ // non-negative value and are then delegated to ArrowArrayAppendUInt(). Double
+ // and float storage receive the converted value and bool storage receives
+ // value != 0. Any other storage type yields EINVAL. On success the validity
+ // bitmap (if one was allocated) gains a set bit and the length grows by one.
+ static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array,
+                                                  int64_t value) {
+   struct ArrowArrayPrivateData* priv =
+       (struct ArrowArrayPrivateData*)array->private_data;
+   struct ArrowBuffer* values = ArrowArrayBuffer(array, 1);
+
+   switch (priv->storage_type) {
+     case NANOARROW_TYPE_UINT64:
+     case NANOARROW_TYPE_UINT32:
+     case NANOARROW_TYPE_UINT16:
+     case NANOARROW_TYPE_UINT8:
+       // The unsigned appender does the width check and the bookkeeping
+       _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
+       return ArrowArrayAppendUInt(array, value);
+     case NANOARROW_TYPE_INT8:
+       _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX);
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(values, value));
+       break;
+     case NANOARROW_TYPE_INT16:
+       _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX);
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(values, value));
+       break;
+     case NANOARROW_TYPE_INT32:
+       _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX);
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(values, value));
+       break;
+     case NANOARROW_TYPE_INT64:
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(values, &value, sizeof(int64_t)));
+       break;
+     case NANOARROW_TYPE_FLOAT:
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(values, value));
+       break;
+     case NANOARROW_TYPE_DOUBLE:
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(values, value));
+       break;
+     case NANOARROW_TYPE_BOOL:
+       NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
+       break;
+     default:
+       return EINVAL;
+   }
+
+   // Only track validity once a bitmap exists
+   struct ArrowBitmap* validity = ArrowArrayValidityBitmap(array);
+   if (validity->buffer.data != NULL) {
+     NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(validity, 1, 1));
+   }
+
+   array->length++;
+   return NANOARROW_OK;
+ }
+
+ // Append one unsigned integer to array.
+ //
+ // Unsigned storage types go through _NANOARROW_CHECK_RANGE for their width
+ // before the value is appended; signed storage types require the value to be
+ // at most INT64_MAX and are then delegated to ArrowArrayAppendInt(). Double and
+ // float storage receive the converted value and bool storage receives
+ // value != 0. Any other storage type yields EINVAL. On success the validity
+ // bitmap (if one was allocated) gains a set bit and the length grows by one.
+ static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
+                                                   uint64_t value) {
+   struct ArrowArrayPrivateData* priv =
+       (struct ArrowArrayPrivateData*)array->private_data;
+   struct ArrowBuffer* values = ArrowArrayBuffer(array, 1);
+
+   switch (priv->storage_type) {
+     case NANOARROW_TYPE_INT64:
+     case NANOARROW_TYPE_INT32:
+     case NANOARROW_TYPE_INT16:
+     case NANOARROW_TYPE_INT8:
+       // The signed appender does the width check and the bookkeeping
+       _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
+       return ArrowArrayAppendInt(array, value);
+     case NANOARROW_TYPE_UINT8:
+       _NANOARROW_CHECK_RANGE(value, 0, UINT8_MAX);
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(values, value));
+       break;
+     case NANOARROW_TYPE_UINT16:
+       _NANOARROW_CHECK_RANGE(value, 0, UINT16_MAX);
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(values, value));
+       break;
+     case NANOARROW_TYPE_UINT32:
+       _NANOARROW_CHECK_RANGE(value, 0, UINT32_MAX);
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(values, value));
+       break;
+     case NANOARROW_TYPE_UINT64:
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(values, &value, sizeof(uint64_t)));
+       break;
+     case NANOARROW_TYPE_FLOAT:
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(values, value));
+       break;
+     case NANOARROW_TYPE_DOUBLE:
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(values, value));
+       break;
+     case NANOARROW_TYPE_BOOL:
+       NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
+       break;
+     default:
+       return EINVAL;
+   }
+
+   // Only track validity once a bitmap exists
+   struct ArrowBitmap* validity = ArrowArrayValidityBitmap(array);
+   if (validity->buffer.data != NULL) {
+     NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(validity, 1, 1));
+   }
+
+   array->length++;
+   return NANOARROW_OK;
+ }
+
+ // Append one floating point value to array.
+ //
+ // Double storage stores the value unchanged. Float storage first passes the
+ // value through _NANOARROW_CHECK_RANGE against [-FLT_MAX, FLT_MAX]. The lower
+ // bound must be -FLT_MAX: FLT_MIN is the smallest positive normalized float,
+ // so using it as the lower bound rejects zero and every negative value.
+ // Any other storage type yields EINVAL. On success the validity bitmap (if one
+ // was allocated) gains a set bit and the length grows by one.
+ static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
+                                                     double value) {
+   struct ArrowArrayPrivateData* private_data =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
+
+   switch (private_data->storage_type) {
+     case NANOARROW_TYPE_DOUBLE:
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(double)));
+       break;
+     case NANOARROW_TYPE_FLOAT:
+       _NANOARROW_CHECK_RANGE(value, -FLT_MAX, FLT_MAX);
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value));
+       break;
+     default:
+       return EINVAL;
+   }
+
+   if (private_data->bitmap.buffer.data != NULL) {
+     NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+   }
+
+   array->length++;
+   return NANOARROW_OK;
+ }
+
+ // Append the bytes described by value as one element of array.
+ //
+ // String/binary storage appends a new 32-bit end offset (EINVAL if it would
+ // exceed INT32_MAX) followed by the bytes; the large variants do the same with
+ // 64-bit offsets and no limit check. Fixed-size binary storage requires
+ // value.n_bytes to equal the element size from the layout (EINVAL otherwise)
+ // and appends the bytes to buffer 1. Any other storage type yields EINVAL. On
+ // success the validity bitmap (if one was allocated) gains a set bit and the
+ // length grows by one.
+ static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
+                                                    struct ArrowBufferView value) {
+   struct ArrowArrayPrivateData* priv =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   switch (priv->storage_type) {
+     case NANOARROW_TYPE_STRING:
+     case NANOARROW_TYPE_BINARY: {
+       struct ArrowBuffer* offsets = ArrowArrayBuffer(array, 1);
+       int32_t end_offset = ((int32_t*)offsets->data)[array->length];
+       if ((end_offset + value.n_bytes) > INT32_MAX) {
+         return EINVAL;
+       }
+
+       end_offset += value.n_bytes;
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offsets, &end_offset, sizeof(int32_t)));
+       NANOARROW_RETURN_NOT_OK(
+           ArrowBufferAppend(ArrowArrayBuffer(array, 2), value.data.data, value.n_bytes));
+       break;
+     }
+
+     case NANOARROW_TYPE_LARGE_STRING:
+     case NANOARROW_TYPE_LARGE_BINARY: {
+       struct ArrowBuffer* offsets = ArrowArrayBuffer(array, 1);
+       int64_t end_offset = ((int64_t*)offsets->data)[array->length];
+       end_offset += value.n_bytes;
+       NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offsets, &end_offset, sizeof(int64_t)));
+       NANOARROW_RETURN_NOT_OK(
+           ArrowBufferAppend(ArrowArrayBuffer(array, 2), value.data.data, value.n_bytes));
+       break;
+     }
+
+     case NANOARROW_TYPE_FIXED_SIZE_BINARY: {
+       const int64_t element_bytes = priv->layout.element_size_bits[1] / 8;
+       if (value.n_bytes != element_bytes) {
+         return EINVAL;
+       }
+
+       // Fixed-size binary has no offsets: the bytes live in buffer 1
+       NANOARROW_RETURN_NOT_OK(
+           ArrowBufferAppend(ArrowArrayBuffer(array, 1), value.data.data, value.n_bytes));
+       break;
+     }
+
+     default:
+       return EINVAL;
+   }
+
+   // Only track validity once a bitmap exists
+   struct ArrowBitmap* validity = ArrowArrayValidityBitmap(array);
+   if (validity->buffer.data != NULL) {
+     NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(validity, 1, 1));
+   }
+
+   array->length++;
+   return NANOARROW_OK;
+ }
+
+ // Append the string described by value as one element of array.
+ //
+ // Only string and large string storage is accepted; everything else yields
+ // EINVAL. The actual work is done by ArrowArrayAppendBytes().
+ static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
+                                                     struct ArrowStringView value) {
+   struct ArrowArrayPrivateData* priv =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   if (priv->storage_type != NANOARROW_TYPE_STRING &&
+       priv->storage_type != NANOARROW_TYPE_LARGE_STRING) {
+     return EINVAL;
+   }
+
+   struct ArrowBufferView bytes;
+   bytes.data.data = value.data;
+   bytes.n_bytes = value.n_bytes;
+   return ArrowArrayAppendBytes(array, bytes);
+ }
+
+ // Close the element currently being built in a nested array.
+ //
+ // List storage appends the length of the first child as the next 32-bit offset
+ // (EINVAL if it exceeds INT32_MAX); large list storage does the same with a
+ // 64-bit offset. Fixed-size list and struct storage only verify that the
+ // children have the length expected after this element and return EINVAL
+ // otherwise. Any other storage type yields EINVAL. On success the validity
+ // bitmap (if one was allocated) gains a set bit and the length grows by one.
+ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
+   struct ArrowArrayPrivateData* priv =
+       (struct ArrowArrayPrivateData*)array->private_data;
+
+   switch (priv->storage_type) {
+     case NANOARROW_TYPE_LIST: {
+       const int64_t n_child = array->children[0]->length;
+       if (n_child > INT32_MAX) {
+         return EINVAL;
+       }
+       NANOARROW_RETURN_NOT_OK(
+           ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), n_child));
+       break;
+     }
+     case NANOARROW_TYPE_LARGE_LIST: {
+       const int64_t n_child = array->children[0]->length;
+       NANOARROW_RETURN_NOT_OK(
+           ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), n_child));
+       break;
+     }
+     case NANOARROW_TYPE_FIXED_SIZE_LIST: {
+       const int64_t n_child = array->children[0]->length;
+       if (n_child != ((array->length + 1) * priv->layout.child_size_elements)) {
+         return EINVAL;
+       }
+       break;
+     }
+     case NANOARROW_TYPE_STRUCT:
+       for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
+         if (array->children[child_i]->length != (array->length + 1)) {
+           return EINVAL;
+         }
+       }
+       break;
+     default:
+       return EINVAL;
+   }
+
+   // Only track validity once a bitmap exists
+   struct ArrowBitmap* validity = ArrowArrayValidityBitmap(array);
+   if (validity->buffer.data != NULL) {
+     NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(validity, 1, 1));
+   }
+
+   array->length++;
+   return NANOARROW_OK;
+ }
+
+ // Report whether element i of array_view is null.
+ //
+ // NANOARROW_TYPE_NA always reports 0x01 and union storage, which is not
+ // supported yet, reports 0xff. For every other storage type the element is
+ // null when a validity buffer exists and its bit at i plus the array offset
+ // is not set.
+ static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i) {
+   const uint8_t* validity = array_view->buffer_views[0].data.as_uint8;
+   const int64_t pos = i + array_view->array->offset;
+
+   switch (array_view->storage_type) {
+     case NANOARROW_TYPE_NA:
+       return 0x01;
+     case NANOARROW_TYPE_DENSE_UNION:
+     case NANOARROW_TYPE_SPARSE_UNION:
+       // Not supported yet
+       return 0xff;
+     default:
+       break;
+   }
+
+   if (validity == NULL) {
+     return 0;
+   }
+
+   return !ArrowBitGet(validity, pos);
+ }
+
+ // Read element i of array_view as a signed 64-bit integer.
+ //
+ // The array offset is added to i before indexing buffer 1; no bounds or
+ // validity check is made. Values stored as another numeric type are converted
+ // by plain C conversion, bool storage returns the result of ArrowBitGet(), and
+ // unsupported storage types return INT64_MAX.
+ static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view,
+                                                  int64_t i) {
+   const struct ArrowBufferView* values = &array_view->buffer_views[1];
+   const int64_t pos = i + array_view->array->offset;
+
+   switch (array_view->storage_type) {
+     case NANOARROW_TYPE_BOOL:
+       return ArrowBitGet(values->data.as_uint8, pos);
+     case NANOARROW_TYPE_INT8:
+       return values->data.as_int8[pos];
+     case NANOARROW_TYPE_UINT8:
+       return values->data.as_uint8[pos];
+     case NANOARROW_TYPE_INT16:
+       return values->data.as_int16[pos];
+     case NANOARROW_TYPE_UINT16:
+       return values->data.as_uint16[pos];
+     case NANOARROW_TYPE_INT32:
+       return values->data.as_int32[pos];
+     case NANOARROW_TYPE_UINT32:
+       return values->data.as_uint32[pos];
+     case NANOARROW_TYPE_INT64:
+       return values->data.as_int64[pos];
+     case NANOARROW_TYPE_UINT64:
+       return values->data.as_uint64[pos];
+     case NANOARROW_TYPE_FLOAT:
+       return values->data.as_float[pos];
+     case NANOARROW_TYPE_DOUBLE:
+       return values->data.as_double[pos];
+     default:
+       return INT64_MAX;
+   }
+ }
+
+ // Read element i of array_view as an unsigned 64-bit integer.
+ //
+ // The array offset is added to i before indexing buffer 1; no bounds or
+ // validity check is made. Values stored as another numeric type are converted
+ // by plain C conversion, bool storage returns the result of ArrowBitGet(), and
+ // unsupported storage types return UINT64_MAX.
+ static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view,
+                                                    int64_t i) {
+   const struct ArrowBufferView* values = &array_view->buffer_views[1];
+   const int64_t pos = i + array_view->array->offset;
+
+   switch (array_view->storage_type) {
+     case NANOARROW_TYPE_BOOL:
+       return ArrowBitGet(values->data.as_uint8, pos);
+     case NANOARROW_TYPE_INT8:
+       return values->data.as_int8[pos];
+     case NANOARROW_TYPE_UINT8:
+       return values->data.as_uint8[pos];
+     case NANOARROW_TYPE_INT16:
+       return values->data.as_int16[pos];
+     case NANOARROW_TYPE_UINT16:
+       return values->data.as_uint16[pos];
+     case NANOARROW_TYPE_INT32:
+       return values->data.as_int32[pos];
+     case NANOARROW_TYPE_UINT32:
+       return values->data.as_uint32[pos];
+     case NANOARROW_TYPE_INT64:
+       return values->data.as_int64[pos];
+     case NANOARROW_TYPE_UINT64:
+       return values->data.as_uint64[pos];
+     case NANOARROW_TYPE_FLOAT:
+       return values->data.as_float[pos];
+     case NANOARROW_TYPE_DOUBLE:
+       return values->data.as_double[pos];
+     default:
+       return UINT64_MAX;
+   }
+ }
+
+ // Read element i of array_view as a double.
+ //
+ // The array offset is added to i before indexing buffer 1; no bounds or
+ // validity check is made. Values stored as another numeric type are converted
+ // by plain C conversion, bool storage returns the result of ArrowBitGet(), and
+ // unsupported storage types return DBL_MAX.
+ static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view,
+                                                    int64_t i) {
+   const struct ArrowBufferView* values = &array_view->buffer_views[1];
+   const int64_t pos = i + array_view->array->offset;
+
+   switch (array_view->storage_type) {
+     case NANOARROW_TYPE_BOOL:
+       return ArrowBitGet(values->data.as_uint8, pos);
+     case NANOARROW_TYPE_INT8:
+       return values->data.as_int8[pos];
+     case NANOARROW_TYPE_UINT8:
+       return values->data.as_uint8[pos];
+     case NANOARROW_TYPE_INT16:
+       return values->data.as_int16[pos];
+     case NANOARROW_TYPE_UINT16:
+       return values->data.as_uint16[pos];
+     case NANOARROW_TYPE_INT32:
+       return values->data.as_int32[pos];
+     case NANOARROW_TYPE_UINT32:
+       return values->data.as_uint32[pos];
+     case NANOARROW_TYPE_INT64:
+       return values->data.as_int64[pos];
+     case NANOARROW_TYPE_UINT64:
+       return values->data.as_uint64[pos];
+     case NANOARROW_TYPE_FLOAT:
+       return values->data.as_float[pos];
+     case NANOARROW_TYPE_DOUBLE:
+       return values->data.as_double[pos];
+     default:
+       return DBL_MAX;
+   }
+ }
+
+ // Return a view of element i of array_view as a string.
+ //
+ // The array offset is added to i; no bounds or validity check is made. For
+ // string/binary storage and their large variants, the element spans the bytes
+ // of buffer 2 between offsets i and i + 1 of buffer 1. For fixed-size binary
+ // storage it is the i-th slot of buffer 1, with the slot size taken from the
+ // layout. Other storage types give a NULL view of zero bytes.
+ static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
+     struct ArrowArrayView* array_view, int64_t i) {
+   const int64_t pos = i + array_view->array->offset;
+   const struct ArrowBufferView* offsets = &array_view->buffer_views[1];
+   const char* bytes = array_view->buffer_views[2].data.as_char;
+
+   struct ArrowStringView result;
+   switch (array_view->storage_type) {
+     case NANOARROW_TYPE_STRING:
+     case NANOARROW_TYPE_BINARY:
+       result.data = bytes + offsets->data.as_int32[pos];
+       result.n_bytes = offsets->data.as_int32[pos + 1] - offsets->data.as_int32[pos];
+       return result;
+     case NANOARROW_TYPE_LARGE_STRING:
+     case NANOARROW_TYPE_LARGE_BINARY:
+       result.data = bytes + offsets->data.as_int64[pos];
+       result.n_bytes = offsets->data.as_int64[pos + 1] - offsets->data.as_int64[pos];
+       return result;
+     case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+       result.n_bytes = array_view->layout.element_size_bits[1] / 8;
+       result.data = array_view->buffer_views[1].data.as_char + (pos * result.n_bytes);
+       return result;
+     default:
+       result.data = NULL;
+       result.n_bytes = 0;
+       return result;
+   }
+ }
+
+ // Return a view of element i of array_view as raw bytes.
+ //
+ // The array offset is added to i; no bounds or validity check is made. For
+ // string/binary storage and their large variants, the element spans the bytes
+ // of buffer 2 between offsets i and i + 1 of buffer 1. For fixed-size binary
+ // storage it is the i-th slot of buffer 1, with the slot size taken from the
+ // layout. Other storage types give a NULL view of zero bytes.
+ static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
+     struct ArrowArrayView* array_view, int64_t i) {
+   const int64_t pos = i + array_view->array->offset;
+   const struct ArrowBufferView* offsets = &array_view->buffer_views[1];
+   const uint8_t* bytes = array_view->buffer_views[2].data.as_uint8;
+
+   struct ArrowBufferView result;
+   switch (array_view->storage_type) {
+     case NANOARROW_TYPE_STRING:
+     case NANOARROW_TYPE_BINARY:
+       result.n_bytes = offsets->data.as_int32[pos + 1] - offsets->data.as_int32[pos];
+       result.data.as_uint8 = bytes + offsets->data.as_int32[pos];
+       return result;
+     case NANOARROW_TYPE_LARGE_STRING:
+     case NANOARROW_TYPE_LARGE_BINARY:
+       result.n_bytes = offsets->data.as_int64[pos + 1] - offsets->data.as_int64[pos];
+       result.data.as_uint8 = bytes + offsets->data.as_int64[pos];
+       return result;
+     case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+       result.n_bytes = array_view->layout.element_size_bits[1] / 8;
+       result.data.as_uint8 =
+           array_view->buffer_views[1].data.as_uint8 + (pos * result.n_bytes);
+       return result;
+     default:
+       result.data.data = NULL;
+       result.n_bytes = 0;
+       return result;
+   }
+ }
#ifdef __cplusplus
}
diff --git a/c/vendor/nanoarrow/schema.c b/c/vendor/nanoarrow/schema.c
deleted file mode 100644
index 9833c4e..0000000
--- a/c/vendor/nanoarrow/schema.c
+++ /dev/null
@@ -1,466 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "nanoarrow.h"
-
-void ArrowSchemaRelease(struct ArrowSchema* schema) {
- if (schema->format != NULL) ArrowFree((void*)schema->format);
- if (schema->name != NULL) ArrowFree((void*)schema->name);
- if (schema->metadata != NULL) ArrowFree((void*)schema->metadata);
-
- // This object owns the memory for all the children, but those
- // children may have been generated elsewhere and might have
- // their own release() callback.
- if (schema->children != NULL) {
- for (int64_t i = 0; i < schema->n_children; i++) {
- if (schema->children[i] != NULL) {
- if (schema->children[i]->release != NULL) {
- schema->children[i]->release(schema->children[i]);
- }
-
- ArrowFree(schema->children[i]);
- }
- }
-
- ArrowFree(schema->children);
- }
-
- // This object owns the memory for the dictionary but it
- // may have been generated somewhere else and have its own
- // release() callback.
- if (schema->dictionary != NULL) {
- if (schema->dictionary->release != NULL) {
- schema->dictionary->release(schema->dictionary);
- }
-
- ArrowFree(schema->dictionary);
- }
-
- // private data not currently used
- if (schema->private_data != NULL) {
- ArrowFree(schema->private_data);
- }
-
- schema->release = NULL;
-}
-
-const char* ArrowSchemaFormatTemplate(enum ArrowType data_type) {
- switch (data_type) {
- case NANOARROW_TYPE_UNINITIALIZED:
- return NULL;
- case NANOARROW_TYPE_NA:
- return "n";
- case NANOARROW_TYPE_BOOL:
- return "b";
-
- case NANOARROW_TYPE_UINT8:
- return "C";
- case NANOARROW_TYPE_INT8:
- return "c";
- case NANOARROW_TYPE_UINT16:
- return "S";
- case NANOARROW_TYPE_INT16:
- return "s";
- case NANOARROW_TYPE_UINT32:
- return "I";
- case NANOARROW_TYPE_INT32:
- return "i";
- case NANOARROW_TYPE_UINT64:
- return "L";
- case NANOARROW_TYPE_INT64:
- return "l";
-
- case NANOARROW_TYPE_HALF_FLOAT:
- return "e";
- case NANOARROW_TYPE_FLOAT:
- return "f";
- case NANOARROW_TYPE_DOUBLE:
- return "g";
-
- case NANOARROW_TYPE_STRING:
- return "u";
- case NANOARROW_TYPE_LARGE_STRING:
- return "U";
- case NANOARROW_TYPE_BINARY:
- return "z";
- case NANOARROW_TYPE_LARGE_BINARY:
- return "Z";
-
- case NANOARROW_TYPE_DATE32:
- return "tdD";
- case NANOARROW_TYPE_DATE64:
- return "tdm";
- case NANOARROW_TYPE_INTERVAL_MONTHS:
- return "tiM";
- case NANOARROW_TYPE_INTERVAL_DAY_TIME:
- return "tiD";
- case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
- return "tin";
-
- case NANOARROW_TYPE_LIST:
- return "+l";
- case NANOARROW_TYPE_LARGE_LIST:
- return "+L";
- case NANOARROW_TYPE_STRUCT:
- return "+s";
- case NANOARROW_TYPE_MAP:
- return "+m";
-
- default:
- return NULL;
- }
-}
-
-ArrowErrorCode ArrowSchemaInit(struct ArrowSchema* schema, enum ArrowType data_type) {
- schema->format = NULL;
- schema->name = NULL;
- schema->metadata = NULL;
- schema->flags = ARROW_FLAG_NULLABLE;
- schema->n_children = 0;
- schema->children = NULL;
- schema->dictionary = NULL;
- schema->private_data = NULL;
- schema->release = &ArrowSchemaRelease;
-
- // We don't allocate the dictionary because it has to be nullptr
- // for non-dictionary-encoded arrays.
-
- // Set the format to a valid format string for data_type
- const char* template_format = ArrowSchemaFormatTemplate(data_type);
-
- // If data_type isn't recognized and not explicitly unset
- if (template_format == NULL && data_type != NANOARROW_TYPE_UNINITIALIZED) {
- schema->release(schema);
- return EINVAL;
- }
-
- int result = ArrowSchemaSetFormat(schema, template_format);
- if (result != NANOARROW_OK) {
- schema->release(schema);
- return result;
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowSchemaInitFixedSize(struct ArrowSchema* schema,
- enum ArrowType data_type, int32_t fixed_size) {
- NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED));
-
- if (fixed_size <= 0) {
- schema->release(schema);
- return EINVAL;
- }
-
- char buffer[64];
- int n_chars;
- switch (data_type) {
- case NANOARROW_TYPE_FIXED_SIZE_BINARY:
- n_chars = snprintf(buffer, sizeof(buffer), "w:%d", (int)fixed_size);
- break;
- case NANOARROW_TYPE_FIXED_SIZE_LIST:
- n_chars = snprintf(buffer, sizeof(buffer), "+w:%d", (int)fixed_size);
- break;
- default:
- schema->release(schema);
- return EINVAL;
- }
-
- buffer[n_chars] = '\0';
- int result = ArrowSchemaSetFormat(schema, buffer);
- if (result != NANOARROW_OK) {
- schema->release(schema);
- return result;
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowSchemaInitDecimal(struct ArrowSchema* schema,
- enum ArrowType data_type, int32_t decimal_precision,
- int32_t decimal_scale) {
- NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED));
-
- if (decimal_precision <= 0) {
- schema->release(schema);
- return EINVAL;
- }
-
- char buffer[64];
- int n_chars;
- switch (data_type) {
- case NANOARROW_TYPE_DECIMAL128:
- n_chars =
- snprintf(buffer, sizeof(buffer), "d:%d,%d", decimal_precision, decimal_scale);
- break;
- case NANOARROW_TYPE_DECIMAL256:
- n_chars = snprintf(buffer, sizeof(buffer), "d:%d,%d,256", decimal_precision,
- decimal_scale);
- break;
- default:
- schema->release(schema);
- return EINVAL;
- }
-
- buffer[n_chars] = '\0';
-
- int result = ArrowSchemaSetFormat(schema, buffer);
- if (result != NANOARROW_OK) {
- schema->release(schema);
- return result;
- }
-
- return NANOARROW_OK;
-}
-
-static const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
- switch (time_unit) {
- case NANOARROW_TIME_UNIT_SECOND:
- return "s";
- case NANOARROW_TIME_UNIT_MILLI:
- return "m";
- case NANOARROW_TIME_UNIT_MICRO:
- return "u";
- case NANOARROW_TIME_UNIT_NANO:
- return "n";
- default:
- return NULL;
- }
-}
-
-ArrowErrorCode ArrowSchemaInitDateTime(struct ArrowSchema* schema,
- enum ArrowType data_type,
- enum ArrowTimeUnit time_unit,
- const char* timezone) {
- int result = ArrowSchemaInit(schema, NANOARROW_TYPE_UNINITIALIZED);
- if (result != NANOARROW_OK) {
- return result;
- }
-
- const char* time_unit_str = ArrowTimeUnitString(time_unit);
- if (time_unit_str == NULL) {
- schema->release(schema);
- return EINVAL;
- }
-
- char buffer[128];
- int n_chars;
- switch (data_type) {
- case NANOARROW_TYPE_TIME32:
- case NANOARROW_TYPE_TIME64:
- if (timezone != NULL) {
- schema->release(schema);
- return EINVAL;
- }
- n_chars = snprintf(buffer, sizeof(buffer), "tt%s", time_unit_str);
- break;
- case NANOARROW_TYPE_TIMESTAMP:
- if (timezone == NULL) {
- timezone = "";
- }
- n_chars = snprintf(buffer, sizeof(buffer), "ts%s:%s", time_unit_str, timezone);
- break;
- case NANOARROW_TYPE_DURATION:
- if (timezone != NULL) {
- schema->release(schema);
- return EINVAL;
- }
- n_chars = snprintf(buffer, sizeof(buffer), "tD%s", time_unit_str);
- break;
- default:
- schema->release(schema);
- return EINVAL;
- }
-
- if (n_chars >= sizeof(buffer)) {
- schema->release(schema);
- return ERANGE;
- }
-
- buffer[n_chars] = '\0';
-
- result = ArrowSchemaSetFormat(schema, buffer);
- if (result != NANOARROW_OK) {
- schema->release(schema);
- return result;
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format) {
- if (schema->format != NULL) {
- ArrowFree((void*)schema->format);
- }
-
- if (format != NULL) {
- size_t format_size = strlen(format) + 1;
- schema->format = (const char*)ArrowMalloc(format_size);
- if (schema->format == NULL) {
- return ENOMEM;
- }
-
- memcpy((void*)schema->format, format, format_size);
- } else {
- schema->format = NULL;
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name) {
- if (schema->name != NULL) {
- ArrowFree((void*)schema->name);
- }
-
- if (name != NULL) {
- size_t name_size = strlen(name) + 1;
- schema->name = (const char*)ArrowMalloc(name_size);
- if (schema->name == NULL) {
- return ENOMEM;
- }
-
- memcpy((void*)schema->name, name, name_size);
- } else {
- schema->name = NULL;
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata) {
- if (schema->metadata != NULL) {
- ArrowFree((void*)schema->metadata);
- }
-
- if (metadata != NULL) {
- size_t metadata_size = ArrowMetadataSizeOf(metadata);
- schema->metadata = (const char*)ArrowMalloc(metadata_size);
- if (schema->metadata == NULL) {
- return ENOMEM;
- }
-
- memcpy((void*)schema->metadata, metadata, metadata_size);
- } else {
- schema->metadata = NULL;
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema,
- int64_t n_children) {
- if (schema->children != NULL) {
- return EEXIST;
- }
-
- if (n_children > 0) {
- schema->children =
- (struct ArrowSchema**)ArrowMalloc(n_children * sizeof(struct ArrowSchema*));
-
- if (schema->children == NULL) {
- return ENOMEM;
- }
-
- schema->n_children = n_children;
-
- memset(schema->children, 0, n_children * sizeof(struct ArrowSchema*));
-
- for (int64_t i = 0; i < n_children; i++) {
- schema->children[i] = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema));
-
- if (schema->children[i] == NULL) {
- return ENOMEM;
- }
-
- schema->children[i]->release = NULL;
- }
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema) {
- if (schema->dictionary != NULL) {
- return EEXIST;
- }
-
- schema->dictionary = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema));
- if (schema->dictionary == NULL) {
- return ENOMEM;
- }
-
- schema->dictionary->release = NULL;
- return NANOARROW_OK;
-}
-
-int ArrowSchemaDeepCopy(struct ArrowSchema* schema, struct ArrowSchema* schema_out) {
- NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(schema_out, NANOARROW_TYPE_NA));
-
- int result = ArrowSchemaSetFormat(schema_out, schema->format);
- if (result != NANOARROW_OK) {
- schema_out->release(schema_out);
- return result;
- }
-
- result = ArrowSchemaSetName(schema_out, schema->name);
- if (result != NANOARROW_OK) {
- schema_out->release(schema_out);
- return result;
- }
-
- result = ArrowSchemaSetMetadata(schema_out, schema->metadata);
- if (result != NANOARROW_OK) {
- schema_out->release(schema_out);
- return result;
- }
-
- result = ArrowSchemaAllocateChildren(schema_out, schema->n_children);
- if (result != NANOARROW_OK) {
- schema_out->release(schema_out);
- return result;
- }
-
- for (int64_t i = 0; i < schema->n_children; i++) {
- result = ArrowSchemaDeepCopy(schema->children[i], schema_out->children[i]);
- if (result != NANOARROW_OK) {
- schema_out->release(schema_out);
- return result;
- }
- }
-
- if (schema->dictionary != NULL) {
- result = ArrowSchemaAllocateDictionary(schema_out);
- if (result != NANOARROW_OK) {
- schema_out->release(schema_out);
- return result;
- }
-
- result = ArrowSchemaDeepCopy(schema->dictionary, schema_out->dictionary);
- if (result != NANOARROW_OK) {
- schema_out->release(schema_out);
- return result;
- }
- }
-
- return NANOARROW_OK;
-}
diff --git a/c/vendor/nanoarrow/schema_view.c b/c/vendor/nanoarrow/schema_view.c
deleted file mode 100644
index 691f737..0000000
--- a/c/vendor/nanoarrow/schema_view.c
+++ /dev/null
@@ -1,638 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <errno.h>
-#include <string.h>
-
-#include "nanoarrow.h"
-
-static void ArrowSchemaViewSetPrimitive(struct ArrowSchemaView* schema_view,
- enum ArrowType data_type) {
- schema_view->data_type = data_type;
- schema_view->storage_data_type = data_type;
-}
-
-static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
- const char* format,
- const char** format_end_out,
- struct ArrowError* error) {
- *format_end_out = format;
-
- // needed for decimal parsing
- const char* parse_start;
- char* parse_end;
-
- switch (format[0]) {
- case 'n':
- schema_view->data_type = NANOARROW_TYPE_NA;
- schema_view->storage_data_type = NANOARROW_TYPE_NA;
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'b':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BOOL);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'c':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT8);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'C':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT8);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 's':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT16);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'S':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT16);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'i':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'I':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT32);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'l':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'L':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_UINT64);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'e':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_HALF_FLOAT);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'f':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_FLOAT);
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'g':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DOUBLE);
- *format_end_out = format + 1;
- return NANOARROW_OK;
-
- // decimal
- case 'd':
- if (format[1] != ':' || format[2] == '\0') {
- ArrowErrorSet(error, "Expected ':precision,scale[,bitwidth]' following 'd'",
- format + 3);
- return EINVAL;
- }
-
- parse_start = format + 2;
- schema_view->decimal_precision = strtol(parse_start, &parse_end, 10);
- if (parse_end == parse_start || parse_end[0] != ',') {
- ArrowErrorSet(error, "Expected 'precision,scale[,bitwidth]' following 'd:'");
- return EINVAL;
- }
-
- parse_start = parse_end + 1;
- schema_view->decimal_scale = strtol(parse_start, &parse_end, 10);
- if (parse_end == parse_start) {
- ArrowErrorSet(error, "Expected 'scale[,bitwidth]' following 'd:precision,'");
- return EINVAL;
- } else if (parse_end[0] != ',') {
- schema_view->decimal_bitwidth = 128;
- } else {
- parse_start = parse_end + 1;
- schema_view->decimal_bitwidth = strtol(parse_start, &parse_end, 10);
- if (parse_start == parse_end) {
- ArrowErrorSet(error, "Expected precision following 'd:precision,scale,'");
- return EINVAL;
- }
- }
-
- *format_end_out = parse_end;
-
- switch (schema_view->decimal_bitwidth) {
- case 128:
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL128);
- return NANOARROW_OK;
- case 256:
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL256);
- return NANOARROW_OK;
- default:
- ArrowErrorSet(error, "Expected decimal bitwidth of 128 or 256 but found %d",
- (int)schema_view->decimal_bitwidth);
- return EINVAL;
- }
-
- // validity + data
- case 'w':
- schema_view->data_type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
- schema_view->storage_data_type = NANOARROW_TYPE_FIXED_SIZE_BINARY;
- if (format[1] != ':' || format[2] == '\0') {
- ArrowErrorSet(error, "Expected ':<width>' following 'w'");
- return EINVAL;
- }
-
- schema_view->fixed_size = strtol(format + 2, (char**)format_end_out, 10);
- return NANOARROW_OK;
-
- // validity + offset + data
- case 'z':
- schema_view->data_type = NANOARROW_TYPE_BINARY;
- schema_view->storage_data_type = NANOARROW_TYPE_BINARY;
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'u':
- schema_view->data_type = NANOARROW_TYPE_STRING;
- schema_view->storage_data_type = NANOARROW_TYPE_STRING;
- *format_end_out = format + 1;
- return NANOARROW_OK;
-
- // validity + large_offset + data
- case 'Z':
- schema_view->data_type = NANOARROW_TYPE_LARGE_BINARY;
- schema_view->storage_data_type = NANOARROW_TYPE_LARGE_BINARY;
- *format_end_out = format + 1;
- return NANOARROW_OK;
- case 'U':
- schema_view->data_type = NANOARROW_TYPE_LARGE_STRING;
- schema_view->storage_data_type = NANOARROW_TYPE_LARGE_STRING;
- *format_end_out = format + 1;
- return NANOARROW_OK;
-
- // nested types
- case '+':
- switch (format[1]) {
- // list has validity + offset or offset
- case 'l':
- schema_view->storage_data_type = NANOARROW_TYPE_LIST;
- schema_view->data_type = NANOARROW_TYPE_LIST;
- *format_end_out = format + 2;
- return NANOARROW_OK;
-
- // large list has validity + large_offset or large_offset
- case 'L':
- schema_view->storage_data_type = NANOARROW_TYPE_LARGE_LIST;
- schema_view->data_type = NANOARROW_TYPE_LARGE_LIST;
- *format_end_out = format + 2;
- return NANOARROW_OK;
-
- // just validity buffer
- case 'w':
- if (format[2] != ':' || format[3] == '\0') {
- ArrowErrorSet(error, "Expected ':<width>' following '+w'");
- return EINVAL;
- }
-
- schema_view->storage_data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
- schema_view->data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
- schema_view->fixed_size = strtol(format + 3, (char**)format_end_out, 10);
- return NANOARROW_OK;
- case 's':
- schema_view->storage_data_type = NANOARROW_TYPE_STRUCT;
- schema_view->data_type = NANOARROW_TYPE_STRUCT;
- *format_end_out = format + 2;
- return NANOARROW_OK;
- case 'm':
- schema_view->storage_data_type = NANOARROW_TYPE_MAP;
- schema_view->data_type = NANOARROW_TYPE_MAP;
- *format_end_out = format + 2;
- return NANOARROW_OK;
-
- // unions
- case 'u':
- switch (format[2]) {
- case 'd':
- schema_view->storage_data_type = NANOARROW_TYPE_DENSE_UNION;
- schema_view->data_type = NANOARROW_TYPE_DENSE_UNION;
- break;
- case 's':
- schema_view->storage_data_type = NANOARROW_TYPE_SPARSE_UNION;
- schema_view->data_type = NANOARROW_TYPE_SPARSE_UNION;
- break;
- default:
- ArrowErrorSet(error,
- "Expected union format string +us:<type_ids> or "
- "+ud:<type_ids> but found '%s'",
- format);
- return EINVAL;
- }
-
- if (format[3] == ':') {
- schema_view->union_type_ids.data = format + 4;
- schema_view->union_type_ids.n_bytes = strlen(format + 4);
- *format_end_out = format + strlen(format);
- return NANOARROW_OK;
- } else {
- ArrowErrorSet(error,
- "Expected union format string +us:<type_ids> or +ud:<type_ids> "
- "but found '%s'",
- format);
- return EINVAL;
- }
- }
-
- // date/time types
- case 't':
- switch (format[1]) {
- // date
- case 'd':
- switch (format[2]) {
- case 'D':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
- schema_view->data_type = NANOARROW_TYPE_DATE32;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- case 'm':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
- schema_view->data_type = NANOARROW_TYPE_DATE64;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- default:
- ArrowErrorSet(error, "Expected 'D' or 'm' following 'td' but found '%s'",
- format + 2);
- return EINVAL;
- }
-
- // time of day
- case 't':
- switch (format[2]) {
- case 's':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
- schema_view->data_type = NANOARROW_TYPE_TIME32;
- schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- case 'm':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
- schema_view->data_type = NANOARROW_TYPE_TIME32;
- schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- case 'u':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
- schema_view->data_type = NANOARROW_TYPE_TIME64;
- schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- case 'n':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
- schema_view->data_type = NANOARROW_TYPE_TIME64;
- schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- default:
- ArrowErrorSet(
- error, "Expected 's', 'm', 'u', or 'n' following 'tt' but found '%s'",
- format + 2);
- return EINVAL;
- }
-
- // timestamp
- case 's':
- switch (format[2]) {
- case 's':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
- schema_view->data_type = NANOARROW_TYPE_TIMESTAMP;
- schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
- break;
- case 'm':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
- schema_view->data_type = NANOARROW_TYPE_TIMESTAMP;
- schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
- break;
- case 'u':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
- schema_view->data_type = NANOARROW_TYPE_TIMESTAMP;
- schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
- break;
- case 'n':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
- schema_view->data_type = NANOARROW_TYPE_TIMESTAMP;
- schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
- break;
- default:
- ArrowErrorSet(
- error, "Expected 's', 'm', 'u', or 'n' following 'ts' but found '%s'",
- format + 2);
- return EINVAL;
- }
-
- if (format[3] != ':') {
- ArrowErrorSet(error, "Expected ':' following '%.3s' but found '%s'", format,
- format + 3);
- return EINVAL;
- }
-
- schema_view->timezone.data = format + 4;
- schema_view->timezone.n_bytes = strlen(format + 4);
- *format_end_out = format + strlen(format);
- return NANOARROW_OK;
-
- // duration
- case 'D':
- switch (format[2]) {
- case 's':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
- schema_view->data_type = NANOARROW_TYPE_DURATION;
- schema_view->time_unit = NANOARROW_TIME_UNIT_SECOND;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- case 'm':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT32);
- schema_view->data_type = NANOARROW_TYPE_DURATION;
- schema_view->time_unit = NANOARROW_TIME_UNIT_MILLI;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- case 'u':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
- schema_view->data_type = NANOARROW_TYPE_DURATION;
- schema_view->time_unit = NANOARROW_TIME_UNIT_MICRO;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- case 'n':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INT64);
- schema_view->data_type = NANOARROW_TYPE_DURATION;
- schema_view->time_unit = NANOARROW_TIME_UNIT_NANO;
- *format_end_out = format + 3;
- return NANOARROW_OK;
- default:
- ArrowErrorSet(error,
- "Expected 's', 'm', u', or 'n' following 'tD' but found '%s'",
- format + 2);
- return EINVAL;
- }
-
- // interval
- case 'i':
- switch (format[2]) {
- case 'M':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_MONTHS);
- *format_end_out = format + 3;
- return NANOARROW_OK;
- case 'D':
- ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_INTERVAL_DAY_TIME);
- *format_end_out = format + 3;
- return NANOARROW_OK;
- case 'n':
- ArrowSchemaViewSetPrimitive(schema_view,
- NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO);
- *format_end_out = format + 3;
- return NANOARROW_OK;
- default:
- ArrowErrorSet(error,
- "Expected 'M', 'D', or 'n' following 'ti' but found '%s'",
- format + 2);
- return EINVAL;
- }
-
- default:
- ArrowErrorSet(
- error, "Expected 'd', 't', 's', 'D', or 'i' following 't' but found '%s'",
- format + 1);
- return EINVAL;
- }
-
- default:
- ArrowErrorSet(error, "Unknown format: '%s'", format);
- return EINVAL;
- }
-}
-
-static ArrowErrorCode ArrowSchemaViewValidateNChildren(
- struct ArrowSchemaView* schema_view, int64_t n_children, struct ArrowError* error) {
- if (n_children != -1 && schema_view->schema->n_children != n_children) {
- ArrowErrorSet(error, "Expected schema with %d children but found %d children",
- (int)n_children, (int)schema_view->schema->n_children);
- return EINVAL;
- }
-
- // Don't do a full validation of children but do check that they won't
- // segfault if inspected
- struct ArrowSchema* child;
- for (int64_t i = 0; i < schema_view->schema->n_children; i++) {
- child = schema_view->schema->children[i];
- if (child == NULL) {
- ArrowErrorSet(error, "Expected valid schema at schema->children[%d] but found NULL",
- i);
- return EINVAL;
- } else if (child->release == NULL) {
- ArrowErrorSet(
- error,
- "Expected valid schema at schema->children[%d] but found a released schema", i);
- return EINVAL;
- }
- }
-
- return NANOARROW_OK;
-}
-
-static ArrowErrorCode ArrowSchemaViewValidateUnion(struct ArrowSchemaView* schema_view,
- struct ArrowError* error) {
- return ArrowSchemaViewValidateNChildren(schema_view, -1, error);
-}
-
-static ArrowErrorCode ArrowSchemaViewValidateMap(struct ArrowSchemaView* schema_view,
- struct ArrowError* error) {
- NANOARROW_RETURN_NOT_OK(ArrowSchemaViewValidateNChildren(schema_view, 1, error));
-
- if (schema_view->schema->children[0]->n_children != 2) {
- ArrowErrorSet(error, "Expected child of map type to have 2 children but found %d",
- (int)schema_view->schema->children[0]->n_children);
- return EINVAL;
- }
-
- if (strcmp(schema_view->schema->children[0]->format, "+s") != 0) {
- ArrowErrorSet(error, "Expected format of child of map type to be '+s' but found '%s'",
- schema_view->schema->children[0]->format);
- return EINVAL;
- }
-
- return NANOARROW_OK;
-}
-
-static ArrowErrorCode ArrowSchemaViewValidateDictionary(
- struct ArrowSchemaView* schema_view, struct ArrowError* error) {
- // check for valid index type
- switch (schema_view->storage_data_type) {
- case NANOARROW_TYPE_UINT8:
- case NANOARROW_TYPE_INT8:
- case NANOARROW_TYPE_UINT16:
- case NANOARROW_TYPE_INT16:
- case NANOARROW_TYPE_UINT32:
- case NANOARROW_TYPE_INT32:
- case NANOARROW_TYPE_UINT64:
- case NANOARROW_TYPE_INT64:
- break;
- default:
- ArrowErrorSet(
- error,
- "Expected dictionary schema index type to be an integral type but found '%s'",
- schema_view->schema->format);
- return EINVAL;
- }
-
- struct ArrowSchemaView dictionary_schema_view;
- return ArrowSchemaViewInit(&dictionary_schema_view, schema_view->schema->dictionary,
- error);
-}
-
-static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_view,
- enum ArrowType data_type,
- struct ArrowError* error) {
- switch (data_type) {
- case NANOARROW_TYPE_NA:
- case NANOARROW_TYPE_BOOL:
- case NANOARROW_TYPE_UINT8:
- case NANOARROW_TYPE_INT8:
- case NANOARROW_TYPE_UINT16:
- case NANOARROW_TYPE_INT16:
- case NANOARROW_TYPE_UINT32:
- case NANOARROW_TYPE_INT32:
- case NANOARROW_TYPE_UINT64:
- case NANOARROW_TYPE_INT64:
- case NANOARROW_TYPE_HALF_FLOAT:
- case NANOARROW_TYPE_FLOAT:
- case NANOARROW_TYPE_DOUBLE:
- case NANOARROW_TYPE_DECIMAL128:
- case NANOARROW_TYPE_DECIMAL256:
- case NANOARROW_TYPE_STRING:
- case NANOARROW_TYPE_LARGE_STRING:
- case NANOARROW_TYPE_BINARY:
- case NANOARROW_TYPE_LARGE_BINARY:
- case NANOARROW_TYPE_DATE32:
- case NANOARROW_TYPE_DATE64:
- case NANOARROW_TYPE_INTERVAL_MONTHS:
- case NANOARROW_TYPE_INTERVAL_DAY_TIME:
- case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
- case NANOARROW_TYPE_TIMESTAMP:
- case NANOARROW_TYPE_TIME32:
- case NANOARROW_TYPE_TIME64:
- case NANOARROW_TYPE_DURATION:
- return ArrowSchemaViewValidateNChildren(schema_view, 0, error);
-
- case NANOARROW_TYPE_FIXED_SIZE_BINARY:
- if (schema_view->fixed_size <= 0) {
- ArrowErrorSet(error, "Expected size > 0 for fixed size binary but found size %d",
- schema_view->fixed_size);
- return EINVAL;
- }
- return ArrowSchemaViewValidateNChildren(schema_view, 0, error);
-
- case NANOARROW_TYPE_LIST:
- case NANOARROW_TYPE_LARGE_LIST:
- case NANOARROW_TYPE_FIXED_SIZE_LIST:
- return ArrowSchemaViewValidateNChildren(schema_view, 1, error);
-
- case NANOARROW_TYPE_STRUCT:
- return ArrowSchemaViewValidateNChildren(schema_view, -1, error);
-
- case NANOARROW_TYPE_SPARSE_UNION:
- case NANOARROW_TYPE_DENSE_UNION:
- return ArrowSchemaViewValidateUnion(schema_view, error);
-
- case NANOARROW_TYPE_MAP:
- return ArrowSchemaViewValidateMap(schema_view, error);
-
- case NANOARROW_TYPE_DICTIONARY:
- return ArrowSchemaViewValidateDictionary(schema_view, error);
-
- default:
- ArrowErrorSet(error, "Expected a valid enum ArrowType value but found %d",
- (int)schema_view->data_type);
- return EINVAL;
- }
-
- return NANOARROW_OK;
-}
-
-ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
- struct ArrowSchema* schema, struct ArrowError* error) {
- if (schema == NULL) {
- ArrowErrorSet(error, "Expected non-NULL schema");
- return EINVAL;
- }
-
- if (schema->release == NULL) {
- ArrowErrorSet(error, "Expected non-released schema");
- return EINVAL;
- }
-
- schema_view->schema = schema;
-
- const char* format = schema->format;
- if (format == NULL) {
- ArrowErrorSet(
- error,
- "Error parsing schema->format: Expected a null-terminated string but found NULL");
- return EINVAL;
- }
-
- int format_len = strlen(format);
- if (format_len == 0) {
- ArrowErrorSet(error, "Error parsing schema->format: Expected a string with size > 0");
- return EINVAL;
- }
-
- const char* format_end_out;
- ArrowErrorCode result =
- ArrowSchemaViewParse(schema_view, format, &format_end_out, error);
-
- if (result != NANOARROW_OK) {
- char child_error[1024];
- memcpy(child_error, ArrowErrorMessage(error), 1024);
- ArrowErrorSet(error, "Error parsing schema->format: %s", child_error);
- return result;
- }
-
- if ((format + format_len) != format_end_out) {
- ArrowErrorSet(error, "Error parsing schema->format '%s': parsed %d/%d characters",
- format, (int)(format_end_out - format), (int)(format_len));
- return EINVAL;
- }
-
- if (schema->dictionary != NULL) {
- schema_view->data_type = NANOARROW_TYPE_DICTIONARY;
- }
-
- result = ArrowSchemaViewValidate(schema_view, schema_view->storage_data_type, error);
- if (result != NANOARROW_OK) {
- return result;
- }
-
- if (schema_view->storage_data_type != schema_view->data_type) {
- result = ArrowSchemaViewValidate(schema_view, schema_view->data_type, error);
- if (result != NANOARROW_OK) {
- return result;
- }
- }
-
- ArrowLayoutInit(&schema_view->layout, schema_view->storage_data_type);
- if (schema_view->storage_data_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) {
- schema_view->layout.element_size_bits[1] = schema_view->fixed_size * 8;
- } else if (schema_view->storage_data_type == NANOARROW_TYPE_FIXED_SIZE_LIST) {
- schema_view->layout.child_size_elements = schema_view->fixed_size;
- }
-
- schema_view->extension_name = ArrowCharView(NULL);
- schema_view->extension_metadata = ArrowCharView(NULL);
- ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:name"),
- &schema_view->extension_name);
- ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:metadata"),
- &schema_view->extension_metadata);
-
- return NANOARROW_OK;
-}
diff --git a/c/vendor/nanoarrow/typedefs_inline.h b/c/vendor/nanoarrow/typedefs_inline.h
deleted file mode 100644
index 47e2892..0000000
--- a/c/vendor/nanoarrow/typedefs_inline.h
+++ /dev/null
@@ -1,310 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef NANOARROW_TYPEDEFS_INLINE_H_INCLUDED
-#define NANOARROW_TYPEDEFS_INLINE_H_INCLUDED
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/// \defgroup nanoarrow-inline-typedef Type definitions used in inlined implementations
-
-// Extra guard for versions of Arrow without the canonical guard
-#ifndef ARROW_FLAG_DICTIONARY_ORDERED
-
-#ifndef ARROW_C_DATA_INTERFACE
-#define ARROW_C_DATA_INTERFACE
-
-#define ARROW_FLAG_DICTIONARY_ORDERED 1
-#define ARROW_FLAG_NULLABLE 2
-#define ARROW_FLAG_MAP_KEYS_SORTED 4
-
-struct ArrowSchema {
- // Array type description
- const char* format;
- const char* name;
- const char* metadata;
- int64_t flags;
- int64_t n_children;
- struct ArrowSchema** children;
- struct ArrowSchema* dictionary;
-
- // Release callback
- void (*release)(struct ArrowSchema*);
- // Opaque producer-specific data
- void* private_data;
-};
-
-struct ArrowArray {
- // Array data description
- int64_t length;
- int64_t null_count;
- int64_t offset;
- int64_t n_buffers;
- int64_t n_children;
- const void** buffers;
- struct ArrowArray** children;
- struct ArrowArray* dictionary;
-
- // Release callback
- void (*release)(struct ArrowArray*);
- // Opaque producer-specific data
- void* private_data;
-};
-
-#endif // ARROW_C_DATA_INTERFACE
-
-#ifndef ARROW_C_STREAM_INTERFACE
-#define ARROW_C_STREAM_INTERFACE
-
-struct ArrowArrayStream {
- // Callback to get the stream type
- // (will be the same for all arrays in the stream).
- //
- // Return value: 0 if successful, an `errno`-compatible error code otherwise.
- //
- // If successful, the ArrowSchema must be released independently from the stream.
- int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
-
- // Callback to get the next array
- // (if no error and the array is released, the stream has ended)
- //
- // Return value: 0 if successful, an `errno`-compatible error code otherwise.
- //
- // If successful, the ArrowArray must be released independently from the stream.
- int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
-
- // Callback to get optional detailed error information.
- // This must only be called if the last stream operation failed
- // with a non-0 return code.
- //
- // Return value: pointer to a null-terminated character array describing
- // the last error, or NULL if no description is available.
- //
- // The returned pointer is only valid until the next operation on this stream
- // (including release).
- const char* (*get_last_error)(struct ArrowArrayStream*);
-
- // Release callback: release the stream's own resources.
- // Note that arrays returned by `get_next` must be individually released.
- void (*release)(struct ArrowArrayStream*);
-
- // Opaque producer-specific data
- void* private_data;
-};
-
-#endif // ARROW_C_STREAM_INTERFACE
-#endif // ARROW_FLAG_DICTIONARY_ORDERED
-
-/// \brief Return code for success.
-#define NANOARROW_OK 0
-
-/// \brief Represents an errno-compatible error code
-typedef int ArrowErrorCode;
-
-/// \brief Arrow type enumerator
-///
-/// These names are intended to map to the corresponding arrow::Type::type
-/// enumerator; however, the numeric values are specifically not equal
-/// (i.e., do not rely on numeric comparison).
-enum ArrowType {
- NANOARROW_TYPE_UNINITIALIZED = 0,
- NANOARROW_TYPE_NA = 1,
- NANOARROW_TYPE_BOOL,
- NANOARROW_TYPE_UINT8,
- NANOARROW_TYPE_INT8,
- NANOARROW_TYPE_UINT16,
- NANOARROW_TYPE_INT16,
- NANOARROW_TYPE_UINT32,
- NANOARROW_TYPE_INT32,
- NANOARROW_TYPE_UINT64,
- NANOARROW_TYPE_INT64,
- NANOARROW_TYPE_HALF_FLOAT,
- NANOARROW_TYPE_FLOAT,
- NANOARROW_TYPE_DOUBLE,
- NANOARROW_TYPE_STRING,
- NANOARROW_TYPE_BINARY,
- NANOARROW_TYPE_FIXED_SIZE_BINARY,
- NANOARROW_TYPE_DATE32,
- NANOARROW_TYPE_DATE64,
- NANOARROW_TYPE_TIMESTAMP,
- NANOARROW_TYPE_TIME32,
- NANOARROW_TYPE_TIME64,
- NANOARROW_TYPE_INTERVAL_MONTHS,
- NANOARROW_TYPE_INTERVAL_DAY_TIME,
- NANOARROW_TYPE_DECIMAL128,
- NANOARROW_TYPE_DECIMAL256,
- NANOARROW_TYPE_LIST,
- NANOARROW_TYPE_STRUCT,
- NANOARROW_TYPE_SPARSE_UNION,
- NANOARROW_TYPE_DENSE_UNION,
- NANOARROW_TYPE_DICTIONARY,
- NANOARROW_TYPE_MAP,
- NANOARROW_TYPE_EXTENSION,
- NANOARROW_TYPE_FIXED_SIZE_LIST,
- NANOARROW_TYPE_DURATION,
- NANOARROW_TYPE_LARGE_STRING,
- NANOARROW_TYPE_LARGE_BINARY,
- NANOARROW_TYPE_LARGE_LIST,
- NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
-};
-
-/// \brief Functional types of buffers as described in the Arrow Columnar Specification
-enum ArrowBufferType {
- NANOARROW_BUFFER_TYPE_NONE,
- NANOARROW_BUFFER_TYPE_VALIDITY,
- NANOARROW_BUFFER_TYPE_TYPE_ID,
- NANOARROW_BUFFER_TYPE_UNION_OFFSET,
- NANOARROW_BUFFER_TYPE_DATA_OFFSET,
- NANOARROW_BUFFER_TYPE_DATA
-};
-
-/// \brief A description of an arrangement of buffers
-///
-/// Contains the minimum amount of information required to
-/// calculate the size of each buffer in an ArrowArray knowing only
-/// the length and offset of the array.
-struct ArrowLayout {
- /// \brief The function of each buffer
- enum ArrowBufferType buffer_type[3];
-
- /// \brief The size of an element each buffer or 0 if this size is variable or unknown
- int64_t element_size_bits[3];
-
- /// \brief The number of elements in the child array per element in this array for a
- /// fixed-size list
- int64_t child_size_elements;
-};
-
-/// \brief An non-owning view of a string
-struct ArrowStringView {
- /// \brief A pointer to the start of the string
- ///
- /// If n_bytes is 0, this value may be NULL.
- const char* data;
-
- /// \brief The size of the string in bytes,
- ///
- /// (Not including the null terminator.)
- int64_t n_bytes;
-};
-
-/// \brief An non-owning view of a buffer
-struct ArrowBufferView {
- /// \brief A pointer to the start of the buffer
- ///
- /// If n_bytes is 0, this value may be NULL.
- union {
- const void* data;
- const int8_t* as_int8;
- const uint8_t* as_uint8;
- const int16_t* as_int16;
- const uint16_t* as_uint16;
- const int32_t* as_int32;
- const uint32_t* as_uint32;
- const int64_t* as_int64;
- const uint64_t* as_uint64;
- } data;
-
- /// \brief The size of the buffer in bytes
- int64_t n_bytes;
-};
-
-/// \brief Array buffer allocation and deallocation
-///
-/// Container for allocate, reallocate, and free methods that can be used
-/// to customize allocation and deallocation of buffers when constructing
-/// an ArrowArray.
-struct ArrowBufferAllocator {
- /// \brief Reallocate a buffer or return NULL if it cannot be reallocated
- uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
- int64_t old_size, int64_t new_size);
-
- /// \brief Deallocate a buffer allocated by this allocator
- void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size);
-
- /// \brief Opaque data specific to the allocator
- void* private_data;
-};
-
-/// \brief An owning mutable view of a buffer
-struct ArrowBuffer {
- /// \brief A pointer to the start of the buffer
- ///
- /// If capacity_bytes is 0, this value may be NULL.
- uint8_t* data;
-
- /// \brief The size of the buffer in bytes
- int64_t size_bytes;
-
- /// \brief The capacity of the buffer in bytes
- int64_t capacity_bytes;
-
- /// \brief The allocator that will be used to reallocate and/or free the buffer
- struct ArrowBufferAllocator allocator;
-};
-
-/// \brief An owning mutable view of a bitmap
-struct ArrowBitmap {
- /// \brief An ArrowBuffer to hold the allocated memory
- struct ArrowBuffer buffer;
-
- /// \brief The number of bits that have been appended to the bitmap
- int64_t size_bits;
-};
-
-// Used as the private data member for ArrowArrays allocated here and accessed
-// internally within inline ArrowArray* helpers.
-struct ArrowArrayPrivateData {
- // Holder for the validity buffer (or first buffer for union types, which are
- // the only type whose first buffer is not a valdiity buffer)
- struct ArrowBitmap bitmap;
-
- // Holder for additional buffers as required
- struct ArrowBuffer buffers[2];
-
- // The array of pointers to buffers. This must be updated after a sequence
- // of appends to synchronize its values with the actual buffer addresses
- // (which may have ben reallocated uring that time)
- const void* buffer_data[3];
-
- // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
- enum ArrowType storage_type;
-
- // The buffer arrangement for the storage type
- struct ArrowLayout layout;
-};
-
-struct ArrowArrayView {
- struct ArrowArray* array;
- enum ArrowType storage_type;
- struct ArrowLayout layout;
- struct ArrowBufferView buffer_views[3];
- int64_t n_children;
- struct ArrowArrayView** children;
-};
-
-/// }@
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/c/vendor/nanoarrow/utils.c b/c/vendor/nanoarrow/utils.c
deleted file mode 100644
index 74e4560..0000000
--- a/c/vendor/nanoarrow/utils.c
+++ /dev/null
@@ -1,126 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "nanoarrow.h"
-
-void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {  // fills *layout for storage_type
- layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY;  // default: buffer 0 is validity, buffers 1 and 2 unused
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
- layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE;
-
- layout->element_size_bits[0] = 1;  // default: one bit per element in buffer 0
- layout->element_size_bits[1] = 0;
- layout->element_size_bits[2] = 0;
-
- layout->child_size_elements = 0;  // no case below changes this value
-
- switch (storage_type) {  // each case only overrides the defaults set above
- case NANOARROW_TYPE_UNINITIALIZED:
- case NANOARROW_TYPE_NA:
- layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;  // these two types end up with no buffers at all
- layout->element_size_bits[0] = 0;
- break;
-
- case NANOARROW_TYPE_LIST:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;  // 32-bit offsets
- layout->element_size_bits[1] = 32;
- break;
-
- case NANOARROW_TYPE_LARGE_LIST:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;  // 64-bit offsets
- layout->element_size_bits[1] = 64;
- break;
-
- case NANOARROW_TYPE_BOOL:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;  // data buffer with one bit per element
- layout->element_size_bits[1] = 1;
- break;
-
- case NANOARROW_TYPE_UINT8:
- case NANOARROW_TYPE_INT8:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
- layout->element_size_bits[1] = 8;
- break;
-
- case NANOARROW_TYPE_UINT16:
- case NANOARROW_TYPE_INT16:
- case NANOARROW_TYPE_HALF_FLOAT:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
- layout->element_size_bits[1] = 16;
- break;
-
- case NANOARROW_TYPE_UINT32:
- case NANOARROW_TYPE_INT32:
- case NANOARROW_TYPE_FLOAT:
- case NANOARROW_TYPE_INTERVAL_MONTHS:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
- layout->element_size_bits[1] = 32;
- break;
-
- case NANOARROW_TYPE_UINT64:
- case NANOARROW_TYPE_INT64:
- case NANOARROW_TYPE_DOUBLE:
- case NANOARROW_TYPE_INTERVAL_DAY_TIME:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
- layout->element_size_bits[1] = 64;
- break;
-
- case NANOARROW_TYPE_DECIMAL128:
- case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
- layout->element_size_bits[1] = 128;
- break;
-
- case NANOARROW_TYPE_DECIMAL256:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
- layout->element_size_bits[1] = 256;
- break;
-
- case NANOARROW_TYPE_FIXED_SIZE_BINARY:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;  // element_size_bits[1] stays 0; presumably set by the caller -- confirm
- break;
-
- case NANOARROW_TYPE_DENSE_UNION:
- layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;  // buffer 0 holds 8-bit type ids instead of validity bits
- layout->element_size_bits[0] = 8;
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET;  // plus 32-bit union offsets in buffer 1
- layout->element_size_bits[1] = 32;
- break;
-
- case NANOARROW_TYPE_SPARSE_UNION:
- layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;  // type ids only; buffer 1 stays unused
- layout->element_size_bits[0] = 8;
- break;
-
- case NANOARROW_TYPE_STRING:
- case NANOARROW_TYPE_BINARY:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;  // 32-bit offsets in buffer 1
- layout->element_size_bits[1] = 32;
- layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;  // data in buffer 2; element_size_bits[2] stays 0
- break;
-
- case NANOARROW_TYPE_LARGE_STRING:
- case NANOARROW_TYPE_LARGE_BINARY:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;  // 64-bit offsets in buffer 1
- layout->element_size_bits[1] = 64;
- layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;  // data in buffer 2; element_size_bits[2] stays 0
- break;
-
- default:  // every other type keeps the defaults: a validity buffer only
- break;
- }
-}
diff --git a/c/vendor/nanoarrow/utils_inline.h b/c/vendor/nanoarrow/utils_inline.h
deleted file mode 100644
index 3083339..0000000
--- a/c/vendor/nanoarrow/utils_inline.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef NANOARROW_UTILS_INLINE_H_INCLUDED
-#define NANOARROW_UTILS_INLINE_H_INCLUDED
-
-#include <errno.h>
-#include <string.h>
-
-#include "typedefs_inline.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define _NANOARROW_CONCAT(x, y) x##y  // pastes x and y exactly as written
-#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y)  // extra level so x and y are macro-expanded before pasting
-// Evaluates EXPR once into a const int named NAME and returns it from the enclosing function when it is non-zero.
-#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \
- do { \
- const int NAME = (EXPR); \
- if (NAME) return NAME; \
- } while (0)
-// Wrapper that names the temporary errno_status_<N>; __COUNTER__ is a compiler extension, not ISO C.
-#define NANOARROW_RETURN_NOT_OK(EXPR) \
- _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR)
-// Returns EINVAL unless min_ <= x_ <= max_. NOTE(review): arguments are not parenthesized and x_ is evaluated twice.
-#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \
- NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL)
-
-static inline struct ArrowStringView ArrowCharView(const char* value) {  // wraps a C string in a view without copying
- struct ArrowStringView out;
-
- out.data = value;  // stores the caller's pointer as-is, which may be NULL
- if (value) {
- out.n_bytes = (int64_t)strlen(value);  // length excludes the terminating NUL
- } else {
- out.n_bytes = 0;  // a NULL input gives a zero-length view and never reaches strlen
- }
-
- return out;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/c/vendor/vendor_nanoarrow.sh b/c/vendor/vendor_nanoarrow.sh
index 8aeee16..c3a9943 100755
--- a/c/vendor/vendor_nanoarrow.sh
+++ b/c/vendor/vendor_nanoarrow.sh
@@ -21,16 +21,23 @@
main() {
local -r repo_url="https://github.com/apache/arrow-nanoarrow"
local -r commit_sha=$(git ls-remote "$repo_url" HEAD | awk '{print $2}') # NOTE(review): git ls-remote prints the SHA first and the ref name second, so $2 looks like the ref name (HEAD) rather than the SHA; confirm $1 was not intended
- TARBALL="$(pwd)/nanoarrow.tar.gz"
echo "Fetching $commit_sha from $repo_url"
- wget -O "$TARBALL" "$repo_url/archive/$commit_sha.tar.gz"
- trap 'rm "$TARBALL"' EXIT
+ SCRATCH=$(mktemp -d) # temporary directory that holds the download and the build tree
+ trap 'rm -rf "$SCRATCH"' EXIT # delete the temporary directory when the script exits
+ local -r tarball="$SCRATCH/nanoarrow.tar.gz"
+ wget -O "$tarball" "$repo_url/archive/$commit_sha.tar.gz"
+ rm -rf nanoarrow # remove the existing nanoarrow/ directory before recreating it
mkdir -p nanoarrow
- # Keep only the sources
- tar --strip-components 3 -C nanoarrow -xf "$TARBALL"
- rm nanoarrow/*_test.cc
+ tar --strip-components 1 -C "$SCRATCH" -xf "$tarball" # unpack into the scratch dir, dropping the top-level path component
+ mkdir "$SCRATCH/build"
+ pushd "$SCRATCH/build"
+ cmake .. -DNANOARROW_BUNDLE=ON # configure only; presumably this generates the amalgamation/ tree read by the cp lines below -- confirm
+ popd
+
+ cp "$SCRATCH/build/amalgamation/nanoarrow/nanoarrow.c" nanoarrow/ # only these two generated files are copied into nanoarrow/
+ cp "$SCRATCH/build/amalgamation/nanoarrow/nanoarrow.h" nanoarrow/
}
main "$@"