You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2022/08/12 18:31:55 UTC
[arrow-nanoarrow] branch main updated: ArrowArray consumer buffer helpers (#19)
This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 0247fa9 ArrowArray consumer buffer helpers (#19)
0247fa9 is described below
commit 0247fa937ed4c5b5ec4ac039089d52bff410d98e
Author: Dewey Dunnington <de...@fishandwhistle.net>
AuthorDate: Fri Aug 12 15:31:50 2022 -0300
ArrowArray consumer buffer helpers (#19)
* add the basic layout struct
* use the layout in the schema view
* some array view sketches
* more plausible array view init
* lifecycle stuff for nested types
* shuffle some things to be not inline
* basic tests
* with passing tests
* with structs working
* documentation fixes
* don't make array view functions inline
* don't use inline function for layout init
* document + test the fixed-size list
* fix a length; improve coverage
---
CMakeLists.txt | 7 +-
src/nanoarrow/array.c | 1 -
src/nanoarrow/array_inline.h | 7 +
src/nanoarrow/array_view.c | 227 ++++++++++++++++++++++++++++
src/nanoarrow/array_view_test.cc | 307 ++++++++++++++++++++++++++++++++++++++
src/nanoarrow/nanoarrow.c | 2 +
src/nanoarrow/nanoarrow.h | 34 +++++
src/nanoarrow/schema_view.c | 7 +
src/nanoarrow/schema_view_test.cc | 118 +++++++++++++--
src/nanoarrow/typedefs_inline.h | 57 +++++++
src/nanoarrow/utils.c | 126 ++++++++++++++++
11 files changed, 878 insertions(+), 15 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a9e4637..95383cc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,10 +30,12 @@ add_library(
nanoarrow
src/nanoarrow/allocator.c
src/nanoarrow/array.c
+ src/nanoarrow/array_view.c
src/nanoarrow/error.c
src/nanoarrow/metadata.c
src/nanoarrow/schema.c
- src/nanoarrow/schema_view.c)
+ src/nanoarrow/schema_view.c
+ src/nanoarrow/utils.c)
install(TARGETS nanoarrow DESTINATION lib)
install(DIRECTORY src/ DESTINATION include FILES_MATCHING PATTERN "*.h")
@@ -61,6 +63,7 @@ if (NANOARROW_BUILD_TESTS)
add_executable(buffer_test src/nanoarrow/buffer_test.cc)
add_executable(bitmap_test src/nanoarrow/bitmap_test.cc)
add_executable(array_test src/nanoarrow/array_test.cc)
+ add_executable(array_view_test src/nanoarrow/array_view_test.cc)
add_executable(error_test src/nanoarrow/error_test.cc)
add_executable(metadata_test src/nanoarrow/metadata_test.cc)
add_executable(schema_test src/nanoarrow/schema_test.cc)
@@ -76,6 +79,7 @@ if (NANOARROW_BUILD_TESTS)
target_link_libraries(buffer_test nanoarrow GTest::gtest_main)
target_link_libraries(bitmap_test nanoarrow GTest::gtest_main)
target_link_libraries(array_test nanoarrow GTest::gtest_main)
+ target_link_libraries(array_view_test nanoarrow GTest::gtest_main)
target_link_libraries(error_test nanoarrow GTest::gtest_main)
target_link_libraries(metadata_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
target_link_libraries(schema_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
@@ -86,6 +90,7 @@ if (NANOARROW_BUILD_TESTS)
gtest_discover_tests(buffer_test)
gtest_discover_tests(bitmap_test)
gtest_discover_tests(array_test)
+ gtest_discover_tests(array_view_test)
gtest_discover_tests(error_test)
gtest_discover_tests(metadata_test)
gtest_discover_tests(schema_test)
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 89e6cac..460cfd1 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -16,7 +16,6 @@
// under the License.
#include <errno.h>
-#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 7b74534..f8bb4d7 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -67,6 +67,13 @@ static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
}
+ for (int64_t i = 0; i < array->n_children; i++) {
+ result = ArrowArrayFinishBuilding(array->children[i], shrink_to_fit);
+ if (result != NANOARROW_OK) {
+ return result;
+ }
+ }
+
return NANOARROW_OK;
}
diff --git a/src/nanoarrow/array_view.c b/src/nanoarrow/array_view.c
new file mode 100644
index 0000000..a5427ac
--- /dev/null
+++ b/src/nanoarrow/array_view.c
@@ -0,0 +1,227 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nanoarrow.h"
+
+// Put an ArrowArrayView into a known state for the given storage type.
+//
+// Every field is zero-filled first (so the array and children pointers start
+// out cleared), then the storage type is recorded and the buffer layout for it
+// is filled in by ArrowLayoutInit().
+void ArrowArrayViewInit(struct ArrowArrayView* array_view, enum ArrowType storage_type) {
+  memset(array_view, 0, sizeof(*array_view));
+  ArrowLayoutInit(&array_view->layout, storage_type);
+  array_view->storage_type = storage_type;
+}
+
+// Allocate array_view->children together with one ArrowArrayView per child.
+//
+// Each child is initialized with NANOARROW_TYPE_UNINITIALIZED.
+//
+// Returns NANOARROW_OK on success (including n_children == 0, in which case
+// nothing is allocated), EINVAL if children were already allocated or
+// n_children is negative, and ENOMEM if the requested size cannot be
+// represented or an allocation fails. When a child allocation fails, the
+// slots that were not reached are NULL and n_children is already set, so
+// ArrowArrayViewReset() can release whatever was allocated.
+ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
+                                              int64_t n_children) {
+  if (array_view->children != NULL || n_children < 0) {
+    return EINVAL;
+  }
+
+  // Nothing to allocate. Returning early also avoids a spurious ENOMEM on
+  // platforms where a zero-byte allocation returns NULL.
+  if (n_children == 0) {
+    array_view->n_children = 0;
+    return NANOARROW_OK;
+  }
+
+  // Reject counts whose byte size would overflow before reaching the allocator
+  if ((uint64_t)n_children > SIZE_MAX / sizeof(struct ArrowArrayView*)) {
+    return ENOMEM;
+  }
+
+  array_view->children =
+      (struct ArrowArrayView**)ArrowMalloc(n_children * sizeof(struct ArrowArrayView*));
+  if (array_view->children == NULL) {
+    return ENOMEM;
+  }
+
+  // Clear every slot first so a partially populated array can be reset safely
+  for (int64_t i = 0; i < n_children; i++) {
+    array_view->children[i] = NULL;
+  }
+
+  array_view->n_children = n_children;
+
+  for (int64_t i = 0; i < n_children; i++) {
+    array_view->children[i] =
+        (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
+    if (array_view->children[i] == NULL) {
+      return ENOMEM;
+    }
+    ArrowArrayViewInit(array_view->children[i], NANOARROW_TYPE_UNINITIALIZED);
+  }
+
+  return NANOARROW_OK;
+}
+
+// Build an array view, and recursively its children, from a schema.
+//
+// If ArrowSchemaViewInit() rejects the schema its error code is returned
+// unchanged and array_view is not written to. If allocating or initializing
+// the children fails, array_view is reset before the error code is returned.
+ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
+                                            struct ArrowSchema* schema,
+                                            struct ArrowError* error) {
+  struct ArrowSchemaView parsed;
+  int status = ArrowSchemaViewInit(&parsed, schema, error);
+  if (status != NANOARROW_OK) {
+    return status;
+  }
+
+  ArrowArrayViewInit(array_view, parsed.storage_data_type);
+  // Replace the generic layout set by ArrowArrayViewInit() with the one the
+  // schema view computed
+  array_view->layout = parsed.layout;
+
+  status = ArrowArrayViewAllocateChildren(array_view, schema->n_children);
+
+  int64_t child = 0;
+  while (status == NANOARROW_OK && child < schema->n_children) {
+    status = ArrowArrayViewInitFromSchema(array_view->children[child],
+                                          schema->children[child], error);
+    child++;
+  }
+
+  if (status != NANOARROW_OK) {
+    ArrowArrayViewReset(array_view);
+  }
+
+  return status;
+}
+
+// Release the children owned by a view and return it to the uninitialized state.
+//
+// Each non-NULL child is reset recursively and freed, then the children array
+// itself is freed. The view is finally re-initialized with
+// NANOARROW_TYPE_UNINITIALIZED.
+void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
+  struct ArrowArrayView** children = array_view->children;
+
+  if (children != NULL) {
+    const int64_t n_children = array_view->n_children;
+    for (int64_t i = 0; i < n_children; i++) {
+      struct ArrowArrayView* child = children[i];
+      if (child == NULL) {
+        continue;
+      }
+
+      ArrowArrayViewReset(child);
+      ArrowFree(child);
+    }
+
+    ArrowFree(children);
+  }
+
+  ArrowArrayViewInit(array_view, NANOARROW_TYPE_UNINITIALIZED);
+}
+
+// Set the buffer sizes that can be computed from a length alone.
+//
+// For each of the three buffer slots the data pointer is cleared and n_bytes is
+// recomputed from the layout. Struct and sparse union views propagate the same
+// length to every child; a fixed-size list view propagates
+// length * layout.child_size_elements to its first child, if one exists.
+void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) {
+  for (int i = 0; i < 3; i++) {
+    int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
+
+    // This has to happen before the switch: a statement placed between
+    // `switch (...) {` and the first case label is never executed.
+    array_view->buffer_views[i].data.data = NULL;
+
+    switch (array_view->layout.buffer_type[i]) {
+      case NANOARROW_BUFFER_TYPE_VALIDITY:
+        array_view->buffer_views[i].n_bytes = _ArrowBytesForBits(length);
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+        // Probably don't want/need to rely on the producer to have allocated an
+        // offsets buffer of length 1 for a zero-size array
+        array_view->buffer_views[i].n_bytes =
+            (length != 0) * element_size_bytes * (length + 1);
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA:
+        // Rounded up to whole bytes so that bit-packed data is fully covered
+        array_view->buffer_views[i].n_bytes =
+            _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] * length) /
+            8;
+        break;
+      case NANOARROW_BUFFER_TYPE_TYPE_ID:
+      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+        array_view->buffer_views[i].n_bytes = element_size_bytes * length;
+        break;
+      case NANOARROW_BUFFER_TYPE_NONE:
+        array_view->buffer_views[i].n_bytes = 0;
+        break;
+    }
+  }
+
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_STRUCT:
+    case NANOARROW_TYPE_SPARSE_UNION:
+      for (int64_t i = 0; i < array_view->n_children; i++) {
+        ArrowArrayViewSetLength(array_view->children[i], length);
+      }
+      break;
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      // The child may not have been allocated yet, e.g. when the view was
+      // created with ArrowArrayViewInit() only
+      if (array_view->n_children >= 1) {
+        ArrowArrayViewSetLength(array_view->children[0],
+                                length * array_view->layout.child_size_elements);
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+// Point an array view at an ArrowArray and compute its buffer sizes.
+//
+// Length-derived sizes are computed for offset + length elements, then the
+// buffer pointers are copied from the array and the sizes that depend on
+// buffer contents (string/binary data) are filled in. Children are processed
+// recursively.
+//
+// Returns EINVAL if the number of buffers or children does not match the view,
+// if an offsets buffer is required but NULL, or if a struct or fixed-size list
+// child is too short for the parent; otherwise returns the first error
+// reported for a child, or NANOARROW_OK.
+ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
+                                      struct ArrowArray* array) {
+  array_view->array = array;
+  ArrowArrayViewSetLength(array_view, array->offset + array->length);
+
+  // Count the buffers the layout requires and validate the array's shape
+  // before indexing into array->buffers or array->children
+  int64_t buffers_required = 0;
+  for (int i = 0; i < 3; i++) {
+    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
+      break;
+    }
+
+    buffers_required++;
+  }
+
+  if (buffers_required != array->n_buffers) {
+    return EINVAL;
+  }
+
+  if (array_view->n_children != array->n_children) {
+    return EINVAL;
+  }
+
+  for (int64_t i = 0; i < buffers_required; i++) {
+    // If the null_count is 0, the validity buffer can be NULL
+    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
+        array->null_count == 0 && array->buffers[i] == NULL) {
+      array_view->buffer_views[i].n_bytes = 0;
+    }
+
+    array_view->buffer_views[i].data.data = array->buffers[i];
+  }
+
+  // Check child sizes and calculate sizes that depend on data in the array buffers
+  const int64_t logical_end = array->offset + array->length;
+  int64_t last_offset;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_BINARY:
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        // A non-empty array must provide the offsets we are about to read
+        if (array_view->buffer_views[1].data.data == NULL) {
+          return EINVAL;
+        }
+
+        last_offset = array_view->buffer_views[1].data.as_int32[logical_end];
+        array_view->buffer_views[2].n_bytes = last_offset;
+      }
+      break;
+    case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_LARGE_BINARY:
+      if (array_view->buffer_views[1].n_bytes != 0) {
+        if (array_view->buffer_views[1].data.data == NULL) {
+          return EINVAL;
+        }
+
+        last_offset = array_view->buffer_views[1].data.as_int64[logical_end];
+        array_view->buffer_views[2].n_bytes = last_offset;
+      }
+      break;
+    case NANOARROW_TYPE_STRUCT:
+      for (int64_t i = 0; i < array_view->n_children; i++) {
+        if (array->children[i]->length < logical_end) {
+          return EINVAL;
+        }
+      }
+      break;
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      last_offset = logical_end * array_view->layout.child_size_elements;
+      if (array->n_children != 1 || array->children[0]->length < last_offset) {
+        return EINVAL;
+      }
+      break;
+    default:
+      break;
+  }
+
+  for (int64_t i = 0; i < array_view->n_children; i++) {
+    int result = ArrowArrayViewSetArray(array_view->children[i], array->children[i]);
+    if (result != NANOARROW_OK) {
+      return result;
+    }
+  }
+
+  return NANOARROW_OK;
+}
diff --git a/src/nanoarrow/array_view_test.cc b/src/nanoarrow/array_view_test.cc
new file mode 100644
index 0000000..1ce4156
--- /dev/null
+++ b/src/nanoarrow/array_view_test.cc
@@ -0,0 +1,307 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "nanoarrow/nanoarrow.h"
+
+// Exercises an int32 view: the layout set by ArrowArrayViewInit(), the sizes
+// set by ArrowArrayViewSetLength(), and the sizes and data pointers set by
+// ArrowArrayViewSetArray() both without and with a validity buffer.
+TEST(ArrayTest, ArrayViewTestBasic) {
+  struct ArrowArrayView array_view;
+  ArrowArrayViewInit(&array_view, NANOARROW_TYPE_INT32);
+
+  EXPECT_EQ(array_view.array, nullptr);
+  EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_INT32);
+  EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+  EXPECT_EQ(array_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
+  EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
+  EXPECT_EQ(array_view.layout.element_size_bits[1], 32);
+
+  ArrowArrayViewSetLength(&array_view, 5);
+  EXPECT_EQ(array_view.buffer_views[0].n_bytes, 1);
+  EXPECT_EQ(array_view.buffer_views[1].n_bytes, 5 * sizeof(int32_t));
+
+  struct ArrowArray array;
+
+  // Build with no validity buffer. The init result is asserted because every
+  // later step uses the array.
+  ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_INT32), NANOARROW_OK);
+  ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(&array, 1), 11), NANOARROW_OK);
+  ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(&array, 1), 12), NANOARROW_OK);
+  ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(&array, 1), 13), NANOARROW_OK);
+  array.length = 3;
+  array.null_count = 0;
+  ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+  EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
+  EXPECT_EQ(array_view.buffer_views[1].n_bytes, 3 * sizeof(int32_t));
+  EXPECT_EQ(array_view.buffer_views[1].data.as_int32[0], 11);
+  EXPECT_EQ(array_view.buffer_views[1].data.as_int32[1], 12);
+  EXPECT_EQ(array_view.buffer_views[1].data.as_int32[2], 13);
+
+  // Build with validity buffer
+  ASSERT_EQ(ArrowBitmapAppend(ArrowArrayValidityBitmap(&array), 1, 3), NANOARROW_OK);
+  array.null_count = -1;
+  ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+  EXPECT_EQ(array_view.buffer_views[0].n_bytes, 1);
+  EXPECT_EQ(array_view.buffer_views[1].n_bytes, 3 * sizeof(int32_t));
+
+  // Expect error for the wrong number of buffers (a string view needs three
+  // buffers; the int32 array provides two)
+  ArrowArrayViewReset(&array_view);
+  ArrowArrayViewInit(&array_view, NANOARROW_TYPE_STRING);
+  EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), EINVAL);
+
+  array.release(&array);
+  ArrowArrayViewReset(&array_view);
+}
+
+// Exercises a string view: the three-buffer layout (validity, 32-bit offsets,
+// data), the sizes derived from a length alone, and the data buffer size that
+// ArrowArrayViewSetArray() reads from the last offset.
+TEST(ArrayTest, ArrayViewTestString) {
+ struct ArrowArrayView array_view;
+ ArrowArrayViewInit(&array_view, NANOARROW_TYPE_STRING);
+
+ EXPECT_EQ(array_view.array, nullptr);
+ EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_STRING);
+ EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(array_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(array_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(array_view.layout.element_size_bits[1], 32);
+ EXPECT_EQ(array_view.layout.element_size_bits[2], 0);
+
+ // Can't assume offset buffer size > 0 if length == 0
+ ArrowArrayViewSetLength(&array_view, 0);
+ EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
+ EXPECT_EQ(array_view.buffer_views[1].n_bytes, 0);
+ EXPECT_EQ(array_view.buffer_views[2].n_bytes, 0);
+
+ // The data buffer size cannot be known from the length alone, so it stays 0
+ ArrowArrayViewSetLength(&array_view, 5);
+ EXPECT_EQ(array_view.buffer_views[0].n_bytes, 1);
+ EXPECT_EQ(array_view.buffer_views[1].n_bytes, (5 + 1) * sizeof(int32_t));
+ EXPECT_EQ(array_view.buffer_views[2].n_bytes, 0);
+
+ struct ArrowArray array;
+
+ // Build + check zero length
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_STRING), NANOARROW_OK);
+ array.null_count = 0;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
+ EXPECT_EQ(array_view.buffer_views[1].n_bytes, 0);
+ EXPECT_EQ(array_view.buffer_views[2].n_bytes, 0);
+
+ // Build non-zero length (the array ["abcd"])
+ ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(&array, 1), 0), NANOARROW_OK);
+ ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(&array, 1), 4), NANOARROW_OK);
+ ASSERT_EQ(ArrowBufferReserve(ArrowArrayBuffer(&array, 2), 4), NANOARROW_OK);
+ ArrowBufferAppendUnsafe(ArrowArrayBuffer(&array, 2), "abcd", 4);
+ array.length = 1;
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+
+ // The data buffer size now comes from the last offset (4)
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
+ EXPECT_EQ(array_view.buffer_views[1].n_bytes, (1 + 1) * sizeof(int32_t));
+ EXPECT_EQ(array_view.buffer_views[2].n_bytes, 4);
+
+ array.release(&array);
+ ArrowArrayViewReset(&array_view);
+}
+
+// Exercises a large string view: the three-buffer layout (validity, 64-bit
+// offsets, data), the sizes derived from a length alone, and the data buffer
+// size that ArrowArrayViewSetArray() reads from the last offset.
+TEST(ArrayTest, ArrayViewTestLargeString) {
+  struct ArrowArrayView array_view;
+  ArrowArrayViewInit(&array_view, NANOARROW_TYPE_LARGE_STRING);
+
+  EXPECT_EQ(array_view.array, nullptr);
+  EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_LARGE_STRING);
+  EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+  EXPECT_EQ(array_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+  EXPECT_EQ(array_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+  EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
+  EXPECT_EQ(array_view.layout.element_size_bits[1], 64);
+  EXPECT_EQ(array_view.layout.element_size_bits[2], 0);
+
+  // Can't assume offset buffer size > 0 if length == 0
+  ArrowArrayViewSetLength(&array_view, 0);
+  EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
+  EXPECT_EQ(array_view.buffer_views[1].n_bytes, 0);
+  EXPECT_EQ(array_view.buffer_views[2].n_bytes, 0);
+
+  ArrowArrayViewSetLength(&array_view, 5);
+  EXPECT_EQ(array_view.buffer_views[0].n_bytes, 1);
+  EXPECT_EQ(array_view.buffer_views[1].n_bytes, (5 + 1) * sizeof(int64_t));
+  EXPECT_EQ(array_view.buffer_views[2].n_bytes, 0);
+
+  struct ArrowArray array;
+
+  // Build + check zero length. The array must be a large string so that its
+  // type agrees with the view and with the 64-bit offsets appended below.
+  ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_LARGE_STRING), NANOARROW_OK);
+  array.null_count = 0;
+  EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+  EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
+  EXPECT_EQ(array_view.buffer_views[1].n_bytes, 0);
+  EXPECT_EQ(array_view.buffer_views[2].n_bytes, 0);
+
+  // Build non-zero length (the array ["abcd"])
+  ASSERT_EQ(ArrowBufferAppendInt64(ArrowArrayBuffer(&array, 1), 0), NANOARROW_OK);
+  ASSERT_EQ(ArrowBufferAppendInt64(ArrowArrayBuffer(&array, 1), 4), NANOARROW_OK);
+  ASSERT_EQ(ArrowBufferReserve(ArrowArrayBuffer(&array, 2), 4), NANOARROW_OK);
+  ArrowBufferAppendUnsafe(ArrowArrayBuffer(&array, 2), "abcd", 4);
+  array.length = 1;
+  ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+  EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
+  EXPECT_EQ(array_view.buffer_views[1].n_bytes, (1 + 1) * sizeof(int64_t));
+  EXPECT_EQ(array_view.buffer_views[2].n_bytes, 4);
+
+  array.release(&array);
+  ArrowArrayViewReset(&array_view);
+}
+
+// Exercises a struct view: child allocation (including its error codes) and
+// propagation of the parent's length to each child.
+TEST(ArrayTest, ArrayViewTestStruct) {
+ struct ArrowArrayView array_view;
+ ArrowArrayViewInit(&array_view, NANOARROW_TYPE_STRUCT);
+
+ EXPECT_EQ(array_view.array, nullptr);
+ EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_STRUCT);
+ EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
+
+ // Expect error for out-of-memory
+ EXPECT_EQ(
+ ArrowArrayViewAllocateChildren(&array_view, std::numeric_limits<int64_t>::max()),
+ ENOMEM);
+
+ EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 2), NANOARROW_OK);
+ EXPECT_EQ(array_view.n_children, 2);
+ ArrowArrayViewInit(array_view.children[0], NANOARROW_TYPE_INT32);
+ EXPECT_EQ(array_view.children[0]->storage_type, NANOARROW_TYPE_INT32);
+ ArrowArrayViewInit(array_view.children[1], NANOARROW_TYPE_NA);
+ EXPECT_EQ(array_view.children[1]->storage_type, NANOARROW_TYPE_NA);
+
+ // Struct children receive the same length as the parent
+ ArrowArrayViewSetLength(&array_view, 5);
+ EXPECT_EQ(array_view.buffer_views[0].n_bytes, 1);
+ EXPECT_EQ(array_view.children[0]->buffer_views[1].n_bytes, 5 * sizeof(int32_t));
+
+ // Expect error for attempting to allocate a children array that already exists
+ EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), EINVAL);
+
+ ArrowArrayViewReset(&array_view);
+}
+
+// Exercises a fixed-size list view: the child's length is the parent's length
+// multiplied by layout.child_size_elements.
+TEST(ArrayTest, ArrayViewTestFixedSizeList) {
+ struct ArrowArrayView array_view;
+ ArrowArrayViewInit(&array_view, NANOARROW_TYPE_FIXED_SIZE_LIST);
+ // Set by hand here because the view is not created from a schema
+ array_view.layout.child_size_elements = 3;
+
+ EXPECT_EQ(array_view.array, nullptr);
+ EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_FIXED_SIZE_LIST);
+ EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
+
+ EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
+ EXPECT_EQ(array_view.n_children, 1);
+ ArrowArrayViewInit(array_view.children[0], NANOARROW_TYPE_INT32);
+ EXPECT_EQ(array_view.children[0]->storage_type, NANOARROW_TYPE_INT32);
+
+ // 5 lists of 3 elements each require 15 child elements
+ ArrowArrayViewSetLength(&array_view, 5);
+ EXPECT_EQ(array_view.buffer_views[0].n_bytes, 1);
+ EXPECT_EQ(array_view.children[0]->buffer_views[1].n_bytes, 15 * sizeof(int32_t));
+
+ ArrowArrayViewReset(&array_view);
+}
+
+// Exercises ArrowArrayViewInitFromSchema() and ArrowArrayViewSetArray() for a
+// struct with a single int32 child, including the EINVAL paths for a missing
+// child and for a child that is shorter than its parent.
+TEST(ArrayTest, ArrayViewTestStructArray) {
+ struct ArrowArrayView array_view;
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowError error;
+
+ ASSERT_EQ(ArrowSchemaInit(&schema, NANOARROW_TYPE_STRUCT), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaAllocateChildren(&schema, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaInit(schema.children[0], NANOARROW_TYPE_INT32), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+ EXPECT_EQ(array_view.n_children, 1);
+ EXPECT_EQ(array_view.children[0]->storage_type, NANOARROW_TYPE_INT32);
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_STRUCT), NANOARROW_OK);
+
+ // Expect error for the wrong number of children
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), EINVAL);
+
+ ASSERT_EQ(ArrowArrayAllocateChildren(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInit(array.children[0], NANOARROW_TYPE_INT32), NANOARROW_OK);
+
+ // Expect error for the wrong number of child elements (the parent claims one
+ // element before anything has been appended to the child)
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), EINVAL);
+
+ ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(array.children[0], 1), 123),
+ NANOARROW_OK);
+ array.children[0]->length = 1;
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(array_view.children[0]->buffer_views[1].n_bytes, sizeof(int32_t));
+ EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
+
+ ArrowArrayViewReset(&array_view);
+ schema.release(&schema);
+ array.release(&array);
+}
+
+// Exercises ArrowArrayViewInitFromSchema() and ArrowArrayViewSetArray() for a
+// fixed-size list of three int32 values, including the EINVAL path for a child
+// with too few elements.
+TEST(ArrayTest, ArrayViewTestFixedSizeListArray) {
+ struct ArrowArrayView array_view;
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowError error;
+
+ ASSERT_EQ(ArrowSchemaInitFixedSize(&schema, NANOARROW_TYPE_FIXED_SIZE_LIST, 3),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaAllocateChildren(&schema, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaInit(schema.children[0], NANOARROW_TYPE_INT32), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+ EXPECT_EQ(array_view.n_children, 1);
+ EXPECT_EQ(array_view.children[0]->storage_type, NANOARROW_TYPE_INT32);
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_FIXED_SIZE_LIST), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAllocateChildren(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInit(array.children[0], NANOARROW_TYPE_INT32), NANOARROW_OK);
+
+ // Expect error for the wrong number of child elements (one list of size 3
+ // needs three child elements; nothing has been appended to the child yet)
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), EINVAL);
+
+ ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(array.children[0], 1), 123),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(array.children[0], 1), 456),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(array.children[0], 1), 789),
+ NANOARROW_OK);
+ array.children[0]->length = 3;
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(array_view.children[0]->buffer_views[1].n_bytes, 3 * sizeof(int32_t));
+ EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
+
+ ArrowArrayViewReset(&array_view);
+ schema.release(&schema);
+ array.release(&array);
+}
diff --git a/src/nanoarrow/nanoarrow.c b/src/nanoarrow/nanoarrow.c
index d3e33c9..f7504e8 100644
--- a/src/nanoarrow/nanoarrow.c
+++ b/src/nanoarrow/nanoarrow.c
@@ -17,7 +17,9 @@
#include "allocator.c"
#include "array.c"
+#include "array_view.c"
#include "error.c"
#include "metadata.c"
#include "schema.c"
#include "schema_view.c"
+#include "utils.c"
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index c432e60..4635e24 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -87,6 +87,9 @@ const char* ArrowErrorMessage(struct ArrowError* error);
/// \defgroup nanoarrow-utils Utility data structures
+/// \brief Initialize a description of buffer arrangements from a storage type
+void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type);
+
/// \brief Create a string view from a null-terminated string
static inline struct ArrowStringView ArrowCharView(const char* value);
@@ -259,6 +262,8 @@ struct ArrowSchemaView {
/// interpret the buffers in the array.
enum ArrowType storage_data_type;
+ struct ArrowLayout layout;
+
/// \brief The extension type name if it exists
///
/// If the ARROW:extension:name key is present in schema.metadata,
@@ -566,6 +571,35 @@ static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
/// }@
+/// \defgroup nanoarrow-array Array consumer helpers
+/// These functions read and validate the contents of ArrowArray structures
+
+/// \brief Initialize the contents of an ArrowArrayView
+void ArrowArrayViewInit(struct ArrowArrayView* array_view, enum ArrowType storage_type);
+
+/// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema
+ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
+ struct ArrowSchema* schema,
+ struct ArrowError* error);
+
+/// \brief Allocate the array_view->children array
+///
+/// Includes the memory for each child struct ArrowArrayView
+ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
+ int64_t n_children);
+
+/// \brief Set data-independent buffer sizes from length
+void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length);
+
+/// \brief Set buffer sizes and data pointers from an ArrowArray
+ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
+ struct ArrowArray* array);
+
+/// \brief Reset the contents of an ArrowArrayView and frees resources
+void ArrowArrayViewReset(struct ArrowArrayView* array_view);
+
+/// }@
+
// Inline function definitions
#include "array_inline.h"
#include "bitmap_inline.h"
diff --git a/src/nanoarrow/schema_view.c b/src/nanoarrow/schema_view.c
index 7a3ca93..21f8e23 100644
--- a/src/nanoarrow/schema_view.c
+++ b/src/nanoarrow/schema_view.c
@@ -668,6 +668,13 @@ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
}
}
+ ArrowLayoutInit(&schema_view->layout, schema_view->storage_data_type);
+ if (schema_view->storage_data_type == NANOARROW_TYPE_FIXED_SIZE_BINARY) {
+ schema_view->layout.element_size_bits[1] = schema_view->fixed_size * 8;
+ } else if (schema_view->storage_data_type == NANOARROW_TYPE_FIXED_SIZE_LIST) {
+ schema_view->layout.child_size_elements = schema_view->fixed_size;
+ }
+
schema_view->extension_name = ArrowCharView(NULL);
schema_view->extension_metadata = ArrowCharView(NULL);
ArrowMetadataGetValue(schema->metadata, ArrowCharView("ARROW:extension:name"),
diff --git a/src/nanoarrow/schema_view_test.cc b/src/nanoarrow/schema_view_test.cc
index bb1fb4e..a5fa963 100644
--- a/src/nanoarrow/schema_view_test.cc
+++ b/src/nanoarrow/schema_view_test.cc
@@ -61,7 +61,8 @@ TEST(SchemaViewTest, SchemaViewInitErrors) {
schema.release(&schema);
}
-void ExpectSimpleTypeOk(std::shared_ptr<DataType> arrow_t, enum ArrowType nanoarrow_t) {
+void ExpectSimpleTypeOk(std::shared_ptr<DataType> arrow_t, enum ArrowType nanoarrow_t,
+ int bitwidth) {
struct ArrowSchema schema;
struct ArrowSchemaView schema_view;
struct ArrowError error;
@@ -73,6 +74,12 @@ void ExpectSimpleTypeOk(std::shared_ptr<DataType> arrow_t, enum ArrowType nanoar
EXPECT_EQ(schema_view.data_buffer_id, 1);
EXPECT_EQ(schema_view.data_type, nanoarrow_t);
EXPECT_EQ(schema_view.storage_data_type, nanoarrow_t);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], bitwidth);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
schema.release(&schema);
}
@@ -90,18 +97,18 @@ TEST(SchemaViewTest, SchemaViewInitSimple) {
EXPECT_EQ(schema_view.extension_metadata.data, nullptr);
schema.release(&schema);
- ExpectSimpleTypeOk(boolean(), NANOARROW_TYPE_BOOL);
- ExpectSimpleTypeOk(int8(), NANOARROW_TYPE_INT8);
- ExpectSimpleTypeOk(uint8(), NANOARROW_TYPE_UINT8);
- ExpectSimpleTypeOk(int16(), NANOARROW_TYPE_INT16);
- ExpectSimpleTypeOk(uint16(), NANOARROW_TYPE_UINT16);
- ExpectSimpleTypeOk(int32(), NANOARROW_TYPE_INT32);
- ExpectSimpleTypeOk(uint32(), NANOARROW_TYPE_UINT32);
- ExpectSimpleTypeOk(int64(), NANOARROW_TYPE_INT64);
- ExpectSimpleTypeOk(uint64(), NANOARROW_TYPE_UINT64);
- ExpectSimpleTypeOk(float16(), NANOARROW_TYPE_HALF_FLOAT);
- ExpectSimpleTypeOk(float64(), NANOARROW_TYPE_DOUBLE);
- ExpectSimpleTypeOk(float32(), NANOARROW_TYPE_FLOAT);
+ ExpectSimpleTypeOk(boolean(), NANOARROW_TYPE_BOOL, 1);
+ ExpectSimpleTypeOk(int8(), NANOARROW_TYPE_INT8, 8);
+ ExpectSimpleTypeOk(uint8(), NANOARROW_TYPE_UINT8, 8);
+ ExpectSimpleTypeOk(int16(), NANOARROW_TYPE_INT16, 16);
+ ExpectSimpleTypeOk(uint16(), NANOARROW_TYPE_UINT16, 16);
+ ExpectSimpleTypeOk(int32(), NANOARROW_TYPE_INT32, 32);
+ ExpectSimpleTypeOk(uint32(), NANOARROW_TYPE_UINT32, 32);
+ ExpectSimpleTypeOk(int64(), NANOARROW_TYPE_INT64, 64);
+ ExpectSimpleTypeOk(uint64(), NANOARROW_TYPE_UINT64, 64);
+ ExpectSimpleTypeOk(float16(), NANOARROW_TYPE_HALF_FLOAT, 16);
+ ExpectSimpleTypeOk(float64(), NANOARROW_TYPE_DOUBLE, 64);
+ ExpectSimpleTypeOk(float32(), NANOARROW_TYPE_FLOAT, 32);
}
TEST(SchemaViewTest, SchemaViewInitSimpleErrors) {
@@ -130,6 +137,12 @@ TEST(SchemaViewTest, SchemaViewInitDecimal) {
EXPECT_EQ(schema_view.data_buffer_id, 1);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_DECIMAL128);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_DECIMAL128);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 128);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
EXPECT_EQ(schema_view.decimal_bitwidth, 128);
EXPECT_EQ(schema_view.decimal_precision, 5);
EXPECT_EQ(schema_view.decimal_scale, 6);
@@ -142,6 +155,12 @@ TEST(SchemaViewTest, SchemaViewInitDecimal) {
EXPECT_EQ(schema_view.data_buffer_id, 1);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_DECIMAL256);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_DECIMAL256);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 256);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
EXPECT_EQ(schema_view.decimal_bitwidth, 256);
EXPECT_EQ(schema_view.decimal_precision, 5);
EXPECT_EQ(schema_view.decimal_scale, 6);
@@ -205,6 +224,12 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
EXPECT_EQ(schema_view.data_buffer_id, 1);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_FIXED_SIZE_BINARY);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_FIXED_SIZE_BINARY);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 123 * 8);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
EXPECT_EQ(schema_view.fixed_size, 123);
schema.release(&schema);
@@ -216,6 +241,12 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
EXPECT_EQ(schema_view.data_buffer_id, 2);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_STRING);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_STRING);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
schema.release(&schema);
ARROW_EXPECT_OK(ExportType(*binary(), &schema));
@@ -226,6 +257,12 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
EXPECT_EQ(schema_view.data_buffer_id, 2);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_BINARY);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_BINARY);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
schema.release(&schema);
ARROW_EXPECT_OK(ExportType(*large_binary(), &schema));
@@ -236,6 +273,12 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
EXPECT_EQ(schema_view.data_buffer_id, 2);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_LARGE_BINARY);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_LARGE_BINARY);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 64);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
schema.release(&schema);
ARROW_EXPECT_OK(ExportType(*large_utf8(), &schema));
@@ -246,6 +289,12 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
EXPECT_EQ(schema_view.data_buffer_id, 2);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_LARGE_STRING);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_LARGE_STRING);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 64);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
schema.release(&schema);
}
@@ -532,6 +581,12 @@ TEST(SchemaViewTest, SchemaViewInitNestedList) {
EXPECT_EQ(schema_view.offset_buffer_id, 1);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_LIST);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_LIST);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
schema.release(&schema);
ARROW_EXPECT_OK(ExportType(*large_list(int32()), &schema));
@@ -541,6 +596,12 @@ TEST(SchemaViewTest, SchemaViewInitNestedList) {
EXPECT_EQ(schema_view.offset_buffer_id, 1);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_LARGE_LIST);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_LARGE_LIST);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 64);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
schema.release(&schema);
ARROW_EXPECT_OK(ExportType(*fixed_size_list(int32(), 123), &schema));
@@ -549,7 +610,14 @@ TEST(SchemaViewTest, SchemaViewInitNestedList) {
EXPECT_EQ(schema_view.validity_buffer_id, 0);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_FIXED_SIZE_LIST);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_FIXED_SIZE_LIST);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 0);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
EXPECT_EQ(schema_view.fixed_size, 123);
+ EXPECT_EQ(schema_view.layout.child_size_elements, 123);
schema.release(&schema);
}
@@ -588,6 +656,12 @@ TEST(SchemaViewTest, SchemaViewInitNestedStruct) {
EXPECT_EQ(schema_view.validity_buffer_id, 0);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_STRUCT);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_STRUCT);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 0);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
// Make sure child validates
EXPECT_EQ(ArrowSchemaViewInit(&schema_view, schema.children[0], &error), NANOARROW_OK);
@@ -633,6 +707,12 @@ TEST(SchemaViewTest, SchemaViewInitNestedMap) {
EXPECT_EQ(schema_view.validity_buffer_id, 0);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_MAP);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_MAP);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 0);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
schema.release(&schema);
}
@@ -687,6 +767,12 @@ TEST(SchemaViewTest, SchemaViewInitNestedUnion) {
EXPECT_EQ(schema_view.offset_buffer_id, 1);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_DENSE_UNION);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_DENSE_UNION);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_TYPE_ID);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_UNION_OFFSET);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 8);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
EXPECT_EQ(
std::string(schema_view.union_type_ids.data, schema_view.union_type_ids.n_bytes),
std::string("0"));
@@ -698,6 +784,12 @@ TEST(SchemaViewTest, SchemaViewInitNestedUnion) {
EXPECT_EQ(schema_view.type_id_buffer_id, 0);
EXPECT_EQ(schema_view.data_type, NANOARROW_TYPE_SPARSE_UNION);
EXPECT_EQ(schema_view.storage_data_type, NANOARROW_TYPE_SPARSE_UNION);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_TYPE_ID);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 8);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 0);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
EXPECT_EQ(
std::string(schema_view.union_type_ids.data, schema_view.union_type_ids.n_bytes),
std::string("0"));
diff --git a/src/nanoarrow/typedefs_inline.h b/src/nanoarrow/typedefs_inline.h
index 5aca1ec..73d839e 100644
--- a/src/nanoarrow/typedefs_inline.h
+++ b/src/nanoarrow/typedefs_inline.h
@@ -166,6 +166,33 @@ enum ArrowType {
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
};
+/// \brief Functional types of buffers as described in the Arrow Columnar Specification
+enum ArrowBufferType {
+ NANOARROW_BUFFER_TYPE_NONE,  ///< Buffer slot is not used by the layout
+ NANOARROW_BUFFER_TYPE_VALIDITY,  ///< Validity buffer (ArrowLayoutInit() sizes it at 1 bit per element)
+ NANOARROW_BUFFER_TYPE_TYPE_ID,  ///< Union type ids (8 bits per element)
+ NANOARROW_BUFFER_TYPE_UNION_OFFSET,  ///< Dense union offsets (32 bits per element)
+ NANOARROW_BUFFER_TYPE_DATA_OFFSET,  ///< Offsets for list, string and binary types (32 or 64 bits per element)
+ NANOARROW_BUFFER_TYPE_DATA  ///< Data values; the element size depends on the storage type
+};
+
+/// \brief A description of an arrangement of buffers
+///
+/// Contains the minimum amount of information required to
+/// calculate the size of each buffer in an ArrowArray knowing only
+/// the length and offset of the array.
+struct ArrowLayout {
+ /// \brief The function of each buffer
+ enum ArrowBufferType buffer_type[3];
+
+ /// \brief The size in bits of an element in each buffer, or 0 if this size is variable or unknown
+ int64_t element_size_bits[3];
+
+ /// \brief The number of elements in the child array per element in this array for a
+ /// fixed-size list (ArrowLayoutInit() sets this to 0)
+ int64_t child_size_elements;
+};
+
/// \brief An non-owning view of a string
struct ArrowStringView {
/// \brief A pointer to the start of the string
@@ -179,6 +206,27 @@ struct ArrowStringView {
int64_t n_bytes;
};
+/// \brief A non-owning view of a buffer
+struct ArrowBufferView {
+ /// \brief A pointer to the start of the buffer
+ ///
+ /// If n_bytes is 0, this value may be NULL.
+ union {  // all members alias the same address; read through the one matching the element type
+ const void* data;  // untyped pointer; reached as view.data.data because the union is also named data
+ const int8_t* as_int8;
+ const uint8_t* as_uint8;
+ const int16_t* as_int16;
+ const uint16_t* as_uint16;
+ const int32_t* as_int32;
+ const uint32_t* as_uint32;
+ const int64_t* as_int64;
+ const uint64_t* as_uint64;
+ } data;
+
+ /// \brief The size of the buffer in bytes
+ int64_t n_bytes;
+};
+
/// \brief Array buffer allocation and deallocation
///
/// Container for allocate, reallocate, and free methods that can be used
@@ -244,6 +292,15 @@ struct ArrowArrayPrivateData {
enum ArrowType storage_type;
};
+struct ArrowArrayView {  // View of an ArrowArray's buffers and children. NOTE(review): lifecycle/ownership rules live in array_view.c — confirm there
+ struct ArrowArray* array;  // the array being viewed; presumably not owned by the view — TODO confirm
+ enum ArrowType storage_type;  // presumably the type that layout was derived from — TODO confirm
+ struct ArrowLayout layout;  // function and element size of each buffer slot
+ struct ArrowBufferView buffer_views[3];  // three entries, same count as the slots in layout; presumably index-aligned — confirm
+ int64_t n_children;  // presumably the number of entries in children — confirm
+ struct ArrowArrayView** children;  // array of pointers to child views
+};
+
/// }@
#ifdef __cplusplus
diff --git a/src/nanoarrow/utils.c b/src/nanoarrow/utils.c
new file mode 100644
index 0000000..74e4560
--- /dev/null
+++ b/src/nanoarrow/utils.c
@@ -0,0 +1,126 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "nanoarrow.h"
+/// \brief Populate layout with the buffer types and element sizes (in bits) implied by storage_type
+void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY;  // default: slot 0 is a validity buffer
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE;
+ // Element sizes are in bits; 0 marks an unused slot or a slot with no fixed element size.
+ layout->element_size_bits[0] = 1;
+ layout->element_size_bits[1] = 0;
+ layout->element_size_bits[2] = 0;
+
+ layout->child_size_elements = 0;
+ // Each case below only overrides the defaults set above.
+ switch (storage_type) {
+ case NANOARROW_TYPE_UNINITIALIZED:
+ case NANOARROW_TYPE_NA:
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;  // no buffers at all, not even validity
+ layout->element_size_bits[0] = 0;
+ break;
+ // Lists: slot 1 holds the offsets (32-bit, or 64-bit for the large variant); slot 2 stays unused.
+ case NANOARROW_TYPE_LIST:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 32;
+ break;
+
+ case NANOARROW_TYPE_LARGE_LIST:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 64;
+ break;
+ // Fixed-width types: slot 1 is the data buffer and the element size is the value's bit width.
+ case NANOARROW_TYPE_BOOL:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 1;  // one bit per value
+ break;
+
+ case NANOARROW_TYPE_UINT8:
+ case NANOARROW_TYPE_INT8:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 8;
+ break;
+
+ case NANOARROW_TYPE_UINT16:
+ case NANOARROW_TYPE_INT16:
+ case NANOARROW_TYPE_HALF_FLOAT:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 16;
+ break;
+
+ case NANOARROW_TYPE_UINT32:
+ case NANOARROW_TYPE_INT32:
+ case NANOARROW_TYPE_FLOAT:
+ case NANOARROW_TYPE_INTERVAL_MONTHS:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 32;
+ break;
+
+ case NANOARROW_TYPE_UINT64:
+ case NANOARROW_TYPE_INT64:
+ case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 64;
+ break;
+
+ case NANOARROW_TYPE_DECIMAL128:
+ case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 128;
+ break;
+
+ case NANOARROW_TYPE_DECIMAL256:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->element_size_bits[1] = 256;
+ break;
+ // The element size stays 0 here: it depends on the byte width, which storage_type alone does not carry.
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;  // NOTE(review): size presumably filled in by the schema view — confirm
+ break;
+ // Unions replace the validity slot: slot 0 holds 8-bit type ids; dense unions add 32-bit offsets in slot 1.
+ case NANOARROW_TYPE_DENSE_UNION:
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+ layout->element_size_bits[0] = 8;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET;
+ layout->element_size_bits[1] = 32;
+ break;
+
+ case NANOARROW_TYPE_SPARSE_UNION:
+ layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+ layout->element_size_bits[0] = 8;
+ break;
+ // Strings/binary: offsets in slot 1, data in slot 2; the data element size is left at 0.
+ case NANOARROW_TYPE_STRING:
+ case NANOARROW_TYPE_BINARY:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 32;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+ break;
+
+ case NANOARROW_TYPE_LARGE_STRING:
+ case NANOARROW_TYPE_LARGE_BINARY:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+ layout->element_size_bits[1] = 64;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+ break;
+ // All remaining types (struct, fixed-size list and map among them) keep the defaults set above.
+ default:  // NOTE(review): the Arrow format describes map with list-like 32-bit offsets; MAP landing here
+ break;  // may be unintended — confirm whether it should share the NANOARROW_TYPE_LIST case.
+ }
+}