You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by gi...@apache.org on 2023/06/10 01:32:57 UTC
[arrow-nanoarrow] branch main updated: Update dist/ for commit c738f90e874ec38b2fa2acebd154a33da36dc9a4
This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new e112613 Update dist/ for commit c738f90e874ec38b2fa2acebd154a33da36dc9a4
e112613 is described below
commit e1126138c267e4fdc2aa44da65463d78a2f737d6
Author: GitHub Actions <ac...@github.com>
AuthorDate: Sat Jun 10 01:32:51 2023 +0000
Update dist/ for commit c738f90e874ec38b2fa2acebd154a33da36dc9a4
---
dist/nanoarrow.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++----
dist/nanoarrow.h | 20 +++++++-
dist/nanoarrow_ipc.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 259 insertions(+), 17 deletions(-)
diff --git a/dist/nanoarrow.c b/dist/nanoarrow.c
index 8c83f48..4ba74d9 100644
--- a/dist/nanoarrow.c
+++ b/dist/nanoarrow.c
@@ -1926,21 +1926,40 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array,
ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
struct ArrowArrayView* array_view,
struct ArrowError* error) {
- ArrowArrayInitFromType(array, array_view->storage_type);
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowArrayInitFromType(array, array_view->storage_type), error);
+ int result;
+
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
+ private_data->layout = array_view->layout;
- int result = ArrowArrayAllocateChildren(array, array_view->n_children);
- if (result != NANOARROW_OK) {
- array->release(array);
- return result;
+ if (array_view->n_children > 0) {
+ result = ArrowArrayAllocateChildren(array, array_view->n_children);
+ if (result != NANOARROW_OK) {
+ array->release(array);
+ return result;
+ }
+
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ result =
+ ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
+ if (result != NANOARROW_OK) {
+ array->release(array);
+ return result;
+ }
+ }
}
- private_data->layout = array_view->layout;
+ if (array_view->dictionary != NULL) {
+ result = ArrowArrayAllocateDictionary(array);
+ if (result != NANOARROW_OK) {
+ array->release(array);
+ return result;
+ }
- for (int64_t i = 0; i < array_view->n_children; i++) {
- int result =
- ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
+ result =
+ ArrowArrayInitFromArrayView(array->dictionary, array_view->dictionary, error);
if (result != NANOARROW_OK) {
array->release(array);
return result;
@@ -2079,6 +2098,20 @@ static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_v
}
}
+ if (array->dictionary != NULL) {
+ result = ArrowArrayViewAllocateDictionary(array_view);
+ if (result != NANOARROW_OK) {
+ ArrowArrayViewReset(array_view);
+ return result;
+ }
+
+ result = ArrowArrayViewInitFromArray(array_view->dictionary, array->dictionary);
+ if (result != NANOARROW_OK) {
+ ArrowArrayViewReset(array_view);
+ return result;
+ }
+ }
+
return NANOARROW_OK;
}
@@ -2151,6 +2184,10 @@ static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) {
NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->children[i]));
}
+ if (array->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->dictionary));
+ }
+
return NANOARROW_OK;
}
@@ -2165,6 +2202,10 @@ static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
for (int64_t i = 0; i < array->n_children; i++) {
ArrowArrayFlushInternalPointers(array->children[i]);
}
+
+ if (array->dictionary != NULL) {
+ ArrowArrayFlushInternalPointers(array->dictionary);
+ }
}
ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
@@ -2236,6 +2277,21 @@ ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
return NANOARROW_OK;
}
+ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view) {
+ if (array_view->dictionary != NULL) {
+ return EINVAL;
+ }
+
+ array_view->dictionary =
+ (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
+ if (array_view->dictionary == NULL) {
+ return ENOMEM;
+ }
+
+ ArrowArrayViewInitFromType(array_view->dictionary, NANOARROW_TYPE_UNINITIALIZED);
+ return NANOARROW_OK;
+}
+
ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
struct ArrowSchema* schema,
struct ArrowError* error) {
@@ -2264,6 +2320,21 @@ ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
}
}
+ if (schema->dictionary != NULL) {
+ result = ArrowArrayViewAllocateDictionary(array_view);
+ if (result != NANOARROW_OK) {
+ ArrowArrayViewReset(array_view);
+ return result;
+ }
+
+ result =
+ ArrowArrayViewInitFromSchema(array_view->dictionary, schema->dictionary, error);
+ if (result != NANOARROW_OK) {
+ ArrowArrayViewReset(array_view);
+ return result;
+ }
+ }
+
if (array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION ||
array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) {
array_view->union_type_id_map = (int8_t*)ArrowMalloc(256 * sizeof(int8_t));
@@ -2295,6 +2366,11 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
ArrowFree(array_view->children);
}
+ if (array_view->dictionary != NULL) {
+ ArrowArrayViewReset(array_view->dictionary);
+ ArrowFree(array_view->dictionary);
+ }
+
if (array_view->union_type_id_map != NULL) {
ArrowFree(array_view->union_type_id_map);
}
@@ -2410,6 +2486,22 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
array->children[i], error));
}
+ // Check dictionary
+ if (array->dictionary == NULL && array_view->dictionary != NULL) {
+ ArrowErrorSet(error, "Expected dictionary but found NULL");
+ return EINVAL;
+ }
+
+ if (array->dictionary != NULL && array_view->dictionary == NULL) {
+ ArrowErrorSet(error, "Expected NULL dictionary but found dictionary member");
+ return EINVAL;
+ }
+
+ if (array->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowArrayViewSetArrayInternal(array_view->dictionary, array->dictionary, error));
+ }
+
return NANOARROW_OK;
}
@@ -2525,6 +2617,11 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
ArrowArrayViewValidateMinimal(array_view->children[i], error));
}
+ // Recurse for dictionary
+ if (array_view->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view->dictionary, error));
+ }
+
return NANOARROW_OK;
}
@@ -2661,6 +2758,11 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
ArrowArrayViewValidateDefault(array_view->children[i], error));
}
+ // Recurse for dictionary
+ if (array_view->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view->dictionary, error));
+ }
+
return NANOARROW_OK;
}
@@ -2805,10 +2907,17 @@ static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
}
}
+ // Recurse for children
for (int64_t i = 0; i < array_view->n_children; i++) {
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error));
}
+ // Dictionary validation not implemented
+ if (array_view->dictionary != NULL) {
+ ArrowErrorSet(error, "Validation for dictionary-encoded arrays is not implemented");
+ return ENOTSUP;
+ }
+
return NANOARROW_OK;
}
diff --git a/dist/nanoarrow.h b/dist/nanoarrow.h
index db71117..7239385 100644
--- a/dist/nanoarrow.h
+++ b/dist/nanoarrow.h
@@ -640,6 +640,9 @@ struct ArrowArrayView {
/// \brief Pointers to views of this array's children
struct ArrowArrayView** children;
+ /// \brief Pointer to a view of this array's dictionary
+ struct ArrowArrayView* dictionary;
+
/// \brief Union type id to child index mapping
///
/// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer
@@ -879,6 +882,8 @@ static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal,
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema)
#define ArrowArrayViewAllocateChildren \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren)
+#define ArrowArrayViewAllocateDictionary \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateDictionary)
#define ArrowArrayViewSetLength \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
#define ArrowArrayViewSetArray \
@@ -1706,12 +1711,15 @@ ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
struct ArrowSchema* schema,
struct ArrowError* error);
-/// \brief Allocate the schema_view->children array
+/// \brief Allocate the array_view->children array
///
/// Includes the memory for each child struct ArrowArrayView
ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
int64_t n_children);
+/// \brief Allocate array_view->dictionary
+ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view);
+
/// \brief Set data-independent buffer sizes from length
void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length);
@@ -2484,11 +2492,15 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array)
}
}
- // Start building any child arrays
+ // Start building any child arrays or dictionaries
for (int64_t i = 0; i < array->n_children; i++) {
NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i]));
}
+ if (array->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->dictionary));
+ }
+
return NANOARROW_OK;
}
@@ -2502,6 +2514,10 @@ static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i]));
}
+ if (array->dictionary != NULL) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->dictionary));
+ }
+
return NANOARROW_OK;
}
diff --git a/dist/nanoarrow_ipc.c b/dist/nanoarrow_ipc.c
index dbd8651..d623feb 100644
--- a/dist/nanoarrow_ipc.c
+++ b/dist/nanoarrow_ipc.c
@@ -21492,6 +21492,8 @@ struct ArrowIpcBufferSource {
int64_t body_offset_bytes;
int64_t buffer_length_bytes;
enum ArrowIpcCompressionType codec;
+ enum ArrowType data_type;
+ int32_t element_size_bits;
int swap_endian;
};
@@ -21568,6 +21570,118 @@ static struct ArrowIpcBufferFactory ArrowIpcBufferFactoryFromShared(
return out;
}
+// Just for the purposes of endian-swapping
+struct ArrowIpcIntervalMonthDayNano {
+ uint32_t months;
+ uint32_t days;
+ uint64_t ns;
+};
+
+static int ArrowIpcDecoderSwapEndian(struct ArrowIpcBufferSource* src,
+ struct ArrowBufferView* out_view,
+ struct ArrowBuffer* dst, struct ArrowError* error) {
+ // Some buffer data types don't need any endian swapping
+ switch (src->data_type) {
+ case NANOARROW_TYPE_BOOL:
+ case NANOARROW_TYPE_INT8:
+ case NANOARROW_TYPE_UINT8:
+ case NANOARROW_TYPE_STRING:
+ case NANOARROW_TYPE_BINARY:
+ return NANOARROW_OK;
+ default:
+ break;
+ }
+
+ // Make sure dst is not a shared buffer that we can't modify
+ struct ArrowBuffer tmp;
+ ArrowBufferInit(&tmp);
+
+ if (dst->allocator.private_data != NULL) {
+ ArrowBufferMove(dst, &tmp);
+ ArrowBufferInit(dst);
+ }
+
+ if (dst->size_bytes == 0) {
+ NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(dst, out_view->size_bytes));
+ dst->size_bytes = out_view->size_bytes;
+ }
+
+ switch (src->data_type) {
+ case NANOARROW_TYPE_DECIMAL128:
+ case NANOARROW_TYPE_DECIMAL256: {
+ const uint64_t* ptr_src = out_view->data.as_uint64;
+ uint64_t* ptr_dst = (uint64_t*)dst->data;
+ uint64_t words[4];
+ int n_words = src->element_size_bits / 64;
+
+ for (int64_t i = 0; i < (dst->size_bytes / n_words / 8); i++) {
+ for (int j = 0; j < n_words; j++) {
+ words[j] = bswap64(ptr_src[i * n_words + j]);
+ }
+
+ for (int j = 0; j < n_words; j++) {
+ ptr_dst[i * n_words + j] = words[n_words - j - 1];
+ }
+ }
+ break;
+ }
+ case NANOARROW_TYPE_INTERVAL_DAY_TIME: {
+ uint32_t* ptr = (uint32_t*)dst->data;
+ for (int64_t i = 0; i < (dst->size_bytes / 4); i++) {
+ ptr[i] = bswap32(out_view->data.as_uint32[i]);
+ }
+ break;
+ }
+ case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: {
+ const uint8_t* ptr_src = out_view->data.as_uint8;
+ uint8_t* ptr_dst = dst->data;
+ int item_size_bytes = 16;
+ struct ArrowIpcIntervalMonthDayNano item;
+ for (int64_t i = 0; i < (dst->size_bytes / item_size_bytes); i++) {
+ memcpy(&item, ptr_src + i * item_size_bytes, item_size_bytes);
+ item.months = bswap32(item.months);
+ item.days = bswap32(item.days);
+ item.ns = bswap64(item.ns);
+ memcpy(ptr_dst + i * item_size_bytes, &item, item_size_bytes);
+ }
+ break;
+ }
+ default:
+ switch (src->element_size_bits) {
+ case 16: {
+ uint16_t* ptr = (uint16_t*)dst->data;
+ for (int64_t i = 0; i < (dst->size_bytes / 2); i++) {
+ ptr[i] = bswap16(out_view->data.as_uint16[i]);
+ }
+ break;
+ }
+ case 32: {
+ uint32_t* ptr = (uint32_t*)dst->data;
+ for (int64_t i = 0; i < (dst->size_bytes / 4); i++) {
+ ptr[i] = bswap32(out_view->data.as_uint32[i]);
+ }
+ break;
+ }
+ case 64: {
+ uint64_t* ptr = (uint64_t*)dst->data;
+ for (int64_t i = 0; i < (dst->size_bytes / 8); i++) {
+ ptr[i] = bswap64(out_view->data.as_uint64[i]);
+ }
+ break;
+ }
+ default:
+ ArrowErrorSet(error, "Endian swapping for element bitwidth %d is not supported",
+ (int)src->element_size_bits);
+ return ENOTSUP;
+ }
+ break;
+ }
+
+ ArrowBufferReset(&tmp);
+ out_view->data.data = dst->data;
+ return NANOARROW_OK;
+}
+
struct ArrowIpcArraySetter {
ns(FieldNode_vec_t) fields;
int64_t field_i;
@@ -21605,16 +21719,16 @@ static int ArrowIpcDecoderMakeBuffer(struct ArrowIpcArraySetter* setter, int64_t
return ENOTSUP;
}
- if (setter->src.swap_endian) {
- ArrowErrorSet(error,
- "The nanoarrow_ipc extension does not support non-system endianness");
- return ENOTSUP;
- }
-
setter->src.body_offset_bytes = offset;
setter->src.buffer_length_bytes = length;
NANOARROW_RETURN_NOT_OK(
setter->factory.make_buffer(&setter->factory, &setter->src, out_view, out, error));
+
+ if (setter->src.swap_endian) {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowIpcDecoderSwapEndian(&setter->src, out_view, out, error));
+ }
+
return NANOARROW_OK;
}
@@ -21683,6 +21797,9 @@ static int ArrowIpcDecoderWalkSetArrayView(struct ArrowIpcArraySetter* setter,
buffer_dst->size_bytes = 0;
}
+ setter->src.data_type = array_view->layout.buffer_data_type[i];
+ setter->src.element_size_bits = array_view->layout.element_size_bits[i];
+
NANOARROW_RETURN_NOT_OK(
ArrowIpcDecoderMakeBuffer(setter, buffer_offset, buffer_length,
&array_view->buffer_views[i], buffer_dst, error));