You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by gi...@apache.org on 2023/06/10 01:32:57 UTC

[arrow-nanoarrow] branch main updated: Update dist/ for commit c738f90e874ec38b2fa2acebd154a33da36dc9a4

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e112613  Update dist/ for commit c738f90e874ec38b2fa2acebd154a33da36dc9a4
e112613 is described below

commit e1126138c267e4fdc2aa44da65463d78a2f737d6
Author: GitHub Actions <ac...@github.com>
AuthorDate: Sat Jun 10 01:32:51 2023 +0000

    Update dist/ for commit c738f90e874ec38b2fa2acebd154a33da36dc9a4
---
 dist/nanoarrow.c     | 127 ++++++++++++++++++++++++++++++++++++++++++++++----
 dist/nanoarrow.h     |  20 +++++++-
 dist/nanoarrow_ipc.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 259 insertions(+), 17 deletions(-)

diff --git a/dist/nanoarrow.c b/dist/nanoarrow.c
index 8c83f48..4ba74d9 100644
--- a/dist/nanoarrow.c
+++ b/dist/nanoarrow.c
@@ -1926,21 +1926,40 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array,
 ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
                                            struct ArrowArrayView* array_view,
                                            struct ArrowError* error) {
-  ArrowArrayInitFromType(array, array_view->storage_type);
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+      ArrowArrayInitFromType(array, array_view->storage_type), error);
+  int result;
+
   struct ArrowArrayPrivateData* private_data =
       (struct ArrowArrayPrivateData*)array->private_data;
+  private_data->layout = array_view->layout;
 
-  int result = ArrowArrayAllocateChildren(array, array_view->n_children);
-  if (result != NANOARROW_OK) {
-    array->release(array);
-    return result;
+  if (array_view->n_children > 0) {
+    result = ArrowArrayAllocateChildren(array, array_view->n_children);
+    if (result != NANOARROW_OK) {
+      array->release(array);
+      return result;
+    }
+
+    for (int64_t i = 0; i < array_view->n_children; i++) {
+      result =
+          ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
+      if (result != NANOARROW_OK) {
+        array->release(array);
+        return result;
+      }
+    }
   }
 
-  private_data->layout = array_view->layout;
+  if (array_view->dictionary != NULL) {
+    result = ArrowArrayAllocateDictionary(array);
+    if (result != NANOARROW_OK) {
+      array->release(array);
+      return result;
+    }
 
-  for (int64_t i = 0; i < array_view->n_children; i++) {
-    int result =
-        ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
+    result =
+        ArrowArrayInitFromArrayView(array->dictionary, array_view->dictionary, error);
     if (result != NANOARROW_OK) {
       array->release(array);
       return result;
@@ -2079,6 +2098,20 @@ static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_v
     }
   }
 
+  if (array->dictionary != NULL) {
+    result = ArrowArrayViewAllocateDictionary(array_view);
+    if (result != NANOARROW_OK) {
+      ArrowArrayViewReset(array_view);
+      return result;
+    }
+
+    result = ArrowArrayViewInitFromArray(array_view->dictionary, array->dictionary);
+    if (result != NANOARROW_OK) {
+      ArrowArrayViewReset(array_view);
+      return result;
+    }
+  }
+
   return NANOARROW_OK;
 }
 
@@ -2151,6 +2184,10 @@ static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) {
     NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->children[i]));
   }
 
+  if (array->dictionary != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->dictionary));
+  }
+
   return NANOARROW_OK;
 }
 
@@ -2165,6 +2202,10 @@ static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
   for (int64_t i = 0; i < array->n_children; i++) {
     ArrowArrayFlushInternalPointers(array->children[i]);
   }
+
+  if (array->dictionary != NULL) {
+    ArrowArrayFlushInternalPointers(array->dictionary);
+  }
 }
 
 ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
@@ -2236,6 +2277,21 @@ ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
   return NANOARROW_OK;
 }
 
+ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view) {
+  if (array_view->dictionary != NULL) {
+    return EINVAL;
+  }
+
+  array_view->dictionary =
+      (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
+  if (array_view->dictionary == NULL) {
+    return ENOMEM;
+  }
+
+  ArrowArrayViewInitFromType(array_view->dictionary, NANOARROW_TYPE_UNINITIALIZED);
+  return NANOARROW_OK;
+}
+
 ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
                                             struct ArrowSchema* schema,
                                             struct ArrowError* error) {
@@ -2264,6 +2320,21 @@ ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
     }
   }
 
+  if (schema->dictionary != NULL) {
+    result = ArrowArrayViewAllocateDictionary(array_view);
+    if (result != NANOARROW_OK) {
+      ArrowArrayViewReset(array_view);
+      return result;
+    }
+
+    result =
+        ArrowArrayViewInitFromSchema(array_view->dictionary, schema->dictionary, error);
+    if (result != NANOARROW_OK) {
+      ArrowArrayViewReset(array_view);
+      return result;
+    }
+  }
+
   if (array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION ||
       array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) {
     array_view->union_type_id_map = (int8_t*)ArrowMalloc(256 * sizeof(int8_t));
@@ -2295,6 +2366,11 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
     ArrowFree(array_view->children);
   }
 
+  if (array_view->dictionary != NULL) {
+    ArrowArrayViewReset(array_view->dictionary);
+    ArrowFree(array_view->dictionary);
+  }
+
   if (array_view->union_type_id_map != NULL) {
     ArrowFree(array_view->union_type_id_map);
   }
@@ -2410,6 +2486,22 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
                                                            array->children[i], error));
   }
 
+  // Check dictionary
+  if (array->dictionary == NULL && array_view->dictionary != NULL) {
+    ArrowErrorSet(error, "Expected dictionary but found NULL");
+    return EINVAL;
+  }
+
+  if (array->dictionary != NULL && array_view->dictionary == NULL) {
+    ArrowErrorSet(error, "Expected NULL dictionary but found dictionary member");
+    return EINVAL;
+  }
+
+  if (array->dictionary != NULL) {
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArrayInternal(array_view->dictionary, array->dictionary, error));
+  }
+
   return NANOARROW_OK;
 }
 
@@ -2525,6 +2617,11 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
         ArrowArrayViewValidateMinimal(array_view->children[i], error));
   }
 
+  // Recurse for dictionary
+  if (array_view->dictionary != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view->dictionary, error));
+  }
+
   return NANOARROW_OK;
 }
 
@@ -2661,6 +2758,11 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
         ArrowArrayViewValidateDefault(array_view->children[i], error));
   }
 
+  // Recurse for dictionary
+  if (array_view->dictionary != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view->dictionary, error));
+  }
+
   return NANOARROW_OK;
 }
 
@@ -2805,10 +2907,17 @@ static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
     }
   }
 
+  // Recurse for children
   for (int64_t i = 0; i < array_view->n_children; i++) {
     NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error));
   }
 
+  // Dictionary validation not implemented
+  if (array_view->dictionary != NULL) {
+    ArrowErrorSet(error, "Validation for dictionary-encoded arrays is not implemented");
+    return ENOTSUP;
+  }
+
   return NANOARROW_OK;
 }
 
diff --git a/dist/nanoarrow.h b/dist/nanoarrow.h
index db71117..7239385 100644
--- a/dist/nanoarrow.h
+++ b/dist/nanoarrow.h
@@ -640,6 +640,9 @@ struct ArrowArrayView {
   /// \brief Pointers to views of this array's children
   struct ArrowArrayView** children;
 
+  /// \brief Pointer to a view of this array's dictionary
+  struct ArrowArrayView* dictionary;
+
   /// \brief Union type id to child index mapping
   ///
   /// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer
@@ -879,6 +882,8 @@ static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal,
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema)
 #define ArrowArrayViewAllocateChildren \
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren)
+#define ArrowArrayViewAllocateDictionary \
+  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateDictionary)
 #define ArrowArrayViewSetLength \
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
 #define ArrowArrayViewSetArray \
@@ -1706,12 +1711,15 @@ ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
                                             struct ArrowSchema* schema,
                                             struct ArrowError* error);
 
-/// \brief Allocate the schema_view->children array
+/// \brief Allocate the array_view->children array
 ///
 /// Includes the memory for each child struct ArrowArrayView
 ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view,
                                               int64_t n_children);
 
+/// \brief Allocate array_view->dictionary
+ArrowErrorCode ArrowArrayViewAllocateDictionary(struct ArrowArrayView* array_view);
+
 /// \brief Set data-independent buffer sizes from length
 void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length);
 
@@ -2484,11 +2492,15 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array)
     }
   }
 
-  // Start building any child arrays
+  // Start building any child arrays or dictionaries
   for (int64_t i = 0; i < array->n_children; i++) {
     NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i]));
   }
 
+  if (array->dictionary != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->dictionary));
+  }
+
   return NANOARROW_OK;
 }
 
@@ -2502,6 +2514,10 @@ static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
     NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i]));
   }
 
+  if (array->dictionary != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->dictionary));
+  }
+
   return NANOARROW_OK;
 }
 
diff --git a/dist/nanoarrow_ipc.c b/dist/nanoarrow_ipc.c
index dbd8651..d623feb 100644
--- a/dist/nanoarrow_ipc.c
+++ b/dist/nanoarrow_ipc.c
@@ -21492,6 +21492,8 @@ struct ArrowIpcBufferSource {
   int64_t body_offset_bytes;
   int64_t buffer_length_bytes;
   enum ArrowIpcCompressionType codec;
+  enum ArrowType data_type;
+  int32_t element_size_bits;
   int swap_endian;
 };
 
@@ -21568,6 +21570,118 @@ static struct ArrowIpcBufferFactory ArrowIpcBufferFactoryFromShared(
   return out;
 }
 
+// Just for the purposes of endian-swapping
+struct ArrowIpcIntervalMonthDayNano {
+  uint32_t months;
+  uint32_t days;
+  uint64_t ns;
+};
+
+static int ArrowIpcDecoderSwapEndian(struct ArrowIpcBufferSource* src,
+                                     struct ArrowBufferView* out_view,
+                                     struct ArrowBuffer* dst, struct ArrowError* error) {
+  // Some buffer data types don't need any endian swapping
+  switch (src->data_type) {
+    case NANOARROW_TYPE_BOOL:
+    case NANOARROW_TYPE_INT8:
+    case NANOARROW_TYPE_UINT8:
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_BINARY:
+      return NANOARROW_OK;
+    default:
+      break;
+  }
+
+  // Make sure dst is not a shared buffer that we can't modify
+  struct ArrowBuffer tmp;
+  ArrowBufferInit(&tmp);
+
+  if (dst->allocator.private_data != NULL) {
+    ArrowBufferMove(dst, &tmp);
+    ArrowBufferInit(dst);
+  }
+
+  if (dst->size_bytes == 0) {
+    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(dst, out_view->size_bytes));
+    dst->size_bytes = out_view->size_bytes;
+  }
+
+  switch (src->data_type) {
+    case NANOARROW_TYPE_DECIMAL128:
+    case NANOARROW_TYPE_DECIMAL256: {
+      const uint64_t* ptr_src = out_view->data.as_uint64;
+      uint64_t* ptr_dst = (uint64_t*)dst->data;
+      uint64_t words[4];
+      int n_words = src->element_size_bits / 64;
+
+      for (int64_t i = 0; i < (dst->size_bytes / n_words / 8); i++) {
+        for (int j = 0; j < n_words; j++) {
+          words[j] = bswap64(ptr_src[i * n_words + j]);
+        }
+
+        for (int j = 0; j < n_words; j++) {
+          ptr_dst[i * n_words + j] = words[n_words - j - 1];
+        }
+      }
+      break;
+    }
+    case NANOARROW_TYPE_INTERVAL_DAY_TIME: {
+      uint32_t* ptr = (uint32_t*)dst->data;
+      for (int64_t i = 0; i < (dst->size_bytes / 4); i++) {
+        ptr[i] = bswap32(out_view->data.as_uint32[i]);
+      }
+      break;
+    }
+    case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: {
+      const uint8_t* ptr_src = out_view->data.as_uint8;
+      uint8_t* ptr_dst = dst->data;
+      int item_size_bytes = 16;
+      struct ArrowIpcIntervalMonthDayNano item;
+      for (int64_t i = 0; i < (dst->size_bytes / item_size_bytes); i++) {
+        memcpy(&item, ptr_src + i * item_size_bytes, item_size_bytes);
+        item.months = bswap32(item.months);
+        item.days = bswap32(item.days);
+        item.ns = bswap64(item.ns);
+        memcpy(ptr_dst + i * item_size_bytes, &item, item_size_bytes);
+      }
+      break;
+    }
+    default:
+      switch (src->element_size_bits) {
+        case 16: {
+          uint16_t* ptr = (uint16_t*)dst->data;
+          for (int64_t i = 0; i < (dst->size_bytes / 2); i++) {
+            ptr[i] = bswap16(out_view->data.as_uint16[i]);
+          }
+          break;
+        }
+        case 32: {
+          uint32_t* ptr = (uint32_t*)dst->data;
+          for (int64_t i = 0; i < (dst->size_bytes / 4); i++) {
+            ptr[i] = bswap32(out_view->data.as_uint32[i]);
+          }
+          break;
+        }
+        case 64: {
+          uint64_t* ptr = (uint64_t*)dst->data;
+          for (int64_t i = 0; i < (dst->size_bytes / 8); i++) {
+            ptr[i] = bswap64(out_view->data.as_uint64[i]);
+          }
+          break;
+        }
+        default:
+          ArrowErrorSet(error, "Endian swapping for element bitwidth %d is not supported",
+                        (int)src->element_size_bits);
+          return ENOTSUP;
+      }
+      break;
+  }
+
+  ArrowBufferReset(&tmp);
+  out_view->data.data = dst->data;
+  return NANOARROW_OK;
+}
+
 struct ArrowIpcArraySetter {
   ns(FieldNode_vec_t) fields;
   int64_t field_i;
@@ -21605,16 +21719,16 @@ static int ArrowIpcDecoderMakeBuffer(struct ArrowIpcArraySetter* setter, int64_t
     return ENOTSUP;
   }
 
-  if (setter->src.swap_endian) {
-    ArrowErrorSet(error,
-                  "The nanoarrow_ipc extension does not support non-system endianness");
-    return ENOTSUP;
-  }
-
   setter->src.body_offset_bytes = offset;
   setter->src.buffer_length_bytes = length;
   NANOARROW_RETURN_NOT_OK(
       setter->factory.make_buffer(&setter->factory, &setter->src, out_view, out, error));
+
+  if (setter->src.swap_endian) {
+    NANOARROW_RETURN_NOT_OK(
+        ArrowIpcDecoderSwapEndian(&setter->src, out_view, out, error));
+  }
+
   return NANOARROW_OK;
 }
 
@@ -21683,6 +21797,9 @@ static int ArrowIpcDecoderWalkSetArrayView(struct ArrowIpcArraySetter* setter,
       buffer_dst->size_bytes = 0;
     }
 
+    setter->src.data_type = array_view->layout.buffer_data_type[i];
+    setter->src.element_size_bits = array_view->layout.element_size_bits[i];
+
     NANOARROW_RETURN_NOT_OK(
         ArrowIpcDecoderMakeBuffer(setter, buffer_offset, buffer_length,
                                   &array_view->buffer_views[i], buffer_dst, error));