You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "lidavidm (via GitHub)" <gi...@apache.org> on 2023/06/08 18:30:05 UTC

[GitHub] [arrow-nanoarrow] lidavidm commented on a diff in pull request #214: feat(extensions/nanoarrow_ipc): Add endian swapping to IPC reader

lidavidm commented on code in PR #214:
URL: https://github.com/apache/arrow-nanoarrow/pull/214#discussion_r1223407433


##########
extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c:
##########
@@ -1297,6 +1299,143 @@ static struct ArrowIpcBufferFactory ArrowIpcBufferFactoryFromShared(
   return out;
 }
 
+// Just for the purposes of endian-swapping (not data access)
+struct ArrowIpcDecimal128 {
+  uint64_t words[2];
+};
+
+struct ArrowIpcDecimal256 {
+  uint64_t words[4];
+};
+
+struct ArrowIpcMonthsDays {
+  uint32_t months;
+  uint32_t days;
+};
+
+struct ArrowIpcIntervalMonthDayNano {
+  union {
+    uint64_t force_align_uint64;
+    struct ArrowIpcMonthsDays months_days;
+  };
+  uint64_t ns;
+};
+
+static int ArrowIpcDecoderSwapEndian(struct ArrowIpcBufferSource* src,
+                                     struct ArrowBufferView* out_view,
+                                     struct ArrowBuffer* dst, struct ArrowError* error) {
+  // Some buffer data types don't need any endian swapping
+  switch (src->data_type) {
+    case NANOARROW_TYPE_BOOL:
+    case NANOARROW_TYPE_INT8:
+    case NANOARROW_TYPE_UINT8:
+    case NANOARROW_TYPE_STRING:

Review Comment:
   Hmm, we don't need to swap the offsets?



##########
extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c:
##########
@@ -1297,6 +1299,143 @@ static struct ArrowIpcBufferFactory ArrowIpcBufferFactoryFromShared(
   return out;
 }
 
+// Just for the purposes of endian-swapping (not data access)
+struct ArrowIpcDecimal128 {
+  uint64_t words[2];
+};
+
+struct ArrowIpcDecimal256 {
+  uint64_t words[4];
+};
+
+struct ArrowIpcMonthsDays {
+  uint32_t months;
+  uint32_t days;
+};
+
+struct ArrowIpcIntervalMonthDayNano {
+  union {
+    uint64_t force_align_uint64;
+    struct ArrowIpcMonthsDays months_days;
+  };
+  uint64_t ns;
+};
+
+static int ArrowIpcDecoderSwapEndian(struct ArrowIpcBufferSource* src,
+                                     struct ArrowBufferView* out_view,
+                                     struct ArrowBuffer* dst, struct ArrowError* error) {
+  // Some buffer data types don't need any endian swapping
+  switch (src->data_type) {
+    case NANOARROW_TYPE_BOOL:
+    case NANOARROW_TYPE_INT8:
+    case NANOARROW_TYPE_UINT8:
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_BINARY:
+      return NANOARROW_OK;
+    default:
+      break;
+  }
+
+  // Make sure dst is not a shared buffer that we can't modify
+  struct ArrowBuffer tmp;
+  ArrowBufferInit(&tmp);
+
+  if (dst->allocator.private_data != NULL) {
+    ArrowBufferMove(dst, &tmp);
+    ArrowBufferInit(dst);
+  }
+
+  if (dst->size_bytes == 0) {
+    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(dst, out_view->size_bytes));
+    dst->size_bytes = out_view->size_bytes;
+  }
+
+  switch (src->data_type) {
+    case NANOARROW_TYPE_DECIMAL128: {
+      struct ArrowIpcDecimal128* ptr_src =
+          (struct ArrowIpcDecimal128*)out_view->data.data;
+      struct ArrowIpcDecimal128* ptr = (struct ArrowIpcDecimal128*)dst->data;
+      uint64_t words[2];
+      for (int64_t i = 0; i < (dst->size_bytes / 16); i++) {
+        words[0] = bswap64(ptr_src[i].words[0]);
+        words[1] = bswap64(ptr_src[i].words[1]);
+        ptr[i].words[0] = words[1];
+        ptr[i].words[1] = words[0];
+      }

Review Comment:
   I think the safe way to do this is to memcpy from the byte array to the struct and back. That said we already do casting to and from other types...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org