You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2022/08/11 12:35:57 UTC

[arrow-nanoarrow] branch main updated: Buffer element appenders (#17)

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 3b30507  Buffer element appenders (#17)
3b30507 is described below

commit 3b305075d2c4c8489ac0e6288edb58a52a64884d
Author: Dewey Dunnington <de...@fishandwhistle.net>
AuthorDate: Thu Aug 11 09:35:52 2022 -0300

    Buffer element appenders (#17)
    
    * element appenders
    
    * don't do unaligned access in tests, add minimum functions to do a build-by-buffer
    
    * remove copied bit from test-build-by-buffer
---
 src/nanoarrow/array_inline.h  | 77 +++++++++++++++++++++++++++++++++++++++++++
 src/nanoarrow/array_test.cc   | 43 +++++++++++++++++++++++-
 src/nanoarrow/buffer_inline.h | 50 ++++++++++++++++++++++++++++
 src/nanoarrow/buffer_test.cc  | 45 +++++++++++++++++++++++++
 src/nanoarrow/nanoarrow.h     | 57 ++++++++++++++++++++++++++++++++
 5 files changed, 271 insertions(+), 1 deletion(-)

diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
new file mode 100644
index 0000000..7b74534
--- /dev/null
+++ b/src/nanoarrow/array_inline.h
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_ARRAY_INLINE_H_INCLUDED
+#define NANOARROW_ARRAY_INLINE_H_INCLUDED
+
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "bitmap_inline.h"
+#include "buffer_inline.h"
+#include "typedefs_inline.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array) {
+  // array->private_data is read as a struct ArrowArrayPrivateData; the returned
+  // pointer addresses its bitmap member (not a copy).
+  return &((struct ArrowArrayPrivateData*)array->private_data)->bitmap;
+}
+
+static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i) {
+  // Index 0 is the buffer behind the validity bitmap; any other index i maps to
+  // element i - 1 of the private buffers member. i is not range-checked here.
+  struct ArrowArrayPrivateData* owned =
+      (struct ArrowArrayPrivateData*)array->private_data;
+  if (i == 0) {
+    return &owned->bitmap.buffer;
+  }
+  return owned->buffers + i - 1;
+}
+
+static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
+                                                      char shrink_to_fit) {
+  struct ArrowArrayPrivateData* owned =
+      (struct ArrowArrayPrivateData*)array->private_data;
+
+  // Buffers may have been reallocated while appending, so copy each buffer's
+  // current data pointer into the slot that backs array->buffers[n].
+  // With shrink_to_fit set, each buffer is first resized to its own size_bytes;
+  // the first resize failure is returned as-is.
+  for (int64_t n = 0; n < 3; n++) {
+    struct ArrowBuffer* current = ArrowArrayBuffer(array, n);
+    if (shrink_to_fit) {
+      int status = ArrowBufferResize(current, current->size_bytes, shrink_to_fit);
+      if (status != NANOARROW_OK) return status;
+    }
+
+    // Read the pointer only after the optional resize above.
+    owned->buffer_data[n] = current->data;
+  }
+
+  return NANOARROW_OK;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index d61047c..4d32837 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -95,13 +95,14 @@ TEST(ArrayTest, ArrayTestSetBitmap) {
   const uint8_t* bitmap_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
   EXPECT_EQ(bitmap_buffer[0], 0xff);
   EXPECT_EQ(bitmap_buffer[1], 0x01);
+  EXPECT_EQ(ArrowArrayValidityBitmap(&array)->buffer.data, array.buffers[0]);
 
   array.release(&array);
 }
 
 TEST(ArrayTest, ArrayTestSetBuffer) {
   // the array ["a", null, "bc", null, "def", null, "ghij"]
-  uint8_t validity_bitmap[] = {0x05};
+  uint8_t validity_bitmap[] = {0x55};
   int32_t offsets[] = {0, 1, 1, 3, 3, 6, 6, 10, 10};
   const char* data = "abcdefghij";
 
@@ -123,8 +124,48 @@ TEST(ArrayTest, ArrayTestSetBuffer) {
   EXPECT_EQ(memcmp(array.buffers[1], offsets, 8 * sizeof(int32_t)), 0);
   EXPECT_EQ(memcmp(array.buffers[2], data, 10), 0);
 
+  EXPECT_EQ(ArrowArrayBuffer(&array, 0)->data, array.buffers[0]);
+  EXPECT_EQ(ArrowArrayBuffer(&array, 1)->data, array.buffers[1]);
+  EXPECT_EQ(ArrowArrayBuffer(&array, 2)->data, array.buffers[2]);
+
   // try to set a buffer that isn't, 0, 1, or 2
   EXPECT_EQ(ArrowArraySetBuffer(&array, 3, &buffer0), EINVAL);
 
   array.release(&array);
 }
+
+TEST(ArrayTest, ArrayTestBuildByBuffer) {
+  // the array ["a", null, "bc", null, "def", null, "ghij"]
+  uint8_t validity_bitmap[] = {0x55};
+  int8_t validity_array[] = {1, 0, 1, 0, 1, 0, 1};
+  int32_t offsets[] = {0, 1, 1, 3, 3, 6, 6, 10, 10};
+  const char* data = "abcdefghij";
+  // Initialize a string-typed array, then fill each of its buffers directly.
+  struct ArrowArray array;
+  ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_STRING), NANOARROW_OK);
+  // Validity: reserve, then append the 7 int8 flags from validity_array.
+  ASSERT_EQ(ArrowBitmapReserve(ArrowArrayValidityBitmap(&array), 100), NANOARROW_OK);
+  ArrowBitmapAppendInt8Unsafe(ArrowArrayValidityBitmap(&array), validity_array, 7);
+  // Buffer 1 (offsets): only the first 8 of the 9 offsets entries are appended.
+  ASSERT_EQ(ArrowBufferReserve(ArrowArrayBuffer(&array, 1), 100), NANOARROW_OK);
+  ArrowBufferAppendUnsafe(ArrowArrayBuffer(&array, 1), offsets, 8 * sizeof(int32_t));
+  // Buffer 2 (character data): the 10 bytes of "abcdefghij".
+  ASSERT_EQ(ArrowBufferReserve(ArrowArrayBuffer(&array, 2), 100), NANOARROW_OK);
+  ArrowBufferAppendUnsafe(ArrowArrayBuffer(&array, 2), data, 10);
+  // Finish with shrink_to_fit enabled.
+  EXPECT_EQ(ArrowArrayFinishBuilding(&array, true), NANOARROW_OK);
+  // The exported buffers hold the expected bitmap byte, offsets, and characters.
+  EXPECT_EQ(memcmp(array.buffers[0], validity_bitmap, 1), 0);
+  EXPECT_EQ(memcmp(array.buffers[1], offsets, 8 * sizeof(int32_t)), 0);
+  EXPECT_EQ(memcmp(array.buffers[2], data, 10), 0);
+  // array.buffers[i] is the same pointer as the i-th builder buffer's data.
+  EXPECT_EQ(ArrowArrayBuffer(&array, 0)->data, array.buffers[0]);
+  EXPECT_EQ(ArrowArrayBuffer(&array, 1)->data, array.buffers[1]);
+  EXPECT_EQ(ArrowArrayBuffer(&array, 2)->data, array.buffers[2]);
+  // Sizes equal what was appended: 1 bitmap byte, 8 int32 offsets, 10 chars.
+  EXPECT_EQ(ArrowArrayBuffer(&array, 0)->size_bytes, 1);
+  EXPECT_EQ(ArrowArrayBuffer(&array, 1)->size_bytes, 8 * sizeof(int32_t));
+  EXPECT_EQ(ArrowArrayBuffer(&array, 2)->size_bytes, 10);
+
+  array.release(&array);
+}
diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index a560304..5400a1b 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -129,6 +129,56 @@ static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
   return NANOARROW_OK;
 }
 
+static inline ArrowErrorCode ArrowBufferAppendInt8(
+    struct ArrowBuffer* buffer, int8_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one int8_t
+}
+
+static inline ArrowErrorCode ArrowBufferAppendUInt8(
+    struct ArrowBuffer* buffer, uint8_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one uint8_t
+}
+
+static inline ArrowErrorCode ArrowBufferAppendInt16(
+    struct ArrowBuffer* buffer, int16_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one int16_t
+}
+
+static inline ArrowErrorCode ArrowBufferAppendUInt16(
+    struct ArrowBuffer* buffer, uint16_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one uint16_t
+}
+
+static inline ArrowErrorCode ArrowBufferAppendInt32(
+    struct ArrowBuffer* buffer, int32_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one int32_t
+}
+
+static inline ArrowErrorCode ArrowBufferAppendUInt32(
+    struct ArrowBuffer* buffer, uint32_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one uint32_t
+}
+
+static inline ArrowErrorCode ArrowBufferAppendInt64(
+    struct ArrowBuffer* buffer, int64_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one int64_t
+}
+
+static inline ArrowErrorCode ArrowBufferAppendUInt64(
+    struct ArrowBuffer* buffer, uint64_t value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one uint64_t
+}
+
+static inline ArrowErrorCode ArrowBufferAppendDouble(
+    struct ArrowBuffer* buffer, double value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one double
+}
+
+static inline ArrowErrorCode ArrowBufferAppendFloat(
+    struct ArrowBuffer* buffer, float value) {
+  return ArrowBufferAppend(buffer, &value, sizeof(value));  // bytes of one float
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/nanoarrow/buffer_test.cc b/src/nanoarrow/buffer_test.cc
index 42c4a85..1c7946b 100644
--- a/src/nanoarrow/buffer_test.cc
+++ b/src/nanoarrow/buffer_test.cc
@@ -160,3 +160,48 @@ TEST(BufferTest, BufferTestError) {
 
   ArrowBufferReset(&buffer);
 }
+
+TEST(BufferTest, BufferTestAppendHelpers) {
+  struct ArrowBuffer buffer;
+  ArrowBufferInit(&buffer);
+  // Each case appends 123 with one typed helper, reads element 0 back, then resets.
+  EXPECT_EQ(ArrowBufferAppendInt8(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<int8_t*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+  // Same round trip for uint8_t.
+  EXPECT_EQ(ArrowBufferAppendUInt8(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+  // Same round trip for int16_t.
+  EXPECT_EQ(ArrowBufferAppendInt16(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<int16_t*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+  // Same round trip for uint16_t.
+  EXPECT_EQ(ArrowBufferAppendUInt16(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<uint16_t*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+  // Same round trip for int32_t.
+  EXPECT_EQ(ArrowBufferAppendInt32(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<int32_t*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+  // Same round trip for uint32_t.
+  EXPECT_EQ(ArrowBufferAppendUInt32(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<uint32_t*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+  // Same round trip for int64_t.
+  EXPECT_EQ(ArrowBufferAppendInt64(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<int64_t*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+  // Same round trip for uint64_t.
+  EXPECT_EQ(ArrowBufferAppendUInt64(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<uint64_t*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+  // Same round trip for double.
+  EXPECT_EQ(ArrowBufferAppendDouble(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<double*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+  // Same round trip for float.
+  EXPECT_EQ(ArrowBufferAppendFloat(&buffer, 123), NANOARROW_OK);
+  EXPECT_EQ(reinterpret_cast<float*>(buffer.data)[0], 123);
+  ArrowBufferReset(&buffer);
+}
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index a6dbe89..c432e60 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -399,6 +399,46 @@ static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const voi
 static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
                                                const void* data, int64_t size_bytes);
 
+/// \brief Write an 8-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer,
+                                                   int8_t value);
+
+/// \brief Write an unsigned 8-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer,
+                                                    uint8_t value);
+
+/// \brief Write a 16-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer,
+                                                    int16_t value);
+
+/// \brief Write an unsigned 16-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer,
+                                                     uint16_t value);
+
+/// \brief Write a 32-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer,
+                                                    int32_t value);
+
+/// \brief Write an unsigned 32-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer,
+                                                     uint32_t value);
+
+/// \brief Write a 64-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer,
+                                                    int64_t value);
+
+/// \brief Write an unsigned 64-bit integer to a buffer
+static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer,
+                                                     uint64_t value);
+
+/// \brief Write a double to a buffer
+static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer,
+                                                     double value);
+
+/// \brief Write a float to a buffer
+static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer,
+                                                    float value);
+
 /// }@
 
 /// \defgroup nanoarrow-bitmap Bitmap utilities
@@ -508,9 +548,26 @@ void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* b
 ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
                                    struct ArrowBuffer* buffer);
 
+/// \brief Get the validity bitmap of an ArrowArray
+///
+/// array must have been allocated using ArrowArrayInit
+static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array);
+
+/// \brief Get a buffer of an ArrowArray (i = 0 is the validity bitmap's buffer)
+///
+/// array must have been allocated using ArrowArrayInit; i is not range-checked
+static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i);
+
+/// \brief Finish building an ArrowArray
+///
+/// array must have been allocated using ArrowArrayInit
+static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
+                                                      char shrink_to_fit);
+
 /// }@
 
 // Inline function definitions
+#include "array_inline.h"
 #include "bitmap_inline.h"
 #include "buffer_inline.h"
 #include "utils_inline.h"