You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2022/08/22 16:49:27 UTC

[arrow-nanoarrow] branch main updated: Implement getters (#26)

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 6c44081  Implement getters (#26)
6c44081 is described below

commit 6c440819101eabae4bb4c1e522aefcc48e18dac6
Author: Dewey Dunnington <de...@fishandwhistle.net>
AuthorDate: Mon Aug 22 13:49:22 2022 -0300

    Implement getters (#26)
    
    * sketch getter API
    
    * re-sketch getter API
    
    * test getters
    
    * document getters
---
 CMakeLists.txt                   |   2 +-
 src/nanoarrow/array_inline.h     | 173 +++++++++++++++++++++++++++++++++++++++
 src/nanoarrow/array_view_test.cc | 104 +++++++++++++++++++++++
 src/nanoarrow/nanoarrow.h        |  36 ++++++++
 src/nanoarrow/typedefs_inline.h  |   3 +
 5 files changed, 317 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 12fe19a..d1c447f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,7 +79,7 @@ if (NANOARROW_BUILD_TESTS)
     target_link_libraries(buffer_test nanoarrow GTest::gtest_main)
     target_link_libraries(bitmap_test nanoarrow GTest::gtest_main)
     target_link_libraries(array_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
-    target_link_libraries(array_view_test nanoarrow GTest::gtest_main)
+    target_link_libraries(array_view_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
     target_link_libraries(error_test nanoarrow GTest::gtest_main)
     target_link_libraries(metadata_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
     target_link_libraries(schema_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 14335a8..ce4ceac 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -445,6 +445,179 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
   return NANOARROW_OK;
 }
 
+// Non-zero when element i is null; a missing validity buffer means "no nulls".
+static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i) {
+  const uint8_t* bitmap = array_view->buffer_views[0].data.as_uint8;
+  const int64_t physical_i = i + array_view->array->offset;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_NA:  // NA storage: always reported as null
+      return 0x01;
+    case NANOARROW_TYPE_DENSE_UNION:
+    case NANOARROW_TYPE_SPARSE_UNION:  // unions are not supported yet
+      return 0xff;
+    default:
+      return (bitmap == NULL || ArrowBitGet(bitmap, physical_i)) ? 0 : 1;
+  }
+}
+
+// Reads element i from buffer 1 of a numeric or boolean array and returns it
+// converted to int64_t by C's implicit conversion rules. The index is first
+// shifted by the offset of the wrapped ArrowArray. Nothing is validated: not
+// nullness, not bounds, and not whether the stored value fits in an int64_t.
+// Storage types without a case below return INT64_MAX as a sentinel, which is
+// indistinguishable from a genuine INT64_MAX element.
+static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view,
+                                                 int64_t i) {
+  const struct ArrowBufferView* values = &array_view->buffer_views[1];
+  const int64_t k = i + array_view->array->offset;
+
+  switch (array_view->storage_type) {
+    // Signed integers
+    case NANOARROW_TYPE_INT8: return values->data.as_int8[k];
+    case NANOARROW_TYPE_INT16: return values->data.as_int16[k];
+    case NANOARROW_TYPE_INT32: return values->data.as_int32[k];
+    case NANOARROW_TYPE_INT64: return values->data.as_int64[k];
+    // Unsigned integers
+    case NANOARROW_TYPE_UINT8: return values->data.as_uint8[k];
+    case NANOARROW_TYPE_UINT16: return values->data.as_uint16[k];
+    case NANOARROW_TYPE_UINT32: return values->data.as_uint32[k];
+    case NANOARROW_TYPE_UINT64: return values->data.as_uint64[k];
+    // Floating point
+    case NANOARROW_TYPE_FLOAT: return values->data.as_float[k];
+    case NANOARROW_TYPE_DOUBLE: return values->data.as_double[k];
+    // Boolean, read through ArrowBitGet
+    case NANOARROW_TYPE_BOOL: return ArrowBitGet(values->data.as_uint8, k);
+    // Anything else is unsupported
+    default: return INT64_MAX;
+  }
+}
+
+// Reads element i from buffer 1 of a numeric or boolean array and returns it
+// converted to uint64_t by C's implicit conversion rules (so negative signed
+// integer values wrap around). The index is first shifted by the offset of the
+// wrapped ArrowArray. Nothing is validated: not nullness, not bounds, and not
+// whether the stored value fits. Storage types without a case below return
+// UINT64_MAX as a sentinel.
+static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view,
+                                                   int64_t i) {
+  const struct ArrowBufferView* values = &array_view->buffer_views[1];
+  const int64_t k = i + array_view->array->offset;
+
+  switch (array_view->storage_type) {
+    // Unsigned integers
+    case NANOARROW_TYPE_UINT8: return values->data.as_uint8[k];
+    case NANOARROW_TYPE_UINT16: return values->data.as_uint16[k];
+    case NANOARROW_TYPE_UINT32: return values->data.as_uint32[k];
+    case NANOARROW_TYPE_UINT64: return values->data.as_uint64[k];
+    // Signed integers
+    case NANOARROW_TYPE_INT8: return values->data.as_int8[k];
+    case NANOARROW_TYPE_INT16: return values->data.as_int16[k];
+    case NANOARROW_TYPE_INT32: return values->data.as_int32[k];
+    case NANOARROW_TYPE_INT64: return values->data.as_int64[k];
+    // Floating point
+    case NANOARROW_TYPE_FLOAT: return values->data.as_float[k];
+    case NANOARROW_TYPE_DOUBLE: return values->data.as_double[k];
+    // Boolean, read through ArrowBitGet
+    case NANOARROW_TYPE_BOOL: return ArrowBitGet(values->data.as_uint8, k);
+    // Anything else is unsupported
+    default: return UINT64_MAX;
+  }
+}
+
+// Reads element i from buffer 1 of a numeric or boolean array and returns it
+// converted to double by C's implicit conversion rules. The index is first
+// shifted by the offset of the wrapped ArrowArray. Nothing is validated: not
+// nullness, not bounds, and not whether a 64-bit integer is exactly
+// representable as a double. Storage types without a case below return
+// DBL_MAX as a sentinel.
+static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view,
+                                                   int64_t i) {
+  const struct ArrowBufferView* values = &array_view->buffer_views[1];
+  const int64_t k = i + array_view->array->offset;
+
+  switch (array_view->storage_type) {
+    // Floating point
+    case NANOARROW_TYPE_DOUBLE: return values->data.as_double[k];
+    case NANOARROW_TYPE_FLOAT: return values->data.as_float[k];
+    // Signed integers
+    case NANOARROW_TYPE_INT8: return values->data.as_int8[k];
+    case NANOARROW_TYPE_INT16: return values->data.as_int16[k];
+    case NANOARROW_TYPE_INT32: return values->data.as_int32[k];
+    case NANOARROW_TYPE_INT64: return values->data.as_int64[k];
+    // Unsigned integers
+    case NANOARROW_TYPE_UINT8: return values->data.as_uint8[k];
+    case NANOARROW_TYPE_UINT16: return values->data.as_uint16[k];
+    case NANOARROW_TYPE_UINT32: return values->data.as_uint32[k];
+    case NANOARROW_TYPE_UINT64: return values->data.as_uint64[k];
+    // Boolean, read through ArrowBitGet
+    case NANOARROW_TYPE_BOOL: return ArrowBitGet(values->data.as_uint8, k);
+    // Anything else is unsupported
+    default: return DBL_MAX;
+  }
+}
+
+// Views element i of a string/binary array without copying. Nullness and bounds
+// are not checked; other storage types give a NULL pointer and zero length.
+static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
+    struct ArrowArrayView* array_view, int64_t i) {
+  const int64_t k = i + array_view->array->offset;
+  const struct ArrowBufferView* buffer1 = &array_view->buffer_views[1];
+  const char* bytes = array_view->buffer_views[2].data.as_char;
+  struct ArrowStringView item;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_BINARY:
+    case NANOARROW_TYPE_STRING:  // buffer 1 holds 32-bit offsets into buffer 2
+      item.n_bytes = buffer1->data.as_int32[k + 1] - buffer1->data.as_int32[k];
+      item.data = bytes + buffer1->data.as_int32[k];
+      break;
+    case NANOARROW_TYPE_LARGE_BINARY:
+    case NANOARROW_TYPE_LARGE_STRING:  // buffer 1 holds 64-bit offsets into buffer 2
+      item.n_bytes = buffer1->data.as_int64[k + 1] - buffer1->data.as_int64[k];
+      item.data = bytes + buffer1->data.as_int64[k];
+      break;
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY:  // values sit back-to-back in buffer 1
+      item.n_bytes = array_view->layout.element_size_bits[1] / 8;
+      item.data = buffer1->data.as_char + (k * item.n_bytes);
+      break;
+    default:
+      item.n_bytes = 0;
+      item.data = NULL;
+      break;
+  }
+  return item;
+}
+
+// Views element i of a string/binary array as raw bytes without copying. Nullness
+// and bounds are not checked; other storage types give a NULL pointer and 0 bytes.
+static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
+    struct ArrowArrayView* array_view, int64_t i) {
+  const int64_t k = i + array_view->array->offset;
+  const struct ArrowBufferView* buffer1 = &array_view->buffer_views[1];
+  const uint8_t* bytes = array_view->buffer_views[2].data.as_uint8;
+  struct ArrowBufferView item;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_BINARY:
+    case NANOARROW_TYPE_STRING:  // buffer 1 holds 32-bit offsets into buffer 2
+      item.data.as_uint8 = bytes + buffer1->data.as_int32[k];
+      item.n_bytes = buffer1->data.as_int32[k + 1] - buffer1->data.as_int32[k];
+      break;
+    case NANOARROW_TYPE_LARGE_BINARY:
+    case NANOARROW_TYPE_LARGE_STRING:  // buffer 1 holds 64-bit offsets into buffer 2
+      item.data.as_uint8 = bytes + buffer1->data.as_int64[k];
+      item.n_bytes = buffer1->data.as_int64[k + 1] - buffer1->data.as_int64[k];
+      break;
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY:  // values sit back-to-back in buffer 1
+      item.n_bytes = array_view->layout.element_size_bits[1] / 8;
+      item.data.as_uint8 = buffer1->data.as_uint8 + (k * item.n_bytes);
+      break;
+    default:
+      item.data.data = NULL;
+      item.n_bytes = 0;
+      break;
+  }
+  return item;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/nanoarrow/array_view_test.cc b/src/nanoarrow/array_view_test.cc
index f54eed5..cb6cdac 100644
--- a/src/nanoarrow/array_view_test.cc
+++ b/src/nanoarrow/array_view_test.cc
@@ -17,8 +17,14 @@
 
 #include <gtest/gtest.h>
 
+#include <arrow/array.h>
+#include <arrow/c/bridge.h>
+#include <arrow/testing/gtest_util.h>
+
 #include "nanoarrow/nanoarrow.h"
 
+using namespace arrow;
+
 TEST(ArrayTest, ArrayViewTestBasic) {
   struct ArrowArrayView array_view;
   struct ArrowError error;
@@ -354,3 +360,101 @@ TEST(ArrayTest, ArrayViewTestFixedSizeListArray) {
   schema.release(&schema);
   array.release(&array);
 }
+
+// Shared checks for the numeric getters; called once per primitive data type.
+void TestGetFromNumericArrayView(const std::shared_ptr<DataType>& data_type) {
+  // One helper serves many types, so label any failure with the type under test
+  SCOPED_TRACE(data_type->ToString());
+  struct ArrowArray array;
+  struct ArrowSchema schema;
+  struct ArrowArrayView array_view;
+  struct ArrowError error;
+
+  // Case 1: array with nulls
+  auto with_nulls = ArrayFromJSON(data_type, "[1, null, null, 4]");
+  ARROW_EXPECT_OK(ExportArray(*with_nulls, &array, &schema));
+  ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
+  EXPECT_EQ(ArrowArrayViewGetIntUnsafe(&array_view, 3), 4);
+  EXPECT_EQ(ArrowArrayViewGetUIntUnsafe(&array_view, 3), 4);
+  EXPECT_EQ(ArrowArrayViewGetDoubleUnsafe(&array_view, 3), 4.0);
+
+  // The string/bytes getters have nothing to return for numeric storage
+  auto as_string = ArrowArrayViewGetStringUnsafe(&array_view, 0);
+  EXPECT_EQ(as_string.data, nullptr);
+  EXPECT_EQ(as_string.n_bytes, 0);
+  auto as_bytes = ArrowArrayViewGetBytesUnsafe(&array_view, 0);
+  EXPECT_EQ(as_bytes.data.data, nullptr);
+  EXPECT_EQ(as_bytes.n_bytes, 0);
+
+  ArrowArrayViewReset(&array_view);
+  array.release(&array);
+  schema.release(&schema);
+
+  // Case 2: array without nulls (Arrow does not allocate the validity buffer)
+  auto without_nulls = ArrayFromJSON(data_type, "[1, 2]");
+  ARROW_EXPECT_OK(ExportArray(*without_nulls, &array, &schema));
+  ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+  // The point is to run without a validity buffer, so confirm there is none
+  ASSERT_EQ(array_view.buffer_views[0].data.data, nullptr);
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 0), 0);
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 1), 0);
+  EXPECT_EQ(ArrowArrayViewGetIntUnsafe(&array_view, 0), 1);
+  EXPECT_EQ(ArrowArrayViewGetUIntUnsafe(&array_view, 1), 2);
+
+  ArrowArrayViewReset(&array_view);
+  array.release(&array);
+  schema.release(&schema);
+}
+
+// Runs the shared numeric getter checks once per primitive numeric type
+TEST(ArrayViewTest, ArrayViewTestGetNumeric) {
+  const std::shared_ptr<DataType> numeric_types[] = {
+      // signed and unsigned integers, widest first
+      int64(), uint64(), int32(), uint32(),
+      int16(), uint16(), int8(), uint8(),
+      // floating point
+      float64(), float32()};
+  for (const auto& numeric_type : numeric_types) {
+    TestGetFromNumericArrayView(numeric_type);
+  }
+}
+
+// Shared string/binary getter checks; SCOPED_TRACE labels failures by data type.
+void TestGetFromBinary(const std::shared_ptr<DataType>& data_type) {
+  SCOPED_TRACE(data_type->ToString());
+  struct ArrowArray array;
+  struct ArrowSchema schema;
+  struct ArrowArrayView array_view;
+  struct ArrowError error;
+
+  auto arrow_array = ArrayFromJSON(data_type, "[\"1234\", null, null, \"four\"]");
+  ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
+  ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
+
+  // The last element must read back as "four" through both getters
+  auto as_string = ArrowArrayViewGetStringUnsafe(&array_view, 3);
+  EXPECT_EQ(as_string.n_bytes, strlen("four"));
+  EXPECT_EQ(memcmp(as_string.data, "four", as_string.n_bytes), 0);
+  auto as_bytes = ArrowArrayViewGetBytesUnsafe(&array_view, 3);
+  EXPECT_EQ(as_bytes.n_bytes, strlen("four"));
+  EXPECT_EQ(memcmp(as_bytes.data.as_char, "four", as_bytes.n_bytes), 0);
+  ArrowArrayViewReset(&array_view);
+  array.release(&array);
+  schema.release(&schema);
+}
+
+TEST(ArrayViewTest, ArrayViewTestGetString) {
+  const std::shared_ptr<DataType> binary_like_types[] = {
+      utf8(), binary(), large_utf8(), large_binary(), fixed_size_binary(4)};
+  for (const auto& binary_like_type : binary_like_types) {
+    TestGetFromBinary(binary_like_type);
+  }
+}
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 38b0efc..73836e8 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -689,6 +689,42 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
 /// \brief Reset the contents of an ArrowArrayView and frees resources
 void ArrowArrayViewReset(struct ArrowArrayView* array_view);
 
+/// \brief Check if element i is null (returns 0xff for unions: not supported yet)
+static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get element i of an ArrowArrayView converted to an int64_t
+///
+/// No checks are performed: the element may be null, stored as a non-integer type, or
+/// outside the int64_t range. Unsupported storage types yield INT64_MAX.
+static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view,
+                                                 int64_t i);
+
+/// \brief Get element i of an ArrowArrayView converted to a uint64_t
+///
+/// No checks are performed: the element may be null, stored as a non-integer type, or
+/// outside the uint64_t range. Unsupported storage types yield UINT64_MAX.
+static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view,
+                                                   int64_t i);
+
+/// \brief Get element i of an ArrowArrayView converted to a double
+///
+/// Null values and the representable range of a double are not checked.
+/// Storage types that are not numeric or boolean yield DBL_MAX.
+static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view,
+                                                   int64_t i);
+
+/// \brief Get element i of an ArrowArrayView as an ArrowStringView
+///
+/// Nulls are not checked. Non-string/binary storage yields NULL data and 0 bytes.
+static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
+    struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get element i of an ArrowArrayView as an ArrowBufferView
+///
+/// Nulls are not checked. Non-string/binary storage yields NULL data and 0 bytes.
+static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
+    struct ArrowArrayView* array_view, int64_t i);
+
 /// }@
 
 // Inline function definitions
diff --git a/src/nanoarrow/typedefs_inline.h b/src/nanoarrow/typedefs_inline.h
index 47e2892..b61b0e5 100644
--- a/src/nanoarrow/typedefs_inline.h
+++ b/src/nanoarrow/typedefs_inline.h
@@ -221,6 +221,9 @@ struct ArrowBufferView {
     const uint32_t* as_uint32;
     const int64_t* as_int64;
     const uint64_t* as_uint64;
+    const double* as_double;
+    const float* as_float;
+    const char* as_char;
   } data;
 
   /// \brief The size of the buffer in bytes