You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2022/08/22 16:49:27 UTC
[arrow-nanoarrow] branch main updated: Implement getters (#26)
This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6c44081 Implement getters (#26)
6c44081 is described below
commit 6c440819101eabae4bb4c1e522aefcc48e18dac6
Author: Dewey Dunnington <de...@fishandwhistle.net>
AuthorDate: Mon Aug 22 13:49:22 2022 -0300
Implement getters (#26)
* sketch getter API
* re-sketch getter API
* test getters
* document getters
---
CMakeLists.txt | 2 +-
src/nanoarrow/array_inline.h | 173 +++++++++++++++++++++++++++++++++++++++
src/nanoarrow/array_view_test.cc | 104 +++++++++++++++++++++++
src/nanoarrow/nanoarrow.h | 36 ++++++++
src/nanoarrow/typedefs_inline.h | 3 +
5 files changed, 317 insertions(+), 1 deletion(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 12fe19a..d1c447f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,7 +79,7 @@ if (NANOARROW_BUILD_TESTS)
target_link_libraries(buffer_test nanoarrow GTest::gtest_main)
target_link_libraries(bitmap_test nanoarrow GTest::gtest_main)
target_link_libraries(array_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
- target_link_libraries(array_view_test nanoarrow GTest::gtest_main)
+ target_link_libraries(array_view_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
target_link_libraries(error_test nanoarrow GTest::gtest_main)
target_link_libraries(metadata_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
target_link_libraries(schema_test nanoarrow GTest::gtest_main arrow_shared arrow_testing_shared)
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 14335a8..ce4ceac 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -445,6 +445,179 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
return NANOARROW_OK;
}
+// Returns 1 if element i is null and 0 if it is not (always 0 when there is no validity
+// buffer); union storage types are not supported yet and return -1.
+static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i) {
+  const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8;
+  i += array_view->array->offset;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_NA: return 1;  // NA storage is reported as null at every index
+    case NANOARROW_TYPE_DENSE_UNION:
+    case NANOARROW_TYPE_SPARSE_UNION:
+      // -1 rather than 0xff: 255 is not representable in the int8_t return type
+      return -1;
+    default: return validity_buffer != NULL && !ArrowBitGet(validity_buffer, i);
+  }
+}
+
+// Reads element i (counted from the array's offset) out of buffer 1 and returns it as an
+// int64_t via C's implicit conversions. Nothing is validated: not nulls, not the storage
+// type, and not whether the value is representable. Storage types without a case below
+// return INT64_MAX.
+static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view,
+                                                 int64_t i) {
+  const int64_t pos = i + array_view->array->offset;
+  struct ArrowBufferView* values = &array_view->buffer_views[1];
+
+  switch (array_view->storage_type) {
+    // Integer storage
+    case NANOARROW_TYPE_INT64: return values->data.as_int64[pos];
+    case NANOARROW_TYPE_UINT64: return values->data.as_uint64[pos];
+    case NANOARROW_TYPE_INT32: return values->data.as_int32[pos];
+    case NANOARROW_TYPE_UINT32: return values->data.as_uint32[pos];
+    case NANOARROW_TYPE_INT16: return values->data.as_int16[pos];
+    case NANOARROW_TYPE_UINT16: return values->data.as_uint16[pos];
+    case NANOARROW_TYPE_INT8: return values->data.as_int8[pos];
+    case NANOARROW_TYPE_UINT8: return values->data.as_uint8[pos];
+
+    // Floating-point storage
+    case NANOARROW_TYPE_DOUBLE: return values->data.as_double[pos];
+    case NANOARROW_TYPE_FLOAT: return values->data.as_float[pos];
+
+    // Boolean storage is read with ArrowBitGet() instead of by array indexing
+    case NANOARROW_TYPE_BOOL: return ArrowBitGet(values->data.as_uint8, pos);
+
+    // Everything else
+    default: return INT64_MAX;
+  }
+}
+
+// Reads element i (counted from the array's offset) out of buffer 1 and returns it as a
+// uint64_t via C's implicit conversions. Nothing is validated: not nulls, not the storage
+// type, and not whether the value is representable. Storage types without a case below
+// return UINT64_MAX.
+static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view,
+                                                   int64_t i) {
+  const int64_t pos = i + array_view->array->offset;
+  struct ArrowBufferView* values = &array_view->buffer_views[1];
+
+  switch (array_view->storage_type) {
+    // Integer storage
+    case NANOARROW_TYPE_INT64: return values->data.as_int64[pos];
+    case NANOARROW_TYPE_UINT64: return values->data.as_uint64[pos];
+    case NANOARROW_TYPE_INT32: return values->data.as_int32[pos];
+    case NANOARROW_TYPE_UINT32: return values->data.as_uint32[pos];
+    case NANOARROW_TYPE_INT16: return values->data.as_int16[pos];
+    case NANOARROW_TYPE_UINT16: return values->data.as_uint16[pos];
+    case NANOARROW_TYPE_INT8: return values->data.as_int8[pos];
+    case NANOARROW_TYPE_UINT8: return values->data.as_uint8[pos];
+
+    // Floating-point storage
+    case NANOARROW_TYPE_DOUBLE: return values->data.as_double[pos];
+    case NANOARROW_TYPE_FLOAT: return values->data.as_float[pos];
+
+    // Boolean storage is read with ArrowBitGet() instead of by array indexing
+    case NANOARROW_TYPE_BOOL: return ArrowBitGet(values->data.as_uint8, pos);
+
+    // Everything else
+    default: return UINT64_MAX;
+  }
+}
+
+// Reads element i (counted from the array's offset) out of buffer 1 and returns it as a
+// double via C's implicit conversions. Nothing is validated: not nulls, not the storage
+// type, and not whether the value is exactly representable. Storage types without a case
+// below return DBL_MAX.
+static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view,
+                                                   int64_t i) {
+  const int64_t pos = i + array_view->array->offset;
+  struct ArrowBufferView* values = &array_view->buffer_views[1];
+
+  switch (array_view->storage_type) {
+    // Integer storage
+    case NANOARROW_TYPE_INT64: return values->data.as_int64[pos];
+    case NANOARROW_TYPE_UINT64: return values->data.as_uint64[pos];
+    case NANOARROW_TYPE_INT32: return values->data.as_int32[pos];
+    case NANOARROW_TYPE_UINT32: return values->data.as_uint32[pos];
+    case NANOARROW_TYPE_INT16: return values->data.as_int16[pos];
+    case NANOARROW_TYPE_UINT16: return values->data.as_uint16[pos];
+    case NANOARROW_TYPE_INT8: return values->data.as_int8[pos];
+    case NANOARROW_TYPE_UINT8: return values->data.as_uint8[pos];
+
+    // Floating-point storage
+    case NANOARROW_TYPE_DOUBLE: return values->data.as_double[pos];
+    case NANOARROW_TYPE_FLOAT: return values->data.as_float[pos];
+
+    // Boolean storage is read with ArrowBitGet() instead of by array indexing
+    case NANOARROW_TYPE_BOOL: return ArrowBitGet(values->data.as_uint8, pos);
+
+    // Everything else
+    default: return DBL_MAX;
+  }
+}
+
+// Returns a view of element i (counted from the array's offset) without checking for
+// nulls. Storage types other than the ones handled below give a NULL, zero-length view.
+static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
+    struct ArrowArrayView* array_view, int64_t i) {
+  const int64_t pos = i + array_view->array->offset;
+  struct ArrowBufferView* offsets = &array_view->buffer_views[1];
+  const char* bytes = array_view->buffer_views[2].data.as_char;
+  struct ArrowStringView item;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_BINARY:
+      item.data = bytes + offsets->data.as_int32[pos];
+      item.n_bytes = offsets->data.as_int32[pos + 1] - offsets->data.as_int32[pos];
+      return item;
+    case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_LARGE_BINARY:
+      item.data = bytes + offsets->data.as_int64[pos];
+      item.n_bytes = offsets->data.as_int64[pos + 1] - offsets->data.as_int64[pos];
+      return item;
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+      // No offsets here: elements are equal-width slots read directly from buffer 1
+      item.n_bytes = array_view->layout.element_size_bits[1] / 8;
+      item.data = array_view->buffer_views[1].data.as_char + (pos * item.n_bytes);
+      return item;
+    default:
+      item.data = NULL;
+      item.n_bytes = 0;
+      return item;
+  }
+}
+
+// Returns a view of element i (counted from the array's offset) without checking for
+// nulls. Storage types other than the ones handled below give a NULL, zero-length view.
+static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
+    struct ArrowArrayView* array_view, int64_t i) {
+  const int64_t pos = i + array_view->array->offset;
+  struct ArrowBufferView* offsets = &array_view->buffer_views[1];
+  const uint8_t* bytes = array_view->buffer_views[2].data.as_uint8;
+  struct ArrowBufferView item;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_BINARY:
+      item.n_bytes = offsets->data.as_int32[pos + 1] - offsets->data.as_int32[pos];
+      item.data.as_uint8 = bytes + offsets->data.as_int32[pos];
+      return item;
+    case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_LARGE_BINARY:
+      item.n_bytes = offsets->data.as_int64[pos + 1] - offsets->data.as_int64[pos];
+      item.data.as_uint8 = bytes + offsets->data.as_int64[pos];
+      return item;
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+      // No offsets here: elements are equal-width slots read directly from buffer 1
+      item.n_bytes = array_view->layout.element_size_bits[1] / 8;
+      item.data.as_uint8 = array_view->buffer_views[1].data.as_uint8 + (pos * item.n_bytes);
+      return item;
+    default:
+      item.data.data = NULL;
+      item.n_bytes = 0;
+      return item;
+  }
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/src/nanoarrow/array_view_test.cc b/src/nanoarrow/array_view_test.cc
index f54eed5..cb6cdac 100644
--- a/src/nanoarrow/array_view_test.cc
+++ b/src/nanoarrow/array_view_test.cc
@@ -17,8 +17,14 @@
#include <gtest/gtest.h>
+#include <arrow/array.h>
+#include <arrow/c/bridge.h>
+#include <arrow/testing/gtest_util.h>
+
#include "nanoarrow/nanoarrow.h"
+using namespace arrow;
+
TEST(ArrayTest, ArrayViewTestBasic) {
struct ArrowArrayView array_view;
struct ArrowError error;
@@ -354,3 +360,101 @@ TEST(ArrayTest, ArrayViewTestFixedSizeListArray) {
schema.release(&schema);
array.release(&array);
}
+
+// Checks the ArrowArrayView getters against arrays of data_type exported from Arrow C++.
+// Exports use ASSERT_OK: after a failed export array/schema must not be viewed or released.
+void TestGetFromNumericArrayView(const std::shared_ptr<DataType>& data_type) {
+  struct ArrowArray array;
+  struct ArrowSchema schema;
+  struct ArrowArrayView array_view;
+  struct ArrowError error;
+
+  // Array with nulls
+  auto arrow_array = ArrayFromJSON(data_type, "[1, null, null, 4]");
+  ASSERT_OK(ExportArray(*arrow_array, &array, &schema));
+  ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
+  EXPECT_EQ(ArrowArrayViewGetIntUnsafe(&array_view, 3), 4);
+  EXPECT_EQ(ArrowArrayViewGetUIntUnsafe(&array_view, 3), 4u);  // unsigned vs. unsigned
+  EXPECT_EQ(ArrowArrayViewGetDoubleUnsafe(&array_view, 3), 4.0);
+
+  auto string_view = ArrowArrayViewGetStringUnsafe(&array_view, 0);
+  EXPECT_EQ(string_view.data, nullptr);
+  EXPECT_EQ(string_view.n_bytes, 0);
+  auto buffer_view = ArrowArrayViewGetBytesUnsafe(&array_view, 0);
+  EXPECT_EQ(buffer_view.data.data, nullptr);
+  EXPECT_EQ(buffer_view.n_bytes, 0);
+
+  ArrowArrayViewReset(&array_view);
+  array.release(&array);
+  schema.release(&schema);
+
+  // Array without nulls (Arrow does not allocate the validity buffer)
+  arrow_array = ArrayFromJSON(data_type, "[1, 2]");
+  ASSERT_OK(ExportArray(*arrow_array, &array, &schema));
+  ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+
+  // We're trying to test behavior with no validity buffer, so make sure that's true
+  ASSERT_EQ(array_view.buffer_views[0].data.data, nullptr);
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 0), 0);
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 1), 0);
+  EXPECT_EQ(ArrowArrayViewGetIntUnsafe(&array_view, 0), 1);
+  EXPECT_EQ(ArrowArrayViewGetUIntUnsafe(&array_view, 1), 2u);
+  EXPECT_EQ(ArrowArrayViewGetDoubleUnsafe(&array_view, 1), 2.0);
+
+  ArrowArrayViewReset(&array_view);
+  array.release(&array);
+  schema.release(&schema);
+}
+
+TEST(ArrayViewTest, ArrayViewTestGetNumeric) {
+  // Every assertion lives in the shared helper, so a failure there would not say which
+  // storage type it came from; the scoped trace adds the type name to the message.
+  for (const auto& type : {
+           int64(), uint64(), int32(), uint32(),  // 64- and 32-bit integers
+           int16(), uint16(), int8(), uint8(),    // 16- and 8-bit integers
+           float64(), float32(),                  // floating point
+       }) {
+    SCOPED_TRACE(type->ToString());
+    TestGetFromNumericArrayView(type);
+  }
+}
+
+// Checks the null, string, and bytes getters against a 4-element array ending in "four".
+void TestGetFromBinary(const std::shared_ptr<DataType>& data_type) {
+  struct ArrowArray array;
+  struct ArrowSchema schema;
+  struct ArrowArrayView array_view;
+  struct ArrowError error;
+  // ASSERT_OK: after a failed export the structs above must not be viewed or released
+  auto arrow_array = ArrayFromJSON(data_type, "[\"1234\", null, null, \"four\"]");
+  ASSERT_OK(ExportArray(*arrow_array, &array, &schema));
+  ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
+  EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
+  // Lengths are asserted before memcmp() so that it can never read past the literal
+  auto string_view = ArrowArrayViewGetStringUnsafe(&array_view, 3);
+  ASSERT_EQ(string_view.n_bytes, 4);
+  EXPECT_EQ(memcmp(string_view.data, "four", 4), 0);
+  auto buffer_view = ArrowArrayViewGetBytesUnsafe(&array_view, 3);
+  ASSERT_EQ(buffer_view.n_bytes, 4);
+  EXPECT_EQ(memcmp(buffer_view.data.as_char, "four", 4), 0);
+
+  ArrowArrayViewReset(&array_view);
+  array.release(&array);
+  schema.release(&schema);
+}
+
+TEST(ArrayViewTest, ArrayViewTestGetString) {
+  for (const auto& type :
+       {utf8(), binary(), large_utf8(), large_binary(), fixed_size_binary(4)}) {
+    SCOPED_TRACE(type->ToString());  // names the type when the shared helper fails
+    TestGetFromBinary(type);
+  }
+}
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 38b0efc..73836e8 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -689,6 +689,42 @@ ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
/// \brief Reset the contents of an ArrowArrayView and frees resources
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
+/// \brief Check for a null element in an ArrowArrayView: 1 if null, 0 if not, -1 (0xff) for unions
+static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get an element in an ArrowArrayView as an integer
+///
+/// No checks are made that the element is non-null, that the storage type is an integer,
+/// or that the value fits in an int64. Unhandled storage types return INT64_MAX.
+static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view,
+ int64_t i);
+
+/// \brief Get an element in an ArrowArrayView as an unsigned integer
+///
+/// No checks are made that the element is non-null, that the storage type is an integer,
+/// or that the value fits in a uint64. Unhandled storage types return UINT64_MAX.
+static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view,
+ int64_t i);
+
+/// \brief Get an element in an ArrowArrayView as a double
+///
+/// No checks are made that the element is non-null or that the value is within a valid
+/// range for a double. Unhandled storage types return DBL_MAX.
+static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view,
+ int64_t i);
+
+/// \brief Get a string, binary, or fixed-size binary element as an ArrowStringView
+///
+/// Does not check for null values. Other storage types give a NULL, zero-length view.
+static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
+ struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get a string, binary, or fixed-size binary element as an ArrowBufferView
+///
+/// Does not check for null values. Other storage types give a NULL, zero-length view.
+static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
+ struct ArrowArrayView* array_view, int64_t i);
+
/// }@
// Inline function definitions
diff --git a/src/nanoarrow/typedefs_inline.h b/src/nanoarrow/typedefs_inline.h
index 47e2892..b61b0e5 100644
--- a/src/nanoarrow/typedefs_inline.h
+++ b/src/nanoarrow/typedefs_inline.h
@@ -221,6 +221,9 @@ struct ArrowBufferView {
const uint32_t* as_uint32;
const int64_t* as_int64;
const uint64_t* as_uint64;
+ const double* as_double;
+ const float* as_float;
+ const char* as_char;
} data;
/// \brief The size of the buffer in bytes