You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/11/10 18:45:18 UTC
[arrow-adbc] branch main updated: chore(c): update nanoarrow (#173)
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 0ef2b58 chore(c): update nanoarrow (#173)
0ef2b58 is described below
commit 0ef2b5896a977eaf070ba5f8b0b8d501b0390d5b
Author: David Li <li...@gmail.com>
AuthorDate: Thu Nov 10 13:45:12 2022 -0500
chore(c): update nanoarrow (#173)
---
c/vendor/nanoarrow/nanoarrow.c | 206 ++++++++++++++++++--
c/vendor/nanoarrow/nanoarrow.h | 417 ++++++++++++++++++++++++++++++-----------
2 files changed, 502 insertions(+), 121 deletions(-)
diff --git a/c/vendor/nanoarrow/nanoarrow.c b/c/vendor/nanoarrow/nanoarrow.c
index 8cec4e0..d73322d 100644
--- a/c/vendor/nanoarrow/nanoarrow.c
+++ b/c/vendor/nanoarrow/nanoarrow.c
@@ -40,7 +40,7 @@ int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
if (chars_needed < 0) {
return EINVAL;
- } else if (chars_needed >= sizeof(error->message)) {
+ } else if (((size_t)chars_needed) >= sizeof(error->message)) {
return ERANGE;
} else {
return NANOARROW_OK;
@@ -432,7 +432,7 @@ ArrowErrorCode ArrowSchemaInitDecimal(struct ArrowSchema* schema,
return NANOARROW_OK;
}
-static const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
+static const char* ArrowTimeUnitFormatString(enum ArrowTimeUnit time_unit) {
switch (time_unit) {
case NANOARROW_TIME_UNIT_SECOND:
return "s";
@@ -456,7 +456,7 @@ ArrowErrorCode ArrowSchemaInitDateTime(struct ArrowSchema* schema,
return result;
}
- const char* time_unit_str = ArrowTimeUnitString(time_unit);
+ const char* time_unit_str = ArrowTimeUnitFormatString(time_unit);
if (time_unit_str == NULL) {
schema->release(schema);
return EINVAL;
@@ -491,7 +491,7 @@ ArrowErrorCode ArrowSchemaInitDateTime(struct ArrowSchema* schema,
return EINVAL;
}
- if (n_chars >= sizeof(buffer)) {
+ if (((size_t)n_chars) >= sizeof(buffer)) {
schema->release(schema);
return ERANGE;
}
@@ -745,14 +745,14 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
}
parse_start = format + 2;
- schema_view->decimal_precision = strtol(parse_start, &parse_end, 10);
+ schema_view->decimal_precision = (int32_t)strtol(parse_start, &parse_end, 10);
if (parse_end == parse_start || parse_end[0] != ',') {
ArrowErrorSet(error, "Expected 'precision,scale[,bitwidth]' following 'd:'");
return EINVAL;
}
parse_start = parse_end + 1;
- schema_view->decimal_scale = strtol(parse_start, &parse_end, 10);
+ schema_view->decimal_scale = (int32_t)strtol(parse_start, &parse_end, 10);
if (parse_end == parse_start) {
ArrowErrorSet(error, "Expected 'scale[,bitwidth]' following 'd:precision,'");
return EINVAL;
@@ -760,7 +760,7 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
schema_view->decimal_bitwidth = 128;
} else {
parse_start = parse_end + 1;
- schema_view->decimal_bitwidth = strtol(parse_start, &parse_end, 10);
+ schema_view->decimal_bitwidth = (int32_t)strtol(parse_start, &parse_end, 10);
if (parse_start == parse_end) {
ArrowErrorSet(error, "Expected precision following 'd:precision,scale,'");
return EINVAL;
@@ -791,7 +791,7 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
return EINVAL;
}
- schema_view->fixed_size = strtol(format + 2, (char**)format_end_out, 10);
+ schema_view->fixed_size = (int32_t)strtol(format + 2, (char**)format_end_out, 10);
return NANOARROW_OK;
// validity + offset + data
@@ -844,7 +844,8 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
schema_view->storage_data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
schema_view->data_type = NANOARROW_TYPE_FIXED_SIZE_LIST;
- schema_view->fixed_size = strtol(format + 3, (char**)format_end_out, 10);
+ schema_view->fixed_size =
+ (int32_t)strtol(format + 3, (char**)format_end_out, 10);
return NANOARROW_OK;
case 's':
schema_view->storage_data_type = NANOARROW_TYPE_STRUCT;
@@ -888,6 +889,11 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
format);
return EINVAL;
}
+
+ default:
+ ArrowErrorSet(error, "Expected nested type format string but found '%s'",
+ format);
+ return EINVAL;
}
// date/time types
@@ -1226,7 +1232,7 @@ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
return EINVAL;
}
- int format_len = strlen(format);
+ size_t format_len = strlen(format);
if (format_len == 0) {
ArrowErrorSet(error, "Error parsing schema->format: Expected a string with size > 0");
return EINVAL;
@@ -1282,6 +1288,151 @@ ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
return NANOARROW_OK;
}
+// Format the type held in schema_view (its name plus any type parameters)
+// into out with snprintf() semantics: at most n bytes are written, and the
+// value returned is whatever the underlying snprintf() call returned.
+static int64_t ArrowSchemaTypeToStringInternal(struct ArrowSchemaView* schema_view,
+                                               char* out, int64_t n) {
+  const char* type_name = ArrowTypeString(schema_view->data_type);
+  int n_written;
+
+  switch (schema_view->data_type) {
+    // Decimals are parameterized by precision and scale
+    case NANOARROW_TYPE_DECIMAL128:
+    case NANOARROW_TYPE_DECIMAL256: {
+      const int precision = (int)schema_view->decimal_precision;
+      const int scale = (int)schema_view->decimal_scale;
+      n_written = snprintf(out, n, "%s(%d, %d)", type_name, precision, scale);
+      break;
+    }
+
+    // Timestamps carry a time unit and a timezone view
+    case NANOARROW_TYPE_TIMESTAMP: {
+      const char* unit = ArrowTimeUnitString(schema_view->time_unit);
+      const int timezone_len = (int)schema_view->timezone.n_bytes;
+      n_written = snprintf(out, n, "%s('%s', '%.*s')", type_name, unit, timezone_len,
+                           schema_view->timezone.data);
+      break;
+    }
+
+    // Other temporal types carry only a time unit
+    case NANOARROW_TYPE_TIME32:
+    case NANOARROW_TYPE_TIME64:
+    case NANOARROW_TYPE_DURATION: {
+      const char* unit = ArrowTimeUnitString(schema_view->time_unit);
+      n_written = snprintf(out, n, "%s('%s')", type_name, unit);
+      break;
+    }
+
+    // Fixed-size types are parameterized by their size
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+    case NANOARROW_TYPE_FIXED_SIZE_LIST: {
+      const long fixed_size = (long)schema_view->fixed_size;
+      n_written = snprintf(out, n, "%s(%ld)", type_name, fixed_size);
+      break;
+    }
+
+    // Unions list their type ids verbatim
+    case NANOARROW_TYPE_SPARSE_UNION:
+    case NANOARROW_TYPE_DENSE_UNION: {
+      const int type_ids_len = (int)schema_view->union_type_ids.n_bytes;
+      n_written = snprintf(out, n, "%s([%.*s])", type_name, type_ids_len,
+                           schema_view->union_type_ids.data);
+      break;
+    }
+
+    // Everything else is just the type name
+    default:
+      n_written = snprintf(out, n, "%s", type_name);
+      break;
+  }
+
+  return n_written;
+}
+
+// Shared bookkeeping for the successive snprintf()-style writes made by
+// ArrowSchemaToString(). n_chars_last is the value returned by the most recent
+// write, *out is the write cursor, *n is the space remaining at the cursor and
+// *n_chars is the running total of characters the complete output requires.
+static void ArrowSchemaToStringLogChars(char** out, int64_t* n, int64_t* n_chars,
+                                        int64_t n_chars_last) {
+  // snprintf() reports an encoding error as a negative value; count it as
+  // nothing written so the totals never move backwards.
+  if (n_chars_last < 0) {
+    n_chars_last = 0;
+  }
+
+  *n_chars += n_chars_last;
+
+  // Advance the cursor only over space that exists: at most to one past the
+  // end of the caller's buffer, and never starting from a NULL pointer.
+  if (*out != NULL) {
+    *out += (n_chars_last < *n) ? n_chars_last : *n;
+  }
+
+  *n -= n_chars_last;
+  if (*n < 0) {
+    *n = 0;
+  }
+}
+
+// Write a human-readable summary of schema to out (at most n bytes including
+// the terminating null) and return the number of characters the summary
+// requires, which may be larger than what was written. A NULL out or a
+// non-positive n writes nothing and only measures. If recursive is non-zero,
+// the children of nested types are included.
+int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n,
+                            char recursive) {
+  // A negative n would wrap to a huge size_t inside snprintf(), and a NULL
+  // destination is only valid with a size of zero.
+  if (out == NULL || n < 0) {
+    n = 0;
+  }
+
+  if (schema == NULL) {
+    return snprintf(out, (size_t)n, "[invalid: pointer is null]");
+  }
+
+  if (schema->release == NULL) {
+    return snprintf(out, (size_t)n, "[invalid: schema is released]");
+  }
+
+  struct ArrowSchemaView schema_view;
+  struct ArrowError error;
+
+  if (ArrowSchemaViewInit(&schema_view, schema, &error) != NANOARROW_OK) {
+    return snprintf(out, (size_t)n, "[invalid: %s]", ArrowErrorMessage(&error));
+  }
+
+  // Extension type and dictionary should include both the top-level type
+  // and the storage type.
+  int is_extension = schema_view.extension_name.n_bytes > 0;
+  int is_dictionary = schema->dictionary != NULL;
+  int64_t n_chars = 0;
+  int64_t n_chars_last = 0;
+
+  // Uncommon but not technically impossible that both are true
+  if (is_extension && is_dictionary) {
+    n_chars_last = snprintf(
+        out, (size_t)n, "%.*s{dictionary(%s)<", (int)schema_view.extension_name.n_bytes,
+        schema_view.extension_name.data, ArrowTypeString(schema_view.storage_data_type));
+  } else if (is_extension) {
+    n_chars_last =
+        snprintf(out, (size_t)n, "%.*s{", (int)schema_view.extension_name.n_bytes,
+                 schema_view.extension_name.data);
+  } else if (is_dictionary) {
+    n_chars_last = snprintf(out, (size_t)n, "dictionary(%s)<",
+                            ArrowTypeString(schema_view.storage_data_type));
+  }
+
+  ArrowSchemaToStringLogChars(&out, &n, &n_chars, n_chars_last);
+
+  // A dictionary-encoded schema prints its value type in place of its own type
+  if (!is_dictionary) {
+    n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out, n);
+  } else {
+    n_chars_last = ArrowSchemaToString(schema->dictionary, out, n, recursive);
+  }
+
+  ArrowSchemaToStringLogChars(&out, &n, &n_chars, n_chars_last);
+
+  // Nested type format strings start with '+'
+  if (recursive && schema->format[0] == '+') {
+    n_chars_last = snprintf(out, (size_t)n, "<");
+    ArrowSchemaToStringLogChars(&out, &n, &n_chars, n_chars_last);
+
+    for (int64_t i = 0; i < schema->n_children; i++) {
+      if (i > 0) {
+        n_chars_last = snprintf(out, (size_t)n, ", ");
+        ArrowSchemaToStringLogChars(&out, &n, &n_chars, n_chars_last);
+      }
+
+      // The recursive ArrowSchemaToString() call below validates the child
+      // and prints any error, but we need the name first
+      if (schema->children[i] != NULL && schema->children[i]->release != NULL &&
+          schema->children[i]->name != NULL) {
+        n_chars_last = snprintf(out, (size_t)n, "%s: ", schema->children[i]->name);
+        ArrowSchemaToStringLogChars(&out, &n, &n_chars, n_chars_last);
+      }
+
+      n_chars_last = ArrowSchemaToString(schema->children[i], out, n, recursive);
+      ArrowSchemaToStringLogChars(&out, &n, &n_chars, n_chars_last);
+    }
+
+    n_chars_last = snprintf(out, (size_t)n, ">");
+    ArrowSchemaToStringLogChars(&out, &n, &n_chars, n_chars_last);
+  }
+
+  // Close whatever was opened by the extension/dictionary prefix
+  n_chars_last = 0;
+  if (is_extension && is_dictionary) {
+    n_chars_last = snprintf(out, (size_t)n, ">}");
+  } else if (is_extension) {
+    n_chars_last = snprintf(out, (size_t)n, "}");
+  } else if (is_dictionary) {
+    n_chars_last = snprintf(out, (size_t)n, ">");
+  }
+
+  ArrowSchemaToStringLogChars(&out, &n, &n_chars, n_chars_last);
+
+  return n_chars;
+}
+
ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader,
const char* metadata) {
reader->metadata = metadata;
@@ -1399,15 +1550,15 @@ static ArrowErrorCode ArrowMetadataBuilderAppendInternal(struct ArrowBuffer* buf
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(buffer, 0));
}
- if (buffer->capacity_bytes < sizeof(int32_t)) {
+ if (((size_t)buffer->capacity_bytes) < sizeof(int32_t)) {
return EINVAL;
}
int32_t n_keys;
memcpy(&n_keys, buffer->data, sizeof(int32_t));
- int32_t key_size = key->n_bytes;
- int32_t value_size = value->n_bytes;
+ int32_t key_size = (int32_t)key->n_bytes;
+ int32_t value_size = (int32_t)value->n_bytes;
NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(
buffer, sizeof(int32_t) + key_size + sizeof(int32_t) + value_size));
@@ -1841,6 +1992,32 @@ ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array,
return NANOARROW_OK;
}
+// Recursively prepare the buffers of array (and of its children) for
+// ArrowArrayFinishBuilding(). Returns NANOARROW_OK on success or the first
+// error code reported while appending to a buffer or finalizing a child.
+static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) {
+  struct ArrowArrayPrivateData* private_data =
+      (struct ArrowArrayPrivateData*)array->private_data;
+
+  // The only buffer finalizing this currently does is make sure the data
+  // buffer for (Large)String|Binary is never NULL
+  switch (private_data->storage_type) {
+    case NANOARROW_TYPE_BINARY:
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_LARGE_BINARY:
+    case NANOARROW_TYPE_LARGE_STRING:
+      if (ArrowArrayBuffer(array, 2)->data == NULL) {
+        // Appending can fail; propagate the failure instead of reporting
+        // success with a data buffer that is still NULL.
+        NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(ArrowArrayBuffer(array, 2), 0));
+      }
+      break;
+    default:
+      break;
+  }
+
+  for (int64_t i = 0; i < array->n_children; i++) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array->children[i]));
+  }
+
+  return NANOARROW_OK;
+}
+
static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
@@ -1889,6 +2066,9 @@ static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
struct ArrowError* error) {
+ // Even if the data buffer is size zero, the value needs to be non-null
+ NANOARROW_RETURN_NOT_OK(ArrowArrayFinalizeBuffers(array));
+
// Make sure the value we get with array->buffers[i] is set to the actual
// pointer (which may have changed from the original due to reallocation)
ArrowArrayFlushInternalPointers(array);
diff --git a/c/vendor/nanoarrow/nanoarrow.h b/c/vendor/nanoarrow/nanoarrow.h
index 8ea267c..b53ac44 100644
--- a/c/vendor/nanoarrow/nanoarrow.h
+++ b/c/vendor/nanoarrow/nanoarrow.h
@@ -20,7 +20,7 @@
// #define NANOARROW_NAMESPACE YourNamespaceHere
-#define NANOARROW_BUILD_ID "gha7325d629245c290bd96fb645a5d38e72bba2f8af"
+#define NANOARROW_BUILD_ID "ghaa66afcc5a9faf48fe7062eb2a025d808ccfac5dd"
#endif
// Licensed to the Apache Software Foundation (ASF) under one
@@ -52,11 +52,20 @@
extern "C" {
#endif
-/// \defgroup nanoarrow-inline-typedef Type definitions used in inlined implementations
-
// Extra guard for versions of Arrow without the canonical guard
#ifndef ARROW_FLAG_DICTIONARY_ORDERED
+/// \defgroup nanoarrow-arrow-cdata Arrow C Data interface
+///
+/// The Arrow C Data (https://arrow.apache.org/docs/format/CDataInterface.html)
+/// and Arrow C Stream (https://arrow.apache.org/docs/format/CStreamInterface.html)
+/// interfaces are part of the
+/// Arrow Columnar Format specification
+/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for
+/// documentation of these structures.
+///
+/// @{
+
#ifndef ARROW_C_DATA_INTERFACE
#define ARROW_C_DATA_INTERFACE
@@ -141,13 +150,36 @@ struct ArrowArrayStream {
#endif // ARROW_C_STREAM_INTERFACE
#endif // ARROW_FLAG_DICTIONARY_ORDERED
+/// @}
+
+// Utility macros
+
+// Paste two tokens together; the extra level of indirection in
+// _NANOARROW_MAKE_NAME lets macro arguments such as __COUNTER__ expand first.
+#define _NANOARROW_CONCAT(x, y) x##y
+#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y)
+
+// Evaluate EXPR once into a local named NAME and return it from the enclosing
+// function if it is non-zero.
+#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \
+  do {                                            \
+    const int NAME = (EXPR);                      \
+    if (NAME) return NAME;                        \
+  } while (0)
+
+// Return EINVAL from the enclosing function unless min_ <= x_ <= max_.
+// Arguments are parenthesized so that expression arguments keep their
+// meaning; note that x_ is still evaluated twice.
+#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \
+  NANOARROW_RETURN_NOT_OK(((x_) >= (min_) && (x_) <= (max_)) ? NANOARROW_OK : EINVAL)
+
/// \brief Return code for success.
+/// \ingroup nanoarrow-errors
#define NANOARROW_OK 0
/// \brief Represents an errno-compatible error code
+/// \ingroup nanoarrow-errors
typedef int ArrowErrorCode;
+/// \brief Check the result of an expression and return it if not NANOARROW_OK
+/// \ingroup nanoarrow-errors
+#define NANOARROW_RETURN_NOT_OK(EXPR) \
+ _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR)
+
/// \brief Arrow type enumerator
+/// \ingroup nanoarrow-utils
///
/// These names are intended to map to the corresponding arrow::Type::type
/// enumerator; however, the numeric values are specifically not equal
@@ -194,7 +226,126 @@ enum ArrowType {
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
};
+/// \brief Get a string value of an enum ArrowType value
+/// \ingroup nanoarrow-utils
+///
+/// The result is a lowercase string literal (e.g., "int32", "large_string"),
+/// so it must not be modified or freed by the caller.
+/// Returns NULL for invalid values for type
+static inline const char* ArrowTypeString(enum ArrowType type) {
+ // One case per enumerator handled here; anything else reaches default.
+ switch (type) {
+ case NANOARROW_TYPE_NA:
+ return "na";
+ case NANOARROW_TYPE_BOOL:
+ return "bool";
+ case NANOARROW_TYPE_UINT8:
+ return "uint8";
+ case NANOARROW_TYPE_INT8:
+ return "int8";
+ case NANOARROW_TYPE_UINT16:
+ return "uint16";
+ case NANOARROW_TYPE_INT16:
+ return "int16";
+ case NANOARROW_TYPE_UINT32:
+ return "uint32";
+ case NANOARROW_TYPE_INT32:
+ return "int32";
+ case NANOARROW_TYPE_UINT64:
+ return "uint64";
+ case NANOARROW_TYPE_INT64:
+ return "int64";
+ case NANOARROW_TYPE_HALF_FLOAT:
+ return "half_float";
+ case NANOARROW_TYPE_FLOAT:
+ return "float";
+ case NANOARROW_TYPE_DOUBLE:
+ return "double";
+ case NANOARROW_TYPE_STRING:
+ return "string";
+ case NANOARROW_TYPE_BINARY:
+ return "binary";
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ return "fixed_size_binary";
+ case NANOARROW_TYPE_DATE32:
+ return "date32";
+ case NANOARROW_TYPE_DATE64:
+ return "date64";
+ case NANOARROW_TYPE_TIMESTAMP:
+ return "timestamp";
+ case NANOARROW_TYPE_TIME32:
+ return "time32";
+ case NANOARROW_TYPE_TIME64:
+ return "time64";
+ case NANOARROW_TYPE_INTERVAL_MONTHS:
+ return "interval_months";
+ case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+ return "interval_day_time";
+ case NANOARROW_TYPE_DECIMAL128:
+ return "decimal128";
+ case NANOARROW_TYPE_DECIMAL256:
+ return "decimal256";
+ case NANOARROW_TYPE_LIST:
+ return "list";
+ case NANOARROW_TYPE_STRUCT:
+ return "struct";
+ case NANOARROW_TYPE_SPARSE_UNION:
+ return "sparse_union";
+ case NANOARROW_TYPE_DENSE_UNION:
+ return "dense_union";
+ case NANOARROW_TYPE_DICTIONARY:
+ return "dictionary";
+ case NANOARROW_TYPE_MAP:
+ return "map";
+ case NANOARROW_TYPE_EXTENSION:
+ return "extension";
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ return "fixed_size_list";
+ case NANOARROW_TYPE_DURATION:
+ return "duration";
+ case NANOARROW_TYPE_LARGE_STRING:
+ return "large_string";
+ case NANOARROW_TYPE_LARGE_BINARY:
+ return "large_binary";
+ case NANOARROW_TYPE_LARGE_LIST:
+ return "large_list";
+ case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+ return "interval_month_day_nano";
+ default:
+ // Not a value handled above
+ return NULL;
+ }
+}
+
+/// \brief Arrow time unit enumerator
+/// \ingroup nanoarrow-utils
+///
+/// These names and values map to the corresponding arrow::TimeUnit::type
+/// enumerator.
+enum ArrowTimeUnit {
+ /// \brief Seconds (rendered as "s" by ArrowTimeUnitString())
+ NANOARROW_TIME_UNIT_SECOND = 0,
+ /// \brief Milliseconds (rendered as "ms" by ArrowTimeUnitString())
+ NANOARROW_TIME_UNIT_MILLI = 1,
+ /// \brief Microseconds (rendered as "us" by ArrowTimeUnitString())
+ NANOARROW_TIME_UNIT_MICRO = 2,
+ /// \brief Nanoseconds (rendered as "ns" by ArrowTimeUnitString())
+ NANOARROW_TIME_UNIT_NANO = 3
+};
+
+/// \brief Get a string value of an enum ArrowTimeUnit value
+/// \ingroup nanoarrow-utils
+///
+/// Yields "s", "ms", "us" or "ns" for the second, milli, micro and nano
+/// units respectively. Returns NULL for invalid values for time_unit
+static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
+  if (time_unit == NANOARROW_TIME_UNIT_SECOND) {
+    return "s";
+  }
+
+  if (time_unit == NANOARROW_TIME_UNIT_MILLI) {
+    return "ms";
+  }
+
+  if (time_unit == NANOARROW_TIME_UNIT_MICRO) {
+    return "us";
+  }
+
+  if (time_unit == NANOARROW_TIME_UNIT_NANO) {
+    return "ns";
+  }
+
+  // Not one of the four known units
+  return NULL;
+}
+
/// \brief Functional types of buffers as described in the Arrow Columnar Specification
+/// \ingroup nanoarrow-array-view
enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_NONE,
NANOARROW_BUFFER_TYPE_VALIDITY,
@@ -204,39 +355,8 @@ enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_DATA
};
-#define _NANOARROW_CONCAT(x, y) x##y
-#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y)
-
-#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \
- do { \
- const int NAME = (EXPR); \
- if (NAME) return NAME; \
- } while (0)
-
-#define NANOARROW_RETURN_NOT_OK(EXPR) \
- _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR)
-
-#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \
- NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL)
-
-/// \brief A description of an arrangement of buffers
-///
-/// Contains the minimum amount of information required to
-/// calculate the size of each buffer in an ArrowArray knowing only
-/// the length and offset of the array.
-struct ArrowLayout {
- /// \brief The function of each buffer
- enum ArrowBufferType buffer_type[3];
-
- /// \brief The size of an element each buffer or 0 if this size is variable or unknown
- int64_t element_size_bits[3];
-
- /// \brief The number of elements in the child array per element in this array for a
- /// fixed-size list
- int64_t child_size_elements;
-};
-
/// \brief A non-owning view of a string
+/// \ingroup nanoarrow-utils
struct ArrowStringView {
/// \brief A pointer to the start of the string
///
@@ -249,6 +369,8 @@ struct ArrowStringView {
int64_t n_bytes;
};
+/// \brief Return a view of a const C string
+/// \ingroup nanoarrow-utils
static inline struct ArrowStringView ArrowCharView(const char* value) {
struct ArrowStringView out;
@@ -263,6 +385,7 @@ static inline struct ArrowStringView ArrowCharView(const char* value) {
}
/// \brief A non-owning view of a buffer
+/// \ingroup nanoarrow-utils
struct ArrowBufferView {
/// \brief A pointer to the start of the buffer
///
@@ -287,6 +410,7 @@ struct ArrowBufferView {
};
/// \brief Array buffer allocation and deallocation
+/// \ingroup nanoarrow-buffer
///
/// Container for allocate, reallocate, and free methods that can be used
/// to customize allocation and deallocation of buffers when constructing
@@ -304,6 +428,7 @@ struct ArrowBufferAllocator {
};
/// \brief An owning mutable view of a buffer
+/// \ingroup nanoarrow-buffer
struct ArrowBuffer {
/// \brief A pointer to the start of the buffer
///
@@ -321,6 +446,7 @@ struct ArrowBuffer {
};
/// \brief An owning mutable view of a bitmap
+/// \ingroup nanoarrow-bitmap
struct ArrowBitmap {
/// \brief An ArrowBuffer to hold the allocated memory
struct ArrowBuffer buffer;
@@ -329,6 +455,57 @@ struct ArrowBitmap {
int64_t size_bits;
};
+/// \brief A description of an arrangement of buffers
+/// \ingroup nanoarrow-utils
+///
+/// Contains the minimum amount of information required to
+/// calculate the size of each buffer in an ArrowArray knowing only
+/// the length and offset of the array.
+struct ArrowLayout {
+ /// \brief The function of each buffer
+ enum ArrowBufferType buffer_type[3];
+
+ /// \brief The size of an element in each buffer, in bits, or 0 if this size is
+ /// variable or unknown
+ int64_t element_size_bits[3];
+
+ /// \brief The number of elements in the child array per element in this array for a
+ /// fixed-size list
+ int64_t child_size_elements;
+};
+
+/// \brief A non-owning view of an ArrowArray
+/// \ingroup nanoarrow-array-view
+///
+/// This data structure provides access to the values contained within
+/// an ArrowArray with fields provided in a more readily extractable
+/// form. You can re-use an ArrowArrayView for multiple ArrowArrays
+/// with the same storage type, or use it to represent a hypothetical
+/// ArrowArray that does not exist yet.
+struct ArrowArrayView {
+ /// \brief The underlying ArrowArray or NULL if it has not been set
+ struct ArrowArray* array;
+
+ /// \brief The type used to store values in this array
+ ///
+ /// This type represents only the minimum required information to
+ /// extract values from the array buffers (e.g., for a Date32 array,
+ /// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded
+ /// arrays, this will be the index type.
+ enum ArrowType storage_type;
+
+ /// \brief The buffer types, strides, and sizes of this Array's buffers
+ struct ArrowLayout layout;
+
+ /// \brief This Array's buffers as ArrowBufferView objects (three slots,
+ /// matching the three entries per buffer in struct ArrowLayout)
+ struct ArrowBufferView buffer_views[3];
+
+ /// \brief The number of children of this view
+ int64_t n_children;
+
+ /// \brief Pointers to views of this array's children
+ struct ArrowArrayView** children;
+};
+
// Used as the private data member for ArrowArrays allocated here and accessed
// internally within inline ArrowArray* helpers.
struct ArrowArrayPrivateData {
@@ -351,17 +528,6 @@ struct ArrowArrayPrivateData {
struct ArrowLayout layout;
};
-struct ArrowArrayView {
- struct ArrowArray* array;
- enum ArrowType storage_type;
- struct ArrowLayout layout;
- struct ArrowBufferView buffer_views[3];
- int64_t n_children;
- struct ArrowArrayView** children;
-};
-
-/// }@
-
#ifdef __cplusplus
}
#endif
@@ -447,6 +613,7 @@ struct ArrowArrayView {
#define ArrowMetadataBuilderRemove \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderRemove)
#define ArrowSchemaViewInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaViewInit)
+#define ArrowSchemaToString NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaToString)
#define ArrowArrayInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInit)
#define ArrowArrayInitFromSchema \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema)
@@ -477,11 +644,7 @@ struct ArrowArrayView {
extern "C" {
#endif
-/// \file Arrow C Implementation
-///
-/// EXPERIMENTAL. Interface subject to change.
-
-/// \page object-model Object Model
+/// \defgroup nanoarrow Nanoarrow C library
///
/// Except where noted, objects are not thread-safe and clients should
/// take care to serialize accesses to methods.
@@ -494,8 +657,10 @@ extern "C" {
///
/// Non-buffer members of a struct ArrowSchema and struct ArrowArray
/// must be allocated using ArrowMalloc() or ArrowRealloc() and freed
-/// using ArrowFree for schemas and arrays allocated here. Buffer members
+/// using ArrowFree() for schemas and arrays allocated here. Buffer members
/// are allocated using an ArrowBufferAllocator.
+///
+/// @{
/// \brief Allocate like malloc()
void* ArrowMalloc(int64_t size);
@@ -523,17 +688,21 @@ struct ArrowBufferAllocator ArrowBufferDeallocator(
int64_t size),
void* private_data);
-/// }@
+/// @}
-/// \defgroup nanoarrow-errors Error handling primitives
+/// \defgroup nanoarrow-errors Error handling
+///
/// Functions generally return an errno-compatible error code; functions that
/// need to communicate more verbose error information accept a pointer
/// to an ArrowError. This can be stack or statically allocated. The
/// content of the message is undefined unless an error code has been
/// returned.
+///
+/// @{
/// \brief Error type containing a UTF-8 encoded message.
struct ArrowError {
+ /// \brief A character buffer with space for an error message.
char message[1024];
};
@@ -543,9 +712,11 @@ ArrowErrorCode ArrowErrorSet(struct ArrowError* error, const char* fmt, ...);
/// \brief Get the contents of an error
const char* ArrowErrorMessage(struct ArrowError* error);
-/// }@
+/// @}
/// \defgroup nanoarrow-utils Utility data structures
+///
+/// @{
/// \brief Return the build id against which the library was compiled
const char* ArrowNanoarrowBuildId();
@@ -556,21 +727,13 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type);
/// \brief Create a string view from a null-terminated string
static inline struct ArrowStringView ArrowCharView(const char* value);
-/// \brief Arrow time unit enumerator
-///
-/// These names and values map to the corresponding arrow::TimeUnit::type
-/// enumerator.
-enum ArrowTimeUnit {
- NANOARROW_TIME_UNIT_SECOND = 0,
- NANOARROW_TIME_UNIT_MILLI = 1,
- NANOARROW_TIME_UNIT_MICRO = 2,
- NANOARROW_TIME_UNIT_NANO = 3
-};
-
-/// }@
+/// @}
-/// \defgroup nanoarrow-schema Schema producer helpers
+/// \defgroup nanoarrow-schema Creating schemas
+///
/// These functions allocate, copy, and destroy ArrowSchema structures
+///
+/// @{
/// \brief Initialize the fields of a schema
///
@@ -579,6 +742,15 @@ enum ArrowTimeUnit {
/// NANOARROW_OK is returned.
ArrowErrorCode ArrowSchemaInit(struct ArrowSchema* schema, enum ArrowType type);
+/// \brief Get a human-readable summary of a Schema
+///
+/// Writes a summary of an ArrowSchema to out (up to n - 1 characters)
+/// and returns the number of characters required for the output if
+/// n were sufficiently large. If recursive is non-zero, the result will
+/// also include children.
+int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n,
+ char recursive);
+
/// \brief Initialize the fields of a fixed-size schema
///
/// Returns EINVAL for fixed_size <= 0 or for data_type that is not
@@ -613,40 +785,54 @@ ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema,
/// \brief Copy format into schema->format
///
-/// schema must have been allocated using ArrowSchemaInit or
-/// ArrowSchemaDeepCopy.
+/// schema must have been allocated using ArrowSchemaInit() or
+/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format);
/// \brief Copy name into schema->name
///
-/// schema must have been allocated using ArrowSchemaInit or
-/// ArrowSchemaDeepCopy.
+/// schema must have been allocated using ArrowSchemaInit() or
+/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name);
/// \brief Copy metadata into schema->metadata
///
-/// schema must have been allocated using ArrowSchemaInit or
+/// schema must have been allocated using ArrowSchemaInit() or
/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata);
/// \brief Allocate the schema->children array
///
/// Includes the memory for each child struct ArrowSchema.
-/// schema must have been allocated using ArrowSchemaInit or
-/// ArrowSchemaDeepCopy.
+/// schema must have been allocated using ArrowSchemaInit() or
+/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema,
int64_t n_children);
/// \brief Allocate the schema->dictionary member
///
-/// schema must have been allocated using ArrowSchemaInit or
-/// ArrowSchemaDeepCopy.
+/// schema must have been allocated using ArrowSchemaInit() or
+/// ArrowSchemaDeepCopy().
ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema);
+/// @}
+
+/// \defgroup nanoarrow-metadata Create, read, and modify schema metadata
+///
+/// @{
+
/// \brief Reader for key/value pairs in schema metadata
+///
+/// The ArrowMetadataReader does not own any data and is only valid
+/// for the lifetime of the underlying metadata pointer.
struct ArrowMetadataReader {
+ /// \brief A metadata string from a schema->metadata field.
const char* metadata;
+
+ /// \brief The current offset into the metadata string
int64_t offset;
+
+ /// \brief The number of remaining keys
int32_t remaining_keys;
};
@@ -695,9 +881,11 @@ ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer,
ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer,
struct ArrowStringView key);
-/// }@
+/// @}
-/// \defgroup nanoarrow-schema-view Schema consumer helpers
+/// \defgroup nanoarrow-schema-view Reading schemas
+///
+/// @{
/// \brief A non-owning view of a parsed ArrowSchema
///
@@ -792,9 +980,11 @@ struct ArrowSchemaView {
ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
struct ArrowSchema* schema, struct ArrowError* error);
-/// }@
+/// @}
/// \defgroup nanoarrow-buffer Owning, growable buffers
+///
+/// @{
/// \brief Initialize an ArrowBuffer
///
@@ -900,9 +1090,11 @@ static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer,
static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer,
float value);
-/// }@
+/// @}
/// \defgroup nanoarrow-bitmap Bitmap utilities
+///
+/// @{
/// \brief Extract a boolean value from a bitmap
static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i);
@@ -970,10 +1162,13 @@ static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap,
/// Releases any memory held by buffer, empties the cache, and resets the size to zero
static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap);
-/// }@
+/// @}
-/// \defgroup nanoarrow-array Array producer helpers
+/// \defgroup nanoarrow-array Creating arrays
+///
/// These functions allocate, copy, and destroy ArrowArray structures
+///
+/// @{
/// \brief Initialize the fields of an array
///
@@ -994,37 +1189,37 @@ ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
///
/// Includes the memory for each child struct ArrowArray,
/// whose members are marked as released and may be subsequently initialized
-/// with ArrowArrayInit or moved from an existing ArrowArray.
-/// schema must have been allocated using ArrowArrayInit.
+/// with ArrowArrayInit() or moved from an existing ArrowArray.
+/// array must have been allocated using ArrowArrayInit().
ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children);
/// \brief Allocate the array->dictionary member
///
/// Includes the memory for the struct ArrowArray, whose contents
/// is marked as released and may be subsequently initialized
-/// with ArrowArrayInit or moved from an existing ArrowArray.
-/// array must have been allocated using ArrowArrayInit
+/// with ArrowArrayInit() or moved from an existing ArrowArray.
+/// array must have been allocated using ArrowArrayInit()
ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array);
/// \brief Set the validity bitmap of an ArrowArray
///
-/// array must have been allocated using ArrowArrayInit
+/// array must have been allocated using ArrowArrayInit()
void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap);
/// \brief Set a buffer of an ArrowArray
///
-/// array must have been allocated using ArrowArrayInit
+/// array must have been allocated using ArrowArrayInit()
ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
struct ArrowBuffer* buffer);
/// \brief Get the validity bitmap of an ArrowArray
///
-/// array must have been allocated using ArrowArrayInit
+/// array must have been allocated using ArrowArrayInit()
static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array);
/// \brief Get a buffer of an ArrowArray
///
-/// array must have been allocated using ArrowArrayInit
+/// array must have been allocated using ArrowArrayInit()
static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i);
/// \brief Start element-wise appending to an ArrowArray
@@ -1032,7 +1227,7 @@ static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int
/// Initializes any values needed to use ArrowArrayAppend*() functions.
/// All element-wise appenders append by value and return EINVAL if the exact value
/// cannot be represented by the underlying storage type.
-/// array must have been allocated using ArrowArrayInit
+/// array must have been allocated using ArrowArrayInit()
static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array);
/// \brief Reserve space for future appends
@@ -1108,14 +1303,17 @@ static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array);
/// Flushes any pointers from internal buffers that may have been reallocated
/// into the array->buffers array and checks the actual size of the buffers
/// against the expected size based on the final length.
-/// array must have been allocated using ArrowArrayInit
+/// array must have been allocated using ArrowArrayInit()
ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
struct ArrowError* error);
-/// }@
+/// @}
-/// \defgroup nanoarrow-array Array consumer helpers
+/// \defgroup nanoarrow-array-view Reading arrays
+///
/// These functions read and validate the contents of ArrowArray structures
+///
+/// @{
/// \brief Initialize the contents of an ArrowArrayView
void ArrowArrayViewInit(struct ArrowArrayView* array_view, enum ArrowType storage_type);
@@ -1177,7 +1375,7 @@ static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
struct ArrowArrayView* array_view, int64_t i);
-/// }@
+/// @}
// Inline function definitions
@@ -1481,17 +1679,20 @@ static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset
const int64_t bytes_begin = i_begin / 8;
const int64_t bytes_end = i_end / 8 + 1;
- const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
- const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];
-
if (bytes_end == bytes_begin + 1) {
// count bits within a single byte
+ const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8];
+ const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8];
+
const uint8_t only_byte_mask =
- i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
+ i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask);
+
const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask;
return _ArrowkBytePopcount[byte_masked];
}
+ const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8];
+ const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8];
int64_t count = 0;
// first byte
@@ -1871,15 +2072,15 @@ static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array,
break;
case NANOARROW_TYPE_INT32:
_NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, (int32_t)value));
break;
case NANOARROW_TYPE_INT16:
_NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(data_buffer, (int16_t)value));
break;
case NANOARROW_TYPE_INT8:
_NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(data_buffer, (int8_t)value));
break;
case NANOARROW_TYPE_UINT64:
case NANOARROW_TYPE_UINT32:
@@ -1921,15 +2122,15 @@ static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
break;
case NANOARROW_TYPE_UINT32:
_NANOARROW_CHECK_RANGE(value, 0, UINT32_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(data_buffer, (uint32_t)value));
break;
case NANOARROW_TYPE_UINT16:
_NANOARROW_CHECK_RANGE(value, 0, UINT16_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(data_buffer, (uint16_t)value));
break;
case NANOARROW_TYPE_UINT8:
_NANOARROW_CHECK_RANGE(value, 0, UINT8_MAX);
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(data_buffer, value));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(data_buffer, (uint8_t)value));
break;
case NANOARROW_TYPE_INT64:
case NANOARROW_TYPE_INT32:
@@ -2072,7 +2273,7 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
return EINVAL;
}
NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), child_length));
+ ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length));
break;
case NANOARROW_TYPE_LARGE_LIST:
child_length = array->children[0]->length;
@@ -2115,7 +2316,7 @@ static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int
case NANOARROW_TYPE_DENSE_UNION:
case NANOARROW_TYPE_SPARSE_UNION:
// Not supported yet
- return 0xff;
+ return -1;
default:
return validity_buffer != NULL && !ArrowBitGet(validity_buffer, i);
}