You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sh...@apache.org on 2018/12/07 01:25:50 UTC
[arrow] branch master updated: ARROW-3946: [GLib] Add support for union
This is an automated email from the ASF dual-hosted git repository.
shiro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 494ef75 ARROW-3946: [GLib] Add support for union
494ef75 is described below
commit 494ef75f4989064210a699b0e3c715a7d67d07fa
Author: Kouhei Sutou <ko...@clear-code.com>
AuthorDate: Fri Dec 7 10:25:34 2018 +0900
ARROW-3946: [GLib] Add support for union
Author: Kouhei Sutou <ko...@clear-code.com>
Closes #3112 from kou/glib-union-array and squashes the following commits:
b1d8870f <Kouhei Sutou> Fix a typo
c45e7fc0 <Kouhei Sutou> Add support for union
---
c_glib/arrow-glib/basic-array.cpp | 15 +-
c_glib/arrow-glib/basic-data-type.cpp | 11 +
c_glib/arrow-glib/composite-array.cpp | 171 ++++++++++++++-
c_glib/arrow-glib/composite-array.h | 52 ++++-
c_glib/arrow-glib/composite-data-type.cpp | 238 ++++++++++++++++++++-
c_glib/arrow-glib/composite-data-type.h | 60 ++++++
c_glib/test/test-dense-union-array.rb | 50 +++++
.../test/test-dense-union-data-type.rb | 56 ++---
c_glib/test/test-sparse-union-array.rb | 49 +++++
.../test/test-sparse-union-data-type.rb | 56 ++---
ruby/red-arrow/lib/arrow/array.rb | 6 +-
11 files changed, 661 insertions(+), 103 deletions(-)
diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index 77f64fc..47f9a95 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -209,7 +209,9 @@ enum {
PROP_ARRAY
};
-G_DEFINE_TYPE_WITH_PRIVATE(GArrowArray, garrow_array, G_TYPE_OBJECT)
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowArray,
+ garrow_array,
+ G_TYPE_OBJECT)
#define GARROW_ARRAY_GET_PRIVATE(obj) \
static_cast<GArrowArrayPrivate *>( \
@@ -2255,6 +2257,17 @@ garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array)
case arrow::Type::type::STRUCT:
type = GARROW_TYPE_STRUCT_ARRAY;
break;
+ case arrow::Type::type::UNION:
+ {
+ auto arrow_union_array =
+ std::static_pointer_cast<arrow::UnionArray>(*arrow_array);
+ if (arrow_union_array->mode() == arrow::UnionMode::SPARSE) {
+ type = GARROW_TYPE_SPARSE_UNION_ARRAY;
+ } else {
+ type = GARROW_TYPE_DENSE_UNION_ARRAY;
+ }
+ }
+ break;
case arrow::Type::type::DICTIONARY:
type = GARROW_TYPE_DICTIONARY_ARRAY;
break;
diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp
index 24133c9..86b86cf 100644
--- a/c_glib/arrow-glib/basic-data-type.cpp
+++ b/c_glib/arrow-glib/basic-data-type.cpp
@@ -1184,6 +1184,17 @@ garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
case arrow::Type::type::STRUCT:
type = GARROW_TYPE_STRUCT_DATA_TYPE;
break;
+ case arrow::Type::type::UNION:
+ {
+ auto arrow_union_data_type =
+ std::static_pointer_cast<arrow::UnionType>(*arrow_data_type);
+ if (arrow_union_data_type->mode() == arrow::UnionMode::SPARSE) {
+ type = GARROW_TYPE_SPARSE_UNION_DATA_TYPE;
+ } else {
+ type = GARROW_TYPE_DENSE_UNION_DATA_TYPE;
+ }
+ }
+ break;
case arrow::Type::type::DICTIONARY:
type = GARROW_TYPE_DICTIONARY_DATA_TYPE;
break;
diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index b040ac7..bff1858 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -41,10 +41,18 @@ G_BEGIN_DECLS
* use #GArrowListArrayBuilder to create a new array.
*
* #GArrowStructArray is a class for struct array. It can store zero
- * or more structs. One struct has zero or more fields. If you don't
+ * or more structs. One struct has one or more fields. If you don't
* have Arrow format data, you need to use #GArrowStructArrayBuilder
* to create a new array.
*
+ * #GArrowUnionArray is a base class for union array. It can store
+ * zero or more unions. One union has one or more fields but one union
+ * can store only one field value.
+ *
+ * #GArrowDenseUnionArray is a class for dense union array.
+ *
+ * #GArrowSparseUnionArray is a class for sparse union array.
+ *
* #GArrowDictionaryArray is a class for dictionary array. It can
* store data with dictionary and indices. It's more space efficient than a
* normal array when the array has many identical values. You can convert a
@@ -159,7 +167,7 @@ garrow_struct_array_class_init(GArrowStructArrayClass *klass)
* garrow_struct_array_new:
* @data_type: The data type of the struct.
* @length: The number of elements.
- * @children: (element-type GArrowArray): The arrays for each field
+ * @fields: (element-type GArrowArray): The arrays for each field
* as #GList of #GArrowArray.
* @null_bitmap: (nullable): The bitmap that shows null elements. The
* N-th element is null when the N-th bit is 0, not null otherwise.
@@ -175,21 +183,21 @@ garrow_struct_array_class_init(GArrowStructArrayClass *klass)
GArrowStructArray *
garrow_struct_array_new(GArrowDataType *data_type,
gint64 length,
- GList *children,
+ GList *fields,
GArrowBuffer *null_bitmap,
gint64 n_nulls)
{
const auto arrow_data_type = garrow_data_type_get_raw(data_type);
- std::vector<std::shared_ptr<arrow::Array>> arrow_children;
- for (GList *node = children; node; node = node->next) {
- GArrowArray *child = GARROW_ARRAY(node->data);
- arrow_children.push_back(garrow_array_get_raw(child));
+ std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+ for (auto node = fields; node; node = node->next) {
+ auto child = GARROW_ARRAY(node->data);
+ arrow_fields.push_back(garrow_array_get_raw(child));
}
const auto arrow_bitmap = garrow_buffer_get_raw(null_bitmap);
auto arrow_struct_array =
std::make_shared<arrow::StructArray>(arrow_data_type,
length,
- arrow_children,
+ arrow_fields,
arrow_bitmap,
n_nulls);
auto arrow_array =
@@ -264,6 +272,153 @@ garrow_struct_array_flatten(GArrowStructArray *array, GError **error)
}
+G_DEFINE_TYPE(GArrowUnionArray,
+ garrow_union_array,
+ GARROW_TYPE_ARRAY)
+
+static void
+garrow_union_array_init(GArrowUnionArray *object)
+{
+}
+
+static void
+garrow_union_array_class_init(GArrowUnionArrayClass *klass)
+{
+}
+
+/**
+ * garrow_union_array_get_field:
+ * @array: A #GArrowUnionArray.
+ * @i: The index of the field in the union.
+ *
+ * Returns: (nullable) (transfer full): The i-th field values as a
+ * #GArrowArray or %NULL on out of range.
+ */
+GArrowArray *
+garrow_union_array_get_field(GArrowUnionArray *array,
+ gint i)
+{
+ auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+ auto arrow_union_array =
+ std::static_pointer_cast<arrow::UnionArray>(arrow_array);
+ auto n_fields = arrow_array->num_fields();
+ if (i < 0) {
+ i += n_fields;
+ }
+ if (i < 0) {
+ return NULL;
+ }
+ if (i >= n_fields) {
+ return NULL;
+ }
+ auto arrow_field_array = arrow_union_array->child(i);
+ return garrow_array_new_raw(&arrow_field_array);
+}
+
+
+G_DEFINE_TYPE(GArrowSparseUnionArray,
+ garrow_sparse_union_array,
+ GARROW_TYPE_UNION_ARRAY)
+
+static void
+garrow_sparse_union_array_init(GArrowSparseUnionArray *object)
+{
+}
+
+static void
+garrow_sparse_union_array_class_init(GArrowSparseUnionArrayClass *klass)
+{
+}
+
+/**
+ * garrow_sparse_union_array_new:
+ * @type_ids: The field type IDs for each value as #GArrowInt8Array.
+ * @fields: (element-type GArrowArray): The arrays for each field
+ * as #GList of #GArrowArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowSparseUnionArray
+ * or %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GArrowSparseUnionArray *
+garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
+ GList *fields,
+ GError **error)
+{
+ auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
+ std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+ for (auto node = fields; node; node = node->next) {
+ auto *field = GARROW_ARRAY(node->data);
+ arrow_fields.push_back(garrow_array_get_raw(field));
+ }
+ std::shared_ptr<arrow::Array> arrow_union_array;
+ auto status = arrow::UnionArray::MakeSparse(*arrow_type_ids,
+ arrow_fields,
+ &arrow_union_array);
+ if (garrow_error_check(error, status, "[sparse-union-array][new]")) {
+ return GARROW_SPARSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
+ } else {
+ return NULL;
+ }
+}
+
+
+G_DEFINE_TYPE(GArrowDenseUnionArray,
+ garrow_dense_union_array,
+ GARROW_TYPE_UNION_ARRAY)
+
+static void
+garrow_dense_union_array_init(GArrowDenseUnionArray *object)
+{
+}
+
+static void
+garrow_dense_union_array_class_init(GArrowDenseUnionArrayClass *klass)
+{
+}
+
+/**
+ * garrow_dense_union_array_new:
+ * @type_ids: The field type IDs for each value as #GArrowInt8Array.
+ * @value_offsets: The value offsets for each value as #GArrowInt32Array.
+ * Each offset is counted for each type.
+ * @fields: (element-type GArrowArray): The arrays for each field
+ * as #GList of #GArrowArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowDenseUnionArray
+ * or %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GArrowDenseUnionArray *
+garrow_dense_union_array_new(GArrowInt8Array *type_ids,
+ GArrowInt32Array *value_offsets,
+ GList *fields,
+ GError **error)
+{
+ auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
+ auto arrow_value_offsets = garrow_array_get_raw(GARROW_ARRAY(value_offsets));
+ std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+ for (auto node = fields; node; node = node->next) {
+ auto *field = GARROW_ARRAY(node->data);
+ arrow_fields.push_back(garrow_array_get_raw(field));
+ }
+ std::shared_ptr<arrow::Array> arrow_union_array;
+ auto status = arrow::UnionArray::MakeDense(*arrow_type_ids,
+ *arrow_value_offsets,
+ arrow_fields,
+ &arrow_union_array);
+ if (garrow_error_check(error, status, "[dense-union-array][new]")) {
+ return GARROW_DENSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
+ } else {
+ return NULL;
+ }
+}
+
+
G_DEFINE_TYPE(GArrowDictionaryArray,
garrow_dictionary_array,
GARROW_TYPE_ARRAY)
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index ad6ad53..c634dbf 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -123,7 +123,7 @@ GType garrow_struct_array_get_type(void) G_GNUC_CONST;
GArrowStructArray *garrow_struct_array_new(GArrowDataType *data_type,
gint64 length,
- GList *children,
+ GList *fields,
GArrowBuffer *null_bitmap,
gint64 n_nulls);
@@ -137,6 +137,56 @@ GARROW_AVAILABLE_IN_0_10
GList *garrow_struct_array_flatten(GArrowStructArray *array, GError **error);
+#define GARROW_TYPE_UNION_ARRAY (garrow_union_array_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUnionArray,
+ garrow_union_array,
+ GARROW,
+ UNION_ARRAY,
+ GArrowArray)
+struct _GArrowUnionArrayClass
+{
+ GArrowArrayClass parent_class;
+};
+
+GArrowArray *
+garrow_union_array_get_field(GArrowUnionArray *array,
+ gint i);
+
+#define GARROW_TYPE_SPARSE_UNION_ARRAY (garrow_sparse_union_array_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArray,
+ garrow_sparse_union_array,
+ GARROW,
+ SPARSE_UNION_ARRAY,
+ GArrowUnionArray)
+struct _GArrowSparseUnionArrayClass
+{
+ GArrowUnionArrayClass parent_class;
+};
+
+GArrowSparseUnionArray *
+garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
+ GList *fields,
+ GError **error);
+
+
+#define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArray,
+ garrow_dense_union_array,
+ GARROW,
+ DENSE_UNION_ARRAY,
+ GArrowUnionArray)
+struct _GArrowDenseUnionArrayClass
+{
+ GArrowUnionArrayClass parent_class;
+};
+
+GArrowDenseUnionArray *
+garrow_dense_union_array_new(GArrowInt8Array *type_ids,
+ GArrowInt32Array *value_offsets,
+ GList *fields,
+ GError **error);
+
+
#define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryArray,
garrow_dictionary_array,
diff --git a/c_glib/arrow-glib/composite-data-type.cpp b/c_glib/arrow-glib/composite-data-type.cpp
index 2645bea..a4d3d84 100644
--- a/c_glib/arrow-glib/composite-data-type.cpp
+++ b/c_glib/arrow-glib/composite-data-type.cpp
@@ -40,6 +40,12 @@ G_BEGIN_DECLS
*
* #GArrowStructDataType is a class for struct data type.
*
+ * #GArrowUnionDataType is a base class for union data types.
+ *
+ * #GArrowSparseUnionDataType is a class for sparse union data type.
+ *
+ * #GArrowDenseUnionDataType is a class for dense union data type.
+ *
* #GArrowDictionaryDataType is a class for dictionary data type.
*/
@@ -122,18 +128,17 @@ GArrowStructDataType *
garrow_struct_data_type_new(GList *fields)
{
std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
- for (GList *node = fields; node; node = g_list_next(node)) {
+ for (auto *node = fields; node; node = g_list_next(node)) {
auto field = GARROW_FIELD(node->data);
auto arrow_field = garrow_field_get_raw(field);
arrow_fields.push_back(arrow_field);
}
auto arrow_data_type = std::make_shared<arrow::StructType>(arrow_fields);
- GArrowStructDataType *data_type =
- GARROW_STRUCT_DATA_TYPE(g_object_new(GARROW_TYPE_STRUCT_DATA_TYPE,
- "data-type", &arrow_data_type,
- NULL));
- return data_type;
+ auto data_type = g_object_new(GARROW_TYPE_STRUCT_DATA_TYPE,
+ "data-type", &arrow_data_type,
+ NULL);
+ return GARROW_STRUCT_DATA_TYPE(data_type);
}
/**
@@ -189,9 +194,12 @@ garrow_struct_data_type_get_field(GArrowStructDataType *data_type,
{
auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
- while (i < 0) {
+ if (i < 0) {
i += arrow_data_type->num_children();
}
+ if (i < 0) {
+ return NULL;
+ }
if (i >= arrow_data_type->num_children()) {
return NULL;
}
@@ -252,6 +260,222 @@ garrow_struct_data_type_get_field_index(GArrowStructDataType *data_type,
}
+G_DEFINE_ABSTRACT_TYPE(GArrowUnionDataType,
+ garrow_union_data_type,
+ GARROW_TYPE_DATA_TYPE)
+
+static void
+garrow_union_data_type_init(GArrowUnionDataType *object)
+{
+}
+
+static void
+garrow_union_data_type_class_init(GArrowUnionDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_union_data_type_get_n_fields:
+ * @data_type: A #GArrowUnionDataType.
+ *
+ * Returns: The number of fields of the union data type.
+ *
+ * Since: 0.12.0
+ */
+gint
+garrow_union_data_type_get_n_fields(GArrowUnionDataType *data_type)
+{
+ auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+ return arrow_data_type->num_children();
+}
+
+/**
+ * garrow_union_data_type_get_fields:
+ * @data_type: A #GArrowUnionDataType.
+ *
+ * Returns: (transfer full) (element-type GArrowField):
+ * The fields of the union data type.
+ *
+ * Since: 0.12.0
+ */
+GList *
+garrow_union_data_type_get_fields(GArrowUnionDataType *data_type)
+{
+ auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+ auto arrow_fields = arrow_data_type->children();
+
+ GList *fields = NULL;
+ for (auto arrow_field : arrow_fields) {
+ fields = g_list_prepend(fields, garrow_field_new_raw(&arrow_field));
+ }
+ return g_list_reverse(fields);
+}
+
+/**
+ * garrow_union_data_type_get_field:
+ * @data_type: A #GArrowUnionDataType.
+ * @i: The index of the target field.
+ *
+ * Returns: (transfer full) (nullable):
+ * The field at the index in the union data type or %NULL on not found.
+ *
+ * Since: 0.12.0
+ */
+GArrowField *
+garrow_union_data_type_get_field(GArrowUnionDataType *data_type,
+ gint i)
+{
+ auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+
+ if (i < 0) {
+ i += arrow_data_type->num_children();
+ }
+ if (i < 0) {
+ return NULL;
+ }
+ if (i >= arrow_data_type->num_children()) {
+ return NULL;
+ }
+
+ auto arrow_field = arrow_data_type->child(i);
+ if (arrow_field) {
+ return garrow_field_new_raw(&arrow_field);
+ } else {
+ return NULL;
+ }
+}
+
+/**
+ * garrow_union_data_type_get_type_codes:
+ * @data_type: A #GArrowUnionDataType.
+ * @n_type_codes: (out): The number of type codes.
+ *
+ * Returns: (transfer full) (array length=n_type_codes):
+ * The codes for each field.
+ *
+ * It should be freed with g_free() when no longer needed.
+ *
+ * Since: 0.12.0
+ */
+guint8 *
+garrow_union_data_type_get_type_codes(GArrowUnionDataType *data_type,
+ gsize *n_type_codes)
+{
+ auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+ auto arrow_union_data_type =
+ std::static_pointer_cast<arrow::UnionType>(arrow_data_type);
+
+ const auto arrow_type_codes = arrow_union_data_type->type_codes();
+ const auto n = arrow_type_codes.size();
+ auto type_codes = static_cast<guint8 *>(g_new(guint8, n));
+ for (size_t i = 0; i < n; ++i) {
+ type_codes[i] = arrow_type_codes[i];
+ }
+ *n_type_codes = n;
+ return type_codes;
+}
+
+
+G_DEFINE_TYPE(GArrowSparseUnionDataType,
+ garrow_sparse_union_data_type,
+ GARROW_TYPE_UNION_DATA_TYPE)
+
+static void
+garrow_sparse_union_data_type_init(GArrowSparseUnionDataType *object)
+{
+}
+
+static void
+garrow_sparse_union_data_type_class_init(GArrowSparseUnionDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_sparse_union_data_type_new:
+ * @fields: (element-type GArrowField): The fields of the union.
+ * @type_codes: (array length=n_type_codes): The codes to specify each field.
+ * @n_type_codes: The number of type codes.
+ *
+ * Returns: The newly created sparse union data type.
+ */
+GArrowSparseUnionDataType *
+garrow_sparse_union_data_type_new(GList *fields,
+ guint8 *type_codes,
+ gsize n_type_codes)
+{
+ std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
+ for (auto node = fields; node; node = g_list_next(node)) {
+ auto field = GARROW_FIELD(node->data);
+ auto arrow_field = garrow_field_get_raw(field);
+ arrow_fields.push_back(arrow_field);
+ }
+
+ std::vector<uint8_t> arrow_type_codes;
+ for (gsize i = 0; i < n_type_codes; ++i) {
+ arrow_type_codes.push_back(type_codes[i]);
+ }
+
+ auto arrow_data_type =
+ std::make_shared<arrow::UnionType>(arrow_fields,
+ arrow_type_codes,
+ arrow::UnionMode::SPARSE);
+ auto data_type = g_object_new(GARROW_TYPE_SPARSE_UNION_DATA_TYPE,
+ "data-type", &arrow_data_type,
+ NULL);
+ return GARROW_SPARSE_UNION_DATA_TYPE(data_type);
+}
+
+
+G_DEFINE_TYPE(GArrowDenseUnionDataType,
+ garrow_dense_union_data_type,
+ GARROW_TYPE_UNION_DATA_TYPE)
+
+static void
+garrow_dense_union_data_type_init(GArrowDenseUnionDataType *object)
+{
+}
+
+static void
+garrow_dense_union_data_type_class_init(GArrowDenseUnionDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_dense_union_data_type_new:
+ * @fields: (element-type GArrowField): The fields of the union.
+ * @type_codes: (array length=n_type_codes): The codes to specify each field.
+ * @n_type_codes: The number of type codes.
+ *
+ * Returns: The newly created dense union data type.
+ */
+GArrowDenseUnionDataType *
+garrow_dense_union_data_type_new(GList *fields,
+ guint8 *type_codes,
+ gsize n_type_codes)
+{
+ std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
+ for (auto node = fields; node; node = g_list_next(node)) {
+ auto field = GARROW_FIELD(node->data);
+ auto arrow_field = garrow_field_get_raw(field);
+ arrow_fields.push_back(arrow_field);
+ }
+
+ std::vector<uint8_t> arrow_type_codes;
+ for (gsize i = 0; i < n_type_codes; ++i) {
+ arrow_type_codes.push_back(type_codes[i]);
+ }
+
+ auto arrow_data_type =
+ std::make_shared<arrow::UnionType>(arrow_fields,
+ arrow_type_codes,
+ arrow::UnionMode::DENSE);
+ auto data_type = g_object_new(GARROW_TYPE_DENSE_UNION_DATA_TYPE,
+ "data-type", &arrow_data_type,
+ NULL);
+ return GARROW_DENSE_UNION_DATA_TYPE(data_type);
+}
+
+
G_DEFINE_TYPE(GArrowDictionaryDataType,
garrow_dictionary_data_type,
GARROW_TYPE_FIXED_WIDTH_DATA_TYPE)
diff --git a/c_glib/arrow-glib/composite-data-type.h b/c_glib/arrow-glib/composite-data-type.h
index 7d6a02b..25e1ac3 100644
--- a/c_glib/arrow-glib/composite-data-type.h
+++ b/c_glib/arrow-glib/composite-data-type.h
@@ -96,6 +96,66 @@ gint
garrow_struct_data_type_get_field_index(GArrowStructDataType *data_type,
const gchar *name);
+
+#define GARROW_TYPE_UNION_DATA_TYPE (garrow_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUnionDataType,
+ garrow_union_data_type,
+ GARROW,
+ UNION_DATA_TYPE,
+ GArrowDataType)
+struct _GArrowUnionDataTypeClass
+{
+ GArrowDataTypeClass parent_class;
+};
+
+gint
+garrow_union_data_type_get_n_fields(GArrowUnionDataType *data_type);
+GList *
+garrow_union_data_type_get_fields(GArrowUnionDataType *data_type);
+GArrowField *
+garrow_union_data_type_get_field(GArrowUnionDataType *data_type,
+ gint i);
+guint8 *
+garrow_union_data_type_get_type_codes(GArrowUnionDataType *data_type,
+ gsize *n_type_codes);
+
+
+#define GARROW_TYPE_SPARSE_UNION_DATA_TYPE \
+ (garrow_sparse_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionDataType,
+ garrow_sparse_union_data_type,
+ GARROW,
+ SPARSE_UNION_DATA_TYPE,
+ GArrowUnionDataType)
+struct _GArrowSparseUnionDataTypeClass
+{
+ GArrowUnionDataTypeClass parent_class;
+};
+
+GArrowSparseUnionDataType *
+garrow_sparse_union_data_type_new(GList *fields,
+ guint8 *type_codes,
+ gsize n_type_codes);
+
+
+#define GARROW_TYPE_DENSE_UNION_DATA_TYPE \
+ (garrow_dense_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionDataType,
+ garrow_dense_union_data_type,
+ GARROW,
+ DENSE_UNION_DATA_TYPE,
+ GArrowUnionDataType)
+struct _GArrowDenseUnionDataTypeClass
+{
+ GArrowUnionDataTypeClass parent_class;
+};
+
+GArrowDenseUnionDataType *
+garrow_dense_union_data_type_new(GList *fields,
+ guint8 *type_codes,
+ gsize n_type_codes);
+
+
#define GARROW_TYPE_DICTIONARY_DATA_TYPE (garrow_dictionary_data_type_get_type())
G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryDataType,
garrow_dictionary_data_type,
diff --git a/c_glib/test/test-dense-union-array.rb b/c_glib/test/test-dense-union-array.rb
new file mode 100644
index 0000000..fa73f8d
--- /dev/null
+++ b/c_glib/test/test-dense-union-array.rb
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDenseUnionArray < Test::Unit::TestCase
+ include Helper::Buildable
+
+ def setup
+ type_ids = build_int8_array([0, 1, nil, 1, 1])
+ value_offsets = build_int32_array([0, 0, 0, 1, 2])
+ fields = [
+ build_int16_array([1]),
+ build_string_array(["a", "b", "c"]),
+ ]
+ @array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields)
+ end
+
+ def test_value_data_type
+ fields = [
+ Arrow::Field.new("0", Arrow::Int16DataType.new),
+ Arrow::Field.new("1", Arrow::StringDataType.new),
+ ]
+ assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]),
+ @array.value_data_type)
+ end
+
+ def test_field
+ assert_equal([
+ build_int16_array([1]),
+ build_string_array(["a", "b", "c"]),
+ ],
+ [
+ @array.get_field(0),
+ @array.get_field(1),
+ ])
+ end
+end
diff --git a/ruby/red-arrow/lib/arrow/array.rb b/c_glib/test/test-dense-union-data-type.rb
similarity index 50%
copy from ruby/red-arrow/lib/arrow/array.rb
copy to c_glib/test/test-dense-union-data-type.rb
index 7a0d053..0d12954 100644
--- a/ruby/red-arrow/lib/arrow/array.rb
+++ b/c_glib/test/test-dense-union-data-type.rb
@@ -15,49 +15,21 @@
# specific language governing permissions and limitations
# under the License.
-module Arrow
- class Array
- include Enumerable
-
- class << self
- def new(values)
- builder_class_name = "#{name}Builder"
- if const_defined?(builder_class_name)
- builder_class = const_get(builder_class_name)
- builder_class.build(values)
- else
- super
- end
- end
- end
-
- def [](i)
- i += length if i < 0
- if null?(i)
- nil
- else
- get_value(i)
- end
- end
-
- def each
- return to_enum(__method__) unless block_given?
-
- length.times do |i|
- yield(self[i])
- end
- end
-
- def reverse_each
- return to_enum(__method__) unless block_given?
+class TestDenseUnionDataType < Test::Unit::TestCase
+ def setup
+ fields = [
+ Arrow::Field.new("number", Arrow::Int32DataType.new),
+ Arrow::Field.new("text", Arrow::StringDataType.new),
+ ]
+ @data_type = Arrow::DenseUnionDataType.new(fields, [2, 9])
+ end
- (length - 1).downto(0) do |i|
- yield(self[i])
- end
- end
+ def test_type
+ assert_equal(Arrow::Type::UNION, @data_type.id)
+ end
- def to_arrow
- self
- end
+ def test_to_s
+ assert_equal("union[dense]<number: int32=2, text: string=9>",
+ @data_type.to_s)
end
end
diff --git a/c_glib/test/test-sparse-union-array.rb b/c_glib/test/test-sparse-union-array.rb
new file mode 100644
index 0000000..721f95c
--- /dev/null
+++ b/c_glib/test/test-sparse-union-array.rb
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestSparseUnionArray < Test::Unit::TestCase
+ include Helper::Buildable
+
+ def setup
+ type_ids = build_int8_array([0, 1, nil, 1, 0])
+ fields = [
+ build_int16_array([1, nil, nil, nil, 5]),
+ build_string_array([nil, "b", nil, "d", nil]),
+ ]
+ @array = Arrow::SparseUnionArray.new(type_ids, fields)
+ end
+
+ def test_value_data_type
+ fields = [
+ Arrow::Field.new("0", Arrow::Int16DataType.new),
+ Arrow::Field.new("1", Arrow::StringDataType.new),
+ ]
+ assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]),
+ @array.value_data_type)
+ end
+
+ def test_field
+ assert_equal([
+ build_int16_array([1, nil, nil, nil, 5]),
+ build_string_array([nil, "b", nil, "d", nil]),
+ ],
+ [
+ @array.get_field(0),
+ @array.get_field(1),
+ ])
+ end
+end
diff --git a/ruby/red-arrow/lib/arrow/array.rb b/c_glib/test/test-sparse-union-data-type.rb
similarity index 50%
copy from ruby/red-arrow/lib/arrow/array.rb
copy to c_glib/test/test-sparse-union-data-type.rb
index 7a0d053..ff4ce72 100644
--- a/ruby/red-arrow/lib/arrow/array.rb
+++ b/c_glib/test/test-sparse-union-data-type.rb
@@ -15,49 +15,21 @@
# specific language governing permissions and limitations
# under the License.
-module Arrow
- class Array
- include Enumerable
-
- class << self
- def new(values)
- builder_class_name = "#{name}Builder"
- if const_defined?(builder_class_name)
- builder_class = const_get(builder_class_name)
- builder_class.build(values)
- else
- super
- end
- end
- end
-
- def [](i)
- i += length if i < 0
- if null?(i)
- nil
- else
- get_value(i)
- end
- end
-
- def each
- return to_enum(__method__) unless block_given?
-
- length.times do |i|
- yield(self[i])
- end
- end
-
- def reverse_each
- return to_enum(__method__) unless block_given?
+class TestSparseUnionDataType < Test::Unit::TestCase
+ def setup
+ fields = [
+ Arrow::Field.new("number", Arrow::Int32DataType.new),
+ Arrow::Field.new("text", Arrow::StringDataType.new),
+ ]
+ @data_type = Arrow::SparseUnionDataType.new(fields, [2, 9])
+ end
- (length - 1).downto(0) do |i|
- yield(self[i])
- end
- end
+ def test_type
+ assert_equal(Arrow::Type::UNION, @data_type.id)
+ end
- def to_arrow
- self
- end
+ def test_to_s
+ assert_equal("union[sparse]<number: int32=2, text: string=9>",
+ @data_type.to_s)
end
end
diff --git a/ruby/red-arrow/lib/arrow/array.rb b/ruby/red-arrow/lib/arrow/array.rb
index 7a0d053..0492241 100644
--- a/ruby/red-arrow/lib/arrow/array.rb
+++ b/ruby/red-arrow/lib/arrow/array.rb
@@ -20,11 +20,13 @@ module Arrow
include Enumerable
class << self
- def new(values)
+ def new(*args)
+ return super if args.size != 1
+
builder_class_name = "#{name}Builder"
if const_defined?(builder_class_name)
builder_class = const_get(builder_class_name)
- builder_class.build(values)
+ builder_class.build(*args)
else
super
end