You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sh...@apache.org on 2018/12/07 01:25:50 UTC

[arrow] branch master updated: ARROW-3946: [GLib] Add support for union

This is an automated email from the ASF dual-hosted git repository.

shiro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 494ef75  ARROW-3946: [GLib] Add support for union
494ef75 is described below

commit 494ef75f4989064210a699b0e3c715a7d67d07fa
Author: Kouhei Sutou <ko...@clear-code.com>
AuthorDate: Fri Dec 7 10:25:34 2018 +0900

    ARROW-3946: [GLib] Add support for union
    
    Author: Kouhei Sutou <ko...@clear-code.com>
    
    Closes #3112 from kou/glib-union-array and squashes the following commits:
    
    b1d8870f <Kouhei Sutou> Fix a typo
    c45e7fc0 <Kouhei Sutou>  Add support for union
---
 c_glib/arrow-glib/basic-array.cpp                  |  15 +-
 c_glib/arrow-glib/basic-data-type.cpp              |  11 +
 c_glib/arrow-glib/composite-array.cpp              | 171 ++++++++++++++-
 c_glib/arrow-glib/composite-array.h                |  52 ++++-
 c_glib/arrow-glib/composite-data-type.cpp          | 238 ++++++++++++++++++++-
 c_glib/arrow-glib/composite-data-type.h            |  60 ++++++
 c_glib/test/test-dense-union-array.rb              |  50 +++++
 .../test/test-dense-union-data-type.rb             |  56 ++---
 c_glib/test/test-sparse-union-array.rb             |  49 +++++
 .../test/test-sparse-union-data-type.rb            |  56 ++---
 ruby/red-arrow/lib/arrow/array.rb                  |   6 +-
 11 files changed, 661 insertions(+), 103 deletions(-)

diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index 77f64fc..47f9a95 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -209,7 +209,9 @@ enum {
   PROP_ARRAY
 };
 
-G_DEFINE_TYPE_WITH_PRIVATE(GArrowArray, garrow_array, G_TYPE_OBJECT)
+G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowArray,
+                                    garrow_array,
+                                    G_TYPE_OBJECT)
 
 #define GARROW_ARRAY_GET_PRIVATE(obj)         \
   static_cast<GArrowArrayPrivate *>(          \
@@ -2255,6 +2257,17 @@ garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array)
   case arrow::Type::type::STRUCT:
     type = GARROW_TYPE_STRUCT_ARRAY;
     break;
+  case arrow::Type::type::UNION:
+    {
+      auto arrow_union_array =
+        std::static_pointer_cast<arrow::UnionArray>(*arrow_array);
+      if (arrow_union_array->mode() == arrow::UnionMode::SPARSE) {
+        type = GARROW_TYPE_SPARSE_UNION_ARRAY;
+      } else {
+        type = GARROW_TYPE_DENSE_UNION_ARRAY;
+      }
+    }
+    break;
   case arrow::Type::type::DICTIONARY:
     type = GARROW_TYPE_DICTIONARY_ARRAY;
     break;
diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp
index 24133c9..86b86cf 100644
--- a/c_glib/arrow-glib/basic-data-type.cpp
+++ b/c_glib/arrow-glib/basic-data-type.cpp
@@ -1184,6 +1184,17 @@ garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
   case arrow::Type::type::STRUCT:
     type = GARROW_TYPE_STRUCT_DATA_TYPE;
     break;
+  case arrow::Type::type::UNION:
+    {
+      auto arrow_union_data_type =
+        std::static_pointer_cast<arrow::UnionType>(*arrow_data_type);
+      if (arrow_union_data_type->mode() == arrow::UnionMode::SPARSE) {
+        type = GARROW_TYPE_SPARSE_UNION_DATA_TYPE;
+      } else {
+        type = GARROW_TYPE_DENSE_UNION_DATA_TYPE;
+      }
+    }
+    break;
   case arrow::Type::type::DICTIONARY:
     type = GARROW_TYPE_DICTIONARY_DATA_TYPE;
     break;
diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index b040ac7..bff1858 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -41,10 +41,18 @@ G_BEGIN_DECLS
  * use #GArrowListArrayBuilder to create a new array.
  *
  * #GArrowStructArray is a class for struct array. It can store zero
- * or more structs. One struct has zero or more fields. If you don't
+ * or more structs. One struct has one or more fields. If you don't
  * have Arrow format data, you need to use #GArrowStructArrayBuilder
  * to create a new array.
  *
+ * #GArrowUnionArray is a base class for union arrays. It can store
+ * zero or more unions. Each union has one or more fields, but each
+ * union value holds the value of only one of those fields.
+ *
+ * #GArrowDenseUnionArray is a class for dense union array.
+ *
+ * #GArrowSparseUnionArray is a class for sparse union array.
+ *
  * #GArrowDictionaryArray is a class for dictionary array. It can
  * store data with dictionary and indices. It's space effective than
  * normal array when the array has many same values. You can convert a
@@ -159,7 +167,7 @@ garrow_struct_array_class_init(GArrowStructArrayClass *klass)
  * garrow_struct_array_new:
  * @data_type: The data type of the struct.
  * @length: The number of elements.
- * @children: (element-type GArrowArray): The arrays for each field
+ * @fields: (element-type GArrowArray): The arrays for each field
  *   as #GList of #GArrowArray.
  * @null_bitmap: (nullable): The bitmap that shows null elements. The
  *   N-th element is null when the N-th bit is 0, not null otherwise.
@@ -175,21 +183,21 @@ garrow_struct_array_class_init(GArrowStructArrayClass *klass)
 GArrowStructArray *
 garrow_struct_array_new(GArrowDataType *data_type,
                         gint64 length,
-                        GList *children,
+                        GList *fields,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls)
 {
   const auto arrow_data_type = garrow_data_type_get_raw(data_type);
-  std::vector<std::shared_ptr<arrow::Array>> arrow_children;
-  for (GList *node = children; node; node = node->next) {
-    GArrowArray *child = GARROW_ARRAY(node->data);
-    arrow_children.push_back(garrow_array_get_raw(child));
+  std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+  for (auto node = fields; node; node = node->next) {
+    auto child = GARROW_ARRAY(node->data);
+    arrow_fields.push_back(garrow_array_get_raw(child));
   }
   const auto arrow_bitmap = garrow_buffer_get_raw(null_bitmap);
   auto arrow_struct_array =
     std::make_shared<arrow::StructArray>(arrow_data_type,
                                          length,
-                                         arrow_children,
+                                         arrow_fields,
                                          arrow_bitmap,
                                          n_nulls);
   auto arrow_array =
@@ -264,6 +272,153 @@ garrow_struct_array_flatten(GArrowStructArray *array, GError **error)
 }
 
 
+G_DEFINE_TYPE(GArrowUnionArray,
+              garrow_union_array,
+              GARROW_TYPE_ARRAY)
+
+static void
+garrow_union_array_init(GArrowUnionArray *object)
+{
+}
+
+static void
+garrow_union_array_class_init(GArrowUnionArrayClass *klass)
+{
+}
+
+/**
+ * garrow_union_array_get_field:
+ * @array: A #GArrowUnionArray.
+ * @i: The index of the field in the union. Negative values count from the end.
+ *
+ * Returns: (nullable) (transfer full): The i-th field values as a
+ *   #GArrowArray or %NULL on out of range.
+ */
+GArrowArray *
+garrow_union_array_get_field(GArrowUnionArray *array,
+                             gint i)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_union_array =
+    std::static_pointer_cast<arrow::UnionArray>(arrow_array);
+  auto n_fields = arrow_array->num_fields();
+  if (i < 0) {
+    i += n_fields;
+  }
+  if (i < 0) {
+    return NULL;
+  }
+  if (i >= n_fields) {
+    return NULL;
+  }
+  auto arrow_field_array = arrow_union_array->child(i);
+  return garrow_array_new_raw(&arrow_field_array);
+}
+
+
+G_DEFINE_TYPE(GArrowSparseUnionArray,
+              garrow_sparse_union_array,
+              GARROW_TYPE_UNION_ARRAY)
+
+static void
+garrow_sparse_union_array_init(GArrowSparseUnionArray *object)
+{
+}
+
+static void
+garrow_sparse_union_array_class_init(GArrowSparseUnionArrayClass *klass)
+{
+}
+
+/**
+ * garrow_sparse_union_array_new:
+ * @type_ids: The field type IDs for each value as #GArrowInt8Array.
+ * @fields: (element-type GArrowArray): The arrays for each field
+ *   as #GList of #GArrowArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowSparseUnionArray
+ *   or %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GArrowSparseUnionArray *
+garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
+                              GList *fields,
+                              GError **error)
+{
+  auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
+  std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+  for (auto node = fields; node; node = node->next) {
+    auto *field = GARROW_ARRAY(node->data);
+    arrow_fields.push_back(garrow_array_get_raw(field));
+  }
+  std::shared_ptr<arrow::Array> arrow_union_array;
+  auto status = arrow::UnionArray::MakeSparse(*arrow_type_ids,
+                                              arrow_fields,
+                                              &arrow_union_array);
+  if (garrow_error_check(error, status, "[sparse-union-array][new]")) {
+    return GARROW_SPARSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
+  } else {
+    return NULL;
+  }
+}
+
+
+G_DEFINE_TYPE(GArrowDenseUnionArray,
+              garrow_dense_union_array,
+              GARROW_TYPE_UNION_ARRAY)
+
+static void
+garrow_dense_union_array_init(GArrowDenseUnionArray *object)
+{
+}
+
+static void
+garrow_dense_union_array_class_init(GArrowDenseUnionArrayClass *klass)
+{
+}
+
+/**
+ * garrow_dense_union_array_new:
+ * @type_ids: The field type IDs for each value as #GArrowInt8Array.
+ * @value_offsets: The offset of each value as #GArrowInt32Array. Each
+ *   offset is an index into the field array chosen by its type ID.
+ * @fields: (element-type GArrowArray): The arrays for each field
+ *   as #GList of #GArrowArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowDenseUnionArray
+ *   or %NULL on error.
+ *
+ * Since: 0.12.0
+ */
+GArrowDenseUnionArray *
+garrow_dense_union_array_new(GArrowInt8Array *type_ids,
+                             GArrowInt32Array *value_offsets,
+                             GList *fields,
+                             GError **error)
+{
+  auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
+  auto arrow_value_offsets = garrow_array_get_raw(GARROW_ARRAY(value_offsets));
+  std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+  for (auto node = fields; node; node = node->next) {
+    auto *field = GARROW_ARRAY(node->data);
+    arrow_fields.push_back(garrow_array_get_raw(field));
+  }
+  std::shared_ptr<arrow::Array> arrow_union_array;
+  auto status = arrow::UnionArray::MakeDense(*arrow_type_ids,
+                                             *arrow_value_offsets,
+                                             arrow_fields,
+                                             &arrow_union_array);
+  if (garrow_error_check(error, status, "[dense-union-array][new]")) {
+    return GARROW_DENSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
+  } else {
+    return NULL;
+  }
+}
+
+
 G_DEFINE_TYPE(GArrowDictionaryArray,
               garrow_dictionary_array,
               GARROW_TYPE_ARRAY)
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index ad6ad53..c634dbf 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -123,7 +123,7 @@ GType garrow_struct_array_get_type(void) G_GNUC_CONST;
 
 GArrowStructArray *garrow_struct_array_new(GArrowDataType *data_type,
                                            gint64 length,
-                                           GList *children,
+                                           GList *fields,
                                            GArrowBuffer *null_bitmap,
                                            gint64 n_nulls);
 
@@ -137,6 +137,56 @@ GARROW_AVAILABLE_IN_0_10
 GList *garrow_struct_array_flatten(GArrowStructArray *array, GError **error);
 
 
+#define GARROW_TYPE_UNION_ARRAY (garrow_union_array_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUnionArray,
+                         garrow_union_array,
+                         GARROW,
+                         UNION_ARRAY,
+                         GArrowArray)
+struct _GArrowUnionArrayClass
+{
+  GArrowArrayClass parent_class;
+};
+
+GArrowArray *
+garrow_union_array_get_field(GArrowUnionArray *array,
+                             gint i);
+
+#define GARROW_TYPE_SPARSE_UNION_ARRAY (garrow_sparse_union_array_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArray,
+                         garrow_sparse_union_array,
+                         GARROW,
+                         SPARSE_UNION_ARRAY,
+                         GArrowUnionArray)
+struct _GArrowSparseUnionArrayClass
+{
+  GArrowUnionArrayClass parent_class;
+};
+
+GArrowSparseUnionArray *
+garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
+                              GList *fields,
+                              GError **error);
+
+
+#define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArray,
+                         garrow_dense_union_array,
+                         GARROW,
+                         DENSE_UNION_ARRAY,
+                         GArrowUnionArray)
+struct _GArrowDenseUnionArrayClass
+{
+  GArrowUnionArrayClass parent_class;
+};
+
+GArrowDenseUnionArray *
+garrow_dense_union_array_new(GArrowInt8Array *type_ids,
+                             GArrowInt32Array *value_offsets,
+                             GList *fields,
+                             GError **error);
+
+
 #define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
 G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryArray,
                          garrow_dictionary_array,
diff --git a/c_glib/arrow-glib/composite-data-type.cpp b/c_glib/arrow-glib/composite-data-type.cpp
index 2645bea..a4d3d84 100644
--- a/c_glib/arrow-glib/composite-data-type.cpp
+++ b/c_glib/arrow-glib/composite-data-type.cpp
@@ -40,6 +40,12 @@ G_BEGIN_DECLS
  *
  * #GArrowStructDataType is a class for struct data type.
  *
+ * #GArrowUnionDataType is a base class for union data types.
+ *
+ * #GArrowSparseUnionDataType is a class for sparse union data type.
+ *
+ * #GArrowDenseUnionDataType is a class for dense union data type.
+ *
  * #GArrowDictionaryDataType is a class for dictionary data type.
  */
 
@@ -122,18 +128,17 @@ GArrowStructDataType *
 garrow_struct_data_type_new(GList *fields)
 {
   std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
-  for (GList *node = fields; node; node = g_list_next(node)) {
+  for (auto *node = fields; node; node = g_list_next(node)) {
     auto field = GARROW_FIELD(node->data);
     auto arrow_field = garrow_field_get_raw(field);
     arrow_fields.push_back(arrow_field);
   }
 
   auto arrow_data_type = std::make_shared<arrow::StructType>(arrow_fields);
-  GArrowStructDataType *data_type =
-    GARROW_STRUCT_DATA_TYPE(g_object_new(GARROW_TYPE_STRUCT_DATA_TYPE,
-                                         "data-type", &arrow_data_type,
-                                         NULL));
-  return data_type;
+  auto data_type = g_object_new(GARROW_TYPE_STRUCT_DATA_TYPE,
+                                "data-type", &arrow_data_type,
+                                NULL);
+  return GARROW_STRUCT_DATA_TYPE(data_type);
 }
 
 /**
@@ -189,9 +194,12 @@ garrow_struct_data_type_get_field(GArrowStructDataType *data_type,
 {
   auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
 
-  while (i < 0) {
+  if (i < 0) {
     i += arrow_data_type->num_children();
   }
+  if (i < 0) {
+    return NULL;
+  }
   if (i >= arrow_data_type->num_children()) {
     return NULL;
   }
@@ -252,6 +260,222 @@ garrow_struct_data_type_get_field_index(GArrowStructDataType *data_type,
 }
 
 
+G_DEFINE_ABSTRACT_TYPE(GArrowUnionDataType,
+                       garrow_union_data_type,
+                       GARROW_TYPE_DATA_TYPE)
+
+static void
+garrow_union_data_type_init(GArrowUnionDataType *object)
+{
+}
+
+static void
+garrow_union_data_type_class_init(GArrowUnionDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_union_data_type_get_n_fields:
+ * @data_type: A #GArrowUnionDataType.
+ *
+ * Returns: The number of fields of the union data type.
+ *
+ * Since: 0.12.0
+ */
+gint
+garrow_union_data_type_get_n_fields(GArrowUnionDataType *data_type)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  return arrow_data_type->num_children();
+}
+
+/**
+ * garrow_union_data_type_get_fields:
+ * @data_type: A #GArrowUnionDataType.
+ *
+ * Returns: (transfer full) (element-type GArrowField):
+ *   The fields of the union data type.
+ *
+ * Since: 0.12.0
+ */
+GList *
+garrow_union_data_type_get_fields(GArrowUnionDataType *data_type)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_fields = arrow_data_type->children();
+
+  GList *fields = NULL;
+  for (auto arrow_field : arrow_fields) {
+    fields = g_list_prepend(fields, garrow_field_new_raw(&arrow_field));
+  }
+  return g_list_reverse(fields);
+}
+
+/**
+ * garrow_union_data_type_get_field:
+ * @data_type: A #GArrowUnionDataType.
+ * @i: The index of the target field.
+ *
+ * Returns: (transfer full) (nullable):
+ *   The field at the index in the union data type or %NULL on not found.
+ *
+ * Since: 0.12.0
+ */
+GArrowField *
+garrow_union_data_type_get_field(GArrowUnionDataType *data_type,
+                                  gint i)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+
+  if (i < 0) {
+    i += arrow_data_type->num_children();
+  }
+  if (i < 0) {
+    return NULL;
+  }
+  if (i >= arrow_data_type->num_children()) {
+    return NULL;
+  }
+
+  auto arrow_field = arrow_data_type->child(i);
+  if (arrow_field) {
+    return garrow_field_new_raw(&arrow_field);
+  } else {
+    return NULL;
+  }
+}
+
+/**
+ * garrow_union_data_type_get_type_codes:
+ * @data_type: A #GArrowUnionDataType.
+ * @n_type_codes: (out): The number of type codes.
+ *
+ * Returns: (transfer full) (array length=n_type_codes):
+ *   The codes for each field.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 0.12.0
+ */
+guint8 *
+garrow_union_data_type_get_type_codes(GArrowUnionDataType *data_type,
+                                      gsize *n_type_codes)
+{
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_union_data_type =
+    std::static_pointer_cast<arrow::UnionType>(arrow_data_type);
+
+  const auto arrow_type_codes = arrow_union_data_type->type_codes();
+  const auto n = arrow_type_codes.size();
+  auto type_codes = static_cast<guint8 *>(g_new(guint8, n));
+  for (size_t i = 0; i < n; ++i) {
+    type_codes[i] = arrow_type_codes[i];
+  }
+  *n_type_codes = n;
+  return type_codes;
+}
+
+
+G_DEFINE_TYPE(GArrowSparseUnionDataType,
+              garrow_sparse_union_data_type,
+              GARROW_TYPE_UNION_DATA_TYPE)
+
+static void
+garrow_sparse_union_data_type_init(GArrowSparseUnionDataType *object)
+{
+}
+
+static void
+garrow_sparse_union_data_type_class_init(GArrowSparseUnionDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_sparse_union_data_type_new:
+ * @fields: (element-type GArrowField): The fields of the union.
+ * @type_codes: (array length=n_type_codes): The codes to specify each field.
+ * @n_type_codes: The number of type codes.
+ *
+ * Returns: The newly created sparse union data type.
+ */
+GArrowSparseUnionDataType *
+garrow_sparse_union_data_type_new(GList *fields,
+                                  guint8 *type_codes,
+                                  gsize n_type_codes)
+{
+  std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
+  for (auto node = fields; node; node = g_list_next(node)) {
+    auto field = GARROW_FIELD(node->data);
+    auto arrow_field = garrow_field_get_raw(field);
+    arrow_fields.push_back(arrow_field);
+  }
+
+  std::vector<uint8_t> arrow_type_codes;
+  for (gsize i = 0; i < n_type_codes; ++i) {
+    arrow_type_codes.push_back(type_codes[i]);
+  }
+
+  auto arrow_data_type =
+    std::make_shared<arrow::UnionType>(arrow_fields,
+                                       arrow_type_codes,
+                                       arrow::UnionMode::SPARSE);
+  auto data_type = g_object_new(GARROW_TYPE_SPARSE_UNION_DATA_TYPE,
+                                "data-type", &arrow_data_type,
+                                NULL);
+  return GARROW_SPARSE_UNION_DATA_TYPE(data_type);
+}
+
+
+G_DEFINE_TYPE(GArrowDenseUnionDataType,
+              garrow_dense_union_data_type,
+              GARROW_TYPE_UNION_DATA_TYPE)
+
+static void
+garrow_dense_union_data_type_init(GArrowDenseUnionDataType *object)
+{
+}
+
+static void
+garrow_dense_union_data_type_class_init(GArrowDenseUnionDataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_dense_union_data_type_new:
+ * @fields: (element-type GArrowField): The fields of the union.
+ * @type_codes: (array length=n_type_codes): The codes to specify each field.
+ * @n_type_codes: The number of type codes.
+ *
+ * Returns: The newly created dense union data type.
+ */
+GArrowDenseUnionDataType *
+garrow_dense_union_data_type_new(GList *fields,
+                                 guint8 *type_codes,
+                                 gsize n_type_codes)
+{
+  std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
+  for (auto node = fields; node; node = g_list_next(node)) {
+    auto field = GARROW_FIELD(node->data);
+    auto arrow_field = garrow_field_get_raw(field);
+    arrow_fields.push_back(arrow_field);
+  }
+
+  std::vector<uint8_t> arrow_type_codes;
+  for (gsize i = 0; i < n_type_codes; ++i) {
+    arrow_type_codes.push_back(type_codes[i]);
+  }
+
+  auto arrow_data_type =
+    std::make_shared<arrow::UnionType>(arrow_fields,
+                                       arrow_type_codes,
+                                       arrow::UnionMode::DENSE);
+  auto data_type = g_object_new(GARROW_TYPE_DENSE_UNION_DATA_TYPE,
+                                "data-type", &arrow_data_type,
+                                NULL);
+  return GARROW_DENSE_UNION_DATA_TYPE(data_type);
+}
+
+
 G_DEFINE_TYPE(GArrowDictionaryDataType,
               garrow_dictionary_data_type,
               GARROW_TYPE_FIXED_WIDTH_DATA_TYPE)
diff --git a/c_glib/arrow-glib/composite-data-type.h b/c_glib/arrow-glib/composite-data-type.h
index 7d6a02b..25e1ac3 100644
--- a/c_glib/arrow-glib/composite-data-type.h
+++ b/c_glib/arrow-glib/composite-data-type.h
@@ -96,6 +96,66 @@ gint
 garrow_struct_data_type_get_field_index(GArrowStructDataType *data_type,
                                         const gchar *name);
 
+
+#define GARROW_TYPE_UNION_DATA_TYPE (garrow_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowUnionDataType,
+                         garrow_union_data_type,
+                         GARROW,
+                         UNION_DATA_TYPE,
+                         GArrowDataType)
+struct _GArrowUnionDataTypeClass
+{
+  GArrowDataTypeClass parent_class;
+};
+
+gint
+garrow_union_data_type_get_n_fields(GArrowUnionDataType *data_type);
+GList *
+garrow_union_data_type_get_fields(GArrowUnionDataType *data_type);
+GArrowField *
+garrow_union_data_type_get_field(GArrowUnionDataType *data_type,
+                                 gint i);
+guint8 *
+garrow_union_data_type_get_type_codes(GArrowUnionDataType *data_type,
+                                      gsize *n_type_codes);
+
+
+#define GARROW_TYPE_SPARSE_UNION_DATA_TYPE      \
+  (garrow_sparse_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionDataType,
+                         garrow_sparse_union_data_type,
+                         GARROW,
+                         SPARSE_UNION_DATA_TYPE,
+                         GArrowUnionDataType)
+struct _GArrowSparseUnionDataTypeClass
+{
+  GArrowUnionDataTypeClass parent_class;
+};
+
+GArrowSparseUnionDataType *
+garrow_sparse_union_data_type_new(GList *fields,
+                                  guint8 *type_codes,
+                                  gsize n_type_codes);
+
+
+#define GARROW_TYPE_DENSE_UNION_DATA_TYPE       \
+  (garrow_dense_union_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionDataType,
+                         garrow_dense_union_data_type,
+                         GARROW,
+                         DENSE_UNION_DATA_TYPE,
+                         GArrowUnionDataType)
+struct _GArrowDenseUnionDataTypeClass
+{
+  GArrowUnionDataTypeClass parent_class;
+};
+
+GArrowDenseUnionDataType *
+garrow_dense_union_data_type_new(GList *fields,
+                                 guint8 *type_codes,
+                                 gsize n_type_codes);
+
+
 #define GARROW_TYPE_DICTIONARY_DATA_TYPE (garrow_dictionary_data_type_get_type())
 G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryDataType,
                          garrow_dictionary_data_type,
diff --git a/c_glib/test/test-dense-union-array.rb b/c_glib/test/test-dense-union-array.rb
new file mode 100644
index 0000000..fa73f8d
--- /dev/null
+++ b/c_glib/test/test-dense-union-array.rb
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDenseUnionArray < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    type_ids = build_int8_array([0, 1, nil, 1, 1])
+    value_offsets = build_int32_array([0, 0, 0, 1, 2])
+    fields = [
+      build_int16_array([1]),
+      build_string_array(["a", "b", "c"]),
+    ]
+    @array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields)
+  end
+
+  def test_value_data_type
+    fields = [
+      Arrow::Field.new("0", Arrow::Int16DataType.new),
+      Arrow::Field.new("1", Arrow::StringDataType.new),
+    ]
+    assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]),
+                 @array.value_data_type)
+  end
+
+  def test_field
+    assert_equal([
+                   build_int16_array([1]),
+                   build_string_array(["a", "b", "c"]),
+                 ],
+                 [
+                   @array.get_field(0),
+                   @array.get_field(1),
+                 ])
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/array.rb b/c_glib/test/test-dense-union-data-type.rb
similarity index 50%
copy from ruby/red-arrow/lib/arrow/array.rb
copy to c_glib/test/test-dense-union-data-type.rb
index 7a0d053..0d12954 100644
--- a/ruby/red-arrow/lib/arrow/array.rb
+++ b/c_glib/test/test-dense-union-data-type.rb
@@ -15,49 +15,21 @@
 # specific language governing permissions and limitations
 # under the License.
 
-module Arrow
-  class Array
-    include Enumerable
-
-    class << self
-      def new(values)
-        builder_class_name = "#{name}Builder"
-        if const_defined?(builder_class_name)
-          builder_class = const_get(builder_class_name)
-          builder_class.build(values)
-        else
-          super
-        end
-      end
-    end
-
-    def [](i)
-      i += length if i < 0
-      if null?(i)
-        nil
-      else
-        get_value(i)
-      end
-    end
-
-    def each
-      return to_enum(__method__) unless block_given?
-
-      length.times do |i|
-        yield(self[i])
-      end
-    end
-
-    def reverse_each
-      return to_enum(__method__) unless block_given?
+class TestDenseUnionDataType < Test::Unit::TestCase
+  def setup
+    fields = [
+      Arrow::Field.new("number", Arrow::Int32DataType.new),
+      Arrow::Field.new("text", Arrow::StringDataType.new),
+    ]
+    @data_type = Arrow::DenseUnionDataType.new(fields, [2, 9])
+  end
 
-      (length - 1).downto(0) do |i|
-        yield(self[i])
-      end
-    end
+  def test_type
+    assert_equal(Arrow::Type::UNION, @data_type.id)
+  end
 
-    def to_arrow
-      self
-    end
+  def test_to_s
+    assert_equal("union[dense]<number: int32=2, text: string=9>",
+                 @data_type.to_s)
   end
 end
diff --git a/c_glib/test/test-sparse-union-array.rb b/c_glib/test/test-sparse-union-array.rb
new file mode 100644
index 0000000..721f95c
--- /dev/null
+++ b/c_glib/test/test-sparse-union-array.rb
@@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestSparseUnionArray < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    type_ids = build_int8_array([0, 1, nil, 1, 0])
+    fields = [
+      build_int16_array([1, nil, nil, nil, 5]),
+      build_string_array([nil, "b", nil, "d", nil]),
+    ]
+    @array = Arrow::SparseUnionArray.new(type_ids, fields)
+  end
+
+  def test_value_data_type
+    fields = [
+      Arrow::Field.new("0", Arrow::Int16DataType.new),
+      Arrow::Field.new("1", Arrow::StringDataType.new),
+    ]
+    assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]),
+                 @array.value_data_type)
+  end
+
+  def test_field
+    assert_equal([
+                   build_int16_array([1, nil, nil, nil, 5]),
+                   build_string_array([nil, "b", nil, "d", nil]),
+                 ],
+                 [
+                   @array.get_field(0),
+                   @array.get_field(1),
+                 ])
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/array.rb b/c_glib/test/test-sparse-union-data-type.rb
similarity index 50%
copy from ruby/red-arrow/lib/arrow/array.rb
copy to c_glib/test/test-sparse-union-data-type.rb
index 7a0d053..ff4ce72 100644
--- a/ruby/red-arrow/lib/arrow/array.rb
+++ b/c_glib/test/test-sparse-union-data-type.rb
@@ -15,49 +15,21 @@
 # specific language governing permissions and limitations
 # under the License.
 
-module Arrow
-  class Array
-    include Enumerable
-
-    class << self
-      def new(values)
-        builder_class_name = "#{name}Builder"
-        if const_defined?(builder_class_name)
-          builder_class = const_get(builder_class_name)
-          builder_class.build(values)
-        else
-          super
-        end
-      end
-    end
-
-    def [](i)
-      i += length if i < 0
-      if null?(i)
-        nil
-      else
-        get_value(i)
-      end
-    end
-
-    def each
-      return to_enum(__method__) unless block_given?
-
-      length.times do |i|
-        yield(self[i])
-      end
-    end
-
-    def reverse_each
-      return to_enum(__method__) unless block_given?
+class TestSparseUnionDataType < Test::Unit::TestCase
+  def setup
+    fields = [
+      Arrow::Field.new("number", Arrow::Int32DataType.new),
+      Arrow::Field.new("text", Arrow::StringDataType.new),
+    ]
+    @data_type = Arrow::SparseUnionDataType.new(fields, [2, 9])
+  end
 
-      (length - 1).downto(0) do |i|
-        yield(self[i])
-      end
-    end
+  def test_type
+    assert_equal(Arrow::Type::UNION, @data_type.id)
+  end
 
-    def to_arrow
-      self
-    end
+  def test_to_s
+    assert_equal("union[sparse]<number: int32=2, text: string=9>",
+                 @data_type.to_s)
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/array.rb b/ruby/red-arrow/lib/arrow/array.rb
index 7a0d053..0492241 100644
--- a/ruby/red-arrow/lib/arrow/array.rb
+++ b/ruby/red-arrow/lib/arrow/array.rb
@@ -20,11 +20,13 @@ module Arrow
     include Enumerable
 
     class << self
-      def new(values)
+      def new(*args)
+        return super if args.size != 1
+
         builder_class_name = "#{name}Builder"
         if const_defined?(builder_class_name)
           builder_class = const_get(builder_class_name)
-          builder_class.build(values)
+          builder_class.build(*args)
         else
           super
         end