You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2019/04/25 06:36:19 UTC
[arrow] branch master updated: ARROW-5155: [GLib][Ruby] Add support
for building union arrays from data type
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ecfb807 ARROW-5155: [GLib][Ruby] Add support for building union arrays from data type
ecfb807 is described below
commit ecfb807458bfe909ecc8940bd840fc9c6169dd51
Author: Kenta Murata <mr...@mrkn.jp>
AuthorDate: Thu Apr 25 15:36:07 2019 +0900
ARROW-5155: [GLib][Ruby] Add support for building union arrays from data type
This is separated from #3723.
This should be merged after #3723.
Author: Kenta Murata <mr...@mrkn.jp>
Author: Kouhei Sutou <ko...@clear-code.com>
Closes #4127 from mrkn/glib_ruby_make_union_array_with_field_names and squashes the following commits:
e6255567 <Kouhei Sutou> Fix test data
f82ac3d1 <Kenta Murata> Fix test cases
d550dc97 <Kenta Murata> Fix comment
f1bfa07b <Kenta Murata> Stop copying a type_code vector
606a04c1 <Kenta Murata> Use new constructors of union arrays
5ad55722 <Kenta Murata> Add garrow_dense_union_array_new_data_type
c8793d5c <Kenta Murata> Add garrow_sparse_union_array_new_data_type
---
c_glib/arrow-glib/composite-array.cpp | 97 ++++++++++++++++++++++
c_glib/arrow-glib/composite-array.h | 11 +++
c_glib/test/test-dense-union-array.rb | 90 ++++++++++++++------
c_glib/test/test-sparse-union-array.rb | 87 +++++++++++++------
.../record-batch/test-dense-union-array.rb | 8 +-
.../record-batch/test-sparse-union-array.rb | 7 +-
6 files changed, 238 insertions(+), 62 deletions(-)
diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index b202fb4..4fba813 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -366,6 +366,53 @@ garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
}
}
+/**
+ * garrow_sparse_union_array_new_data_type:
+ * @data_type: The data type for the sparse union array.
+ * @type_ids: The field type IDs for each value as #GArrowInt8Array.
+ * @fields: (element-type GArrowArray): The arrays for each field
+ * as #GList of #GArrowArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowSparseUnionArray
+ * or %NULL on error.
+ *
+ * Since: 0.14.0
+ */
+GArrowSparseUnionArray *
+garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
+ GArrowInt8Array *type_ids,
+ GList *fields,
+ GError **error)
+{
+ auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+ auto arrow_union_data_type =
+ std::static_pointer_cast<arrow::UnionType>(arrow_data_type);
+ std::vector<std::string> arrow_field_names;
+ for (const auto &arrow_field : arrow_union_data_type->children()) {
+ arrow_field_names.push_back(arrow_field->name());
+ }
+ auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
+ std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+ for (auto node = fields; node; node = node->next) {
+ auto *field = GARROW_ARRAY(node->data);
+ arrow_fields.push_back(garrow_array_get_raw(field));
+ }
+ std::shared_ptr<arrow::Array> arrow_union_array;
+ auto status = arrow::UnionArray::MakeSparse(*arrow_type_ids,
+ arrow_fields,
+ arrow_field_names,
+ arrow_union_data_type->type_codes(),
+ &arrow_union_array);
+ if (garrow_error_check(error,
+ status,
+ "[sparse-union-array][new][data-type]")) {
+ return GARROW_SPARSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
+ } else {
+ return NULL;
+ }
+}
+
G_DEFINE_TYPE(GArrowDenseUnionArray,
garrow_dense_union_array,
@@ -420,6 +467,56 @@ garrow_dense_union_array_new(GArrowInt8Array *type_ids,
}
}
+/**
+ * garrow_dense_union_array_new_data_type:
+ * @data_type: The data type for the dense union array.
+ * @type_ids: The field type IDs for each value as #GArrowInt8Array.
+ * @value_offsets: The value offsets for each value as #GArrowInt32Array.
+ * Each offset is counted for each type.
+ * @fields: (element-type GArrowArray): The arrays for each field
+ * as #GList of #GArrowArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable): A newly created #GArrowDenseUnionArray
+ * or %NULL on error.
+ *
+ * Since: 0.14.0
+ */
+GArrowDenseUnionArray *
+garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type,
+ GArrowInt8Array *type_ids,
+ GArrowInt32Array *value_offsets,
+ GList *fields,
+ GError **error)
+{
+ auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+ auto arrow_union_data_type =
+ std::static_pointer_cast<arrow::UnionType>(arrow_data_type);
+ std::vector<std::string> arrow_field_names;
+ for (const auto &arrow_field : arrow_union_data_type->children()) {
+ arrow_field_names.push_back(arrow_field->name());
+ }
+ auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
+ auto arrow_value_offsets = garrow_array_get_raw(GARROW_ARRAY(value_offsets));
+ std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
+ for (auto node = fields; node; node = node->next) {
+ auto *field = GARROW_ARRAY(node->data);
+ arrow_fields.push_back(garrow_array_get_raw(field));
+ }
+ std::shared_ptr<arrow::Array> arrow_union_array;
+ auto status = arrow::UnionArray::MakeDense(*arrow_type_ids,
+ *arrow_value_offsets,
+ arrow_fields,
+ arrow_field_names,
+ arrow_union_data_type->type_codes(),
+ &arrow_union_array);
+ if (garrow_error_check(error, status, "[dense-union-array][new][data-type]")) {
+ return GARROW_DENSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
+ } else {
+ return NULL;
+ }
+}
+
G_DEFINE_TYPE(GArrowDictionaryArray,
garrow_dictionary_array,
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index a181ffc..c54c2f8 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -108,6 +108,11 @@ GArrowSparseUnionArray *
garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
GList *fields,
GError **error);
+GArrowSparseUnionArray *
+garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
+ GArrowInt8Array *type_ids,
+ GList *fields,
+ GError **error);
#define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type())
@@ -126,6 +131,12 @@ garrow_dense_union_array_new(GArrowInt8Array *type_ids,
GArrowInt32Array *value_offsets,
GList *fields,
GError **error);
+GArrowDenseUnionArray *
+garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type,
+ GArrowInt8Array *type_ids,
+ GArrowInt32Array *value_offsets,
+ GList *fields,
+ GError **error);
#define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
diff --git a/c_glib/test/test-dense-union-array.rb b/c_glib/test/test-dense-union-array.rb
index fa73f8d..aec12b4 100644
--- a/c_glib/test/test-dense-union-array.rb
+++ b/c_glib/test/test-dense-union-array.rb
@@ -18,33 +18,71 @@
class TestDenseUnionArray < Test::Unit::TestCase
include Helper::Buildable
- def setup
- type_ids = build_int8_array([0, 1, nil, 1, 1])
- value_offsets = build_int32_array([0, 0, 0, 1, 2])
- fields = [
- build_int16_array([1]),
- build_string_array(["a", "b", "c"]),
- ]
- @array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields)
- end
+ sub_test_case(".new") do
+ sub_test_case("default") do
+ def setup
+ type_ids = build_int8_array([0, 1, nil, 1, 1])
+ value_offsets = build_int32_array([0, 0, 0, 1, 2])
+ fields = [
+ build_int16_array([1]),
+ build_string_array(["a", "b", "c"]),
+ ]
+ @array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields)
+ end
- def test_value_data_type
- fields = [
- Arrow::Field.new("0", Arrow::Int16DataType.new),
- Arrow::Field.new("1", Arrow::StringDataType.new),
- ]
- assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]),
- @array.value_data_type)
- end
+ def test_value_data_type
+ fields = [
+ Arrow::Field.new("0", Arrow::Int16DataType.new),
+ Arrow::Field.new("1", Arrow::StringDataType.new),
+ ]
+ assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]),
+ @array.value_data_type)
+ end
+
+ def test_field
+ assert_equal([
+ build_int16_array([1]),
+ build_string_array(["a", "b", "c"]),
+ ],
+ [
+ @array.get_field(0),
+ @array.get_field(1),
+ ])
+ end
+ end
+
+ sub_test_case("DataType") do
+ def setup
+ data_type_fields = [
+ Arrow::Field.new("number", Arrow::Int16DataType.new),
+ Arrow::Field.new("text", Arrow::StringDataType.new),
+ ]
+ type_codes = [11, 13]
+ @data_type = Arrow::DenseUnionDataType.new(data_type_fields, type_codes)
+ type_ids = build_int8_array([11, 13, nil, 13, 13])
+ value_offsets = build_int32_array([0, 0, 0, 1, 2])
+ fields = [
+ build_int16_array([1]),
+ build_string_array(["a", "b", "c"])
+ ]
+ @array = Arrow::DenseUnionArray.new(@data_type, type_ids, value_offsets, fields)
+ end
+
+ def test_value_data_type
+ assert_equal(@data_type,
+ @array.value_data_type)
+ end
- def test_field
- assert_equal([
- build_int16_array([1]),
- build_string_array(["a", "b", "c"]),
- ],
- [
- @array.get_field(0),
- @array.get_field(1),
- ])
+ def test_field
+ assert_equal([
+ build_int16_array([1]),
+ build_string_array(["a", "b", "c"]),
+ ],
+ [
+ @array.get_field(0),
+ @array.get_field(1),
+ ])
+ end
+ end
end
end
diff --git a/c_glib/test/test-sparse-union-array.rb b/c_glib/test/test-sparse-union-array.rb
index 721f95c..62b7b3d 100644
--- a/c_glib/test/test-sparse-union-array.rb
+++ b/c_glib/test/test-sparse-union-array.rb
@@ -18,32 +18,69 @@
class TestSparseUnionArray < Test::Unit::TestCase
include Helper::Buildable
- def setup
- type_ids = build_int8_array([0, 1, nil, 1, 0])
- fields = [
- build_int16_array([1, nil, nil, nil, 5]),
- build_string_array([nil, "b", nil, "d", nil]),
- ]
- @array = Arrow::SparseUnionArray.new(type_ids, fields)
- end
+ sub_test_case(".new") do
+ sub_test_case("default") do
+ def setup
+ type_ids = build_int8_array([0, 1, nil, 1, 0])
+ fields = [
+ build_int16_array([1, nil, nil, nil, 5]),
+ build_string_array([nil, "b", nil, "d", nil]),
+ ]
+ @array = Arrow::SparseUnionArray.new(type_ids, fields)
+ end
- def test_value_data_type
- fields = [
- Arrow::Field.new("0", Arrow::Int16DataType.new),
- Arrow::Field.new("1", Arrow::StringDataType.new),
- ]
- assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]),
- @array.value_data_type)
- end
+ def test_value_data_type
+ fields = [
+ Arrow::Field.new("0", Arrow::Int16DataType.new),
+ Arrow::Field.new("1", Arrow::StringDataType.new),
+ ]
+ assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]),
+ @array.value_data_type)
+ end
+
+ def test_field
+ assert_equal([
+ build_int16_array([1, nil, nil, nil, 5]),
+ build_string_array([nil, "b", nil, "d", nil]),
+ ],
+ [
+ @array.get_field(0),
+ @array.get_field(1),
+ ])
+ end
+ end
+
+ sub_test_case("DataType") do
+ def setup
+ data_type_fields = [
+ Arrow::Field.new("number", Arrow::Int16DataType.new),
+ Arrow::Field.new("text", Arrow::StringDataType.new),
+ ]
+ type_codes = [11, 13]
+ @data_type = Arrow::SparseUnionDataType.new(data_type_fields, type_codes)
+ type_ids = build_int8_array([11, 13, nil, 13, 11])
+ fields = [
+ build_int16_array([1, nil, nil, nil, 5]),
+ build_string_array([nil, "b", nil, "d", nil]),
+ ]
+ @array = Arrow::SparseUnionArray.new(@data_type, type_ids, fields)
+ end
+
+ def test_value_data_type
+ assert_equal(@data_type,
+ @array.value_data_type)
+ end
- def test_field
- assert_equal([
- build_int16_array([1, nil, nil, nil, 5]),
- build_string_array([nil, "b", nil, "d", nil]),
- ],
- [
- @array.get_field(0),
- @array.get_field(1),
- ])
+ def test_field
+ assert_equal([
+ build_int16_array([1, nil, nil, nil, 5]),
+ build_string_array([nil, "b", nil, "d", nil]),
+ ],
+ [
+ @array.get_field(0),
+ @array.get_field(1),
+ ])
+ end
+ end
end
end
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb
index 3520eba..91477fb 100644
--- a/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb
+++ b/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb
@@ -69,12 +69,8 @@ class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
offsets << (type_ids.count(type_id) - 1)
end
end
- # TODO
- # union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
- # Arrow::Int8Array.new(type_ids),
- # Arrow::Int32Array.new(offsets),
- # arrays)
- union_array = Arrow::DenseUnionArray.new(Arrow::Int8Array.new(type_ids),
+ union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
+ Arrow::Int8Array.new(type_ids),
Arrow::Int32Array.new(offsets),
arrays)
schema = Arrow::Schema.new(column: union_array.value_data_type)
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb
index f963494..c1947b8 100644
--- a/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb
+++ b/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb
@@ -59,11 +59,8 @@ class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
type_ids << type_codes[1]
end
end
- # TODO
- # union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
- # Arrow::Int8Array.new(type_ids),
- # arrays)
- union_array = Arrow::SparseUnionArray.new(Arrow::Int8Array.new(type_ids),
+ union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
+ Arrow::Int8Array.new(type_ids),
arrays)
schema = Arrow::Schema.new(column: union_array.value_data_type)
Arrow::RecordBatch.new(schema,