You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2019/04/27 21:27:26 UTC
[arrow] branch master updated: ARROW-5150: [Ruby] Add
Arrow::Table#raw_records
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8724f3c ARROW-5150: [Ruby] Add Arrow::Table#raw_records
8724f3c is described below
commit 8724f3ce05f89fde3a9e38f237c81ad36a210d9a
Author: Kenta Murata <mr...@mrkn.jp>
AuthorDate: Sun Apr 28 06:26:58 2019 +0900
ARROW-5150: [Ruby] Add Arrow::Table#raw_records
I'd like to add `raw_records` method in `Arrow::Table` class.
The same method of `RecordBatch` was added in #3587.
Author: Kenta Murata <mr...@mrkn.jp>
Author: Kouhei Sutou <ko...@clear-code.com>
Closes #4133 from mrkn/ruby_table_raw_records and squashes the following commits:
8d6b0d9c <Kouhei Sutou> Remove needless TODO
cc173459 <Kouhei Sutou> Unify tests for RecordBatch and Table
93967559 <Kenta Murata> Replace + with ++ for shifting row index
52cb4025 <Kenta Murata> Fix tags for errors
f7b88d4d <Kenta Murata> Add test cases for multiple columns and arrays
9ba352f7 <Kenta Murata> Add tests for StructArray
c47b5b34 <Kenta Murata> Add tests for ListArray
f6551fdc <Kenta Murata> Add tests for SparseUnionArray
a34c3603 <Kenta Murata> Add tests for DenseUnionArray
e8fb6c9b <Kenta Murata> Add Arrow::Table#raw_records
1e4f48ba <Kenta Murata> Rename
---
ruby/red-arrow/ext/arrow/arrow.cpp | 4 +
.../arrow/{record-batch.cpp => raw-records.cpp} | 54 ++-
ruby/red-arrow/ext/arrow/red-arrow.hpp | 1 +
ruby/red-arrow/lib/arrow/table.rb | 147 +++++-
.../raw-records/record-batch/test-basic-arrays.rb | 349 --------------
.../record-batch/test-dense-union-array.rb | 482 -------------------
.../raw-records/record-batch/test-list-array.rb | 498 --------------------
.../record-batch/test-sparse-union-array.rb | 471 -------------------
.../raw-records/record-batch/test-struct-array.rb | 426 -----------------
.../test/raw-records/test-basic-arrays.rb | 340 ++++++++++++++
.../test/raw-records/test-dense-union-array.rb | 492 +++++++++++++++++++
ruby/red-arrow/test/raw-records/test-list-array.rb | 520 +++++++++++++++++++++
.../{record-batch => }/test-multiple-columns.rb | 52 ++-
.../test/raw-records/test-sparse-union-array.rb | 480 +++++++++++++++++++
.../test/raw-records/test-struct-array.rb | 448 ++++++++++++++++++
ruby/red-arrow/test/raw-records/test-table.rb | 47 ++
16 files changed, 2552 insertions(+), 2259 deletions(-)
diff --git a/ruby/red-arrow/ext/arrow/arrow.cpp b/ruby/red-arrow/ext/arrow/arrow.cpp
index 48b98fb..17ac5e4 100644
--- a/ruby/red-arrow/ext/arrow/arrow.cpp
+++ b/ruby/red-arrow/ext/arrow/arrow.cpp
@@ -34,6 +34,10 @@ extern "C" void Init_arrow() {
rb_define_method(cArrowRecordBatch, "raw_records",
reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
0);
+ auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
+ rb_define_method(cArrowTable, "raw_records",
+ reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
+ 0);
red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
diff --git a/ruby/red-arrow/ext/arrow/record-batch.cpp b/ruby/red-arrow/ext/arrow/raw-records.cpp
similarity index 93%
rename from ruby/red-arrow/ext/arrow/record-batch.cpp
rename to ruby/red-arrow/ext/arrow/raw-records.cpp
index 506c8e1..a2fab55 100644
--- a/ruby/red-arrow/ext/arrow/record-batch.cpp
+++ b/ruby/red-arrow/ext/arrow/raw-records.cpp
@@ -642,11 +642,34 @@ namespace red_arrow {
auto record = rb_ary_new_capa(n_columns_);
rb_ary_push(records_, record);
}
+ row_offset_ = 0;
for (int i = 0; i < n_columns_; ++i) {
const auto array = record_batch.column(i).get();
column_index_ = i;
check_status(array->Accept(this),
- "[raw-records]");
+ "[record-batch][raw-records]");
+ }
+ return Qnil;
+ });
+ }
+
+ void build(const arrow::Table& table) {
+ rb::protect([&] {
+ const auto n_rows = table.num_rows();
+ for (int64_t i = 0; i < n_rows; ++i) {
+ auto record = rb_ary_new_capa(n_columns_);
+ rb_ary_push(records_, record);
+ }
+ for (int i = 0; i < n_columns_; ++i) {
+ const auto column = table.column(i).get();
+ const auto chunked_array = column->data();
+ column_index_ = i;
+ row_offset_ = 0;
+ for (const auto array : chunked_array->chunks()) {
+ check_status(array->Accept(this),
+ "[table][raw-records]");
+ row_offset_ += array->length();
+ }
}
return Qnil;
});
@@ -703,17 +726,17 @@ namespace red_arrow {
void convert(const ArrayType& array) {
const auto n = array.length();
if (array.null_count() > 0) {
- for (int64_t i = 0; i < n; ++i) {
+ for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) {
auto value = Qnil;
if (!array.IsNull(i)) {
value = convert_value(array, i);
}
- auto record = rb_ary_entry(records_, i);
+ auto record = rb_ary_entry(records_, ii);
rb_ary_store(record, column_index_, value);
}
} else {
- for (int64_t i = 0; i < n; ++i) {
- auto record = rb_ary_entry(records_, i);
+ for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) {
+ auto record = rb_ary_entry(records_, ii);
rb_ary_store(record, column_index_, convert_value(array, i));
}
}
@@ -731,6 +754,9 @@ namespace red_arrow {
// The current column index.
int column_index_;
+ // The current row offset.
+ int64_t row_offset_;
+
// The number of columns.
const int n_columns_;
};
@@ -753,4 +779,22 @@ namespace red_arrow {
return records;
}
+
+ VALUE
+ table_raw_records(VALUE rb_table) {
+ auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table));
+ auto table = garrow_table_get_raw(garrow_table).get();
+ const auto n_rows = table->num_rows();
+ const auto n_columns = table->num_columns();
+ auto records = rb_ary_new_capa(n_rows);
+
+ try {
+ RawRecordsBuilder builder(records, n_columns);
+ builder.build(*table);
+ } catch (rb::State& state) {
+ state.jump();
+ }
+
+ return records;
+ }
}
diff --git a/ruby/red-arrow/ext/arrow/red-arrow.hpp b/ruby/red-arrow/ext/arrow/red-arrow.hpp
index 6386d3d..90ae183 100644
--- a/ruby/red-arrow/ext/arrow/red-arrow.hpp
+++ b/ruby/red-arrow/ext/arrow/red-arrow.hpp
@@ -40,6 +40,7 @@ namespace red_arrow {
extern ID id_to_datetime;
VALUE record_batch_raw_records(VALUE obj);
+ VALUE table_raw_records(VALUE obj);
inline VALUE time_unit_to_scale(arrow::TimeUnit::type unit) {
switch (unit) {
diff --git a/ruby/red-arrow/lib/arrow/table.rb b/ruby/red-arrow/lib/arrow/table.rb
index 69a1de3..89f21aa 100644
--- a/ruby/red-arrow/lib/arrow/table.rb
+++ b/ruby/red-arrow/lib/arrow/table.rb
@@ -30,27 +30,154 @@ module Arrow
alias_method :initialize_raw, :initialize
private :initialize_raw
- def initialize(schema_or_raw_table_or_columns, columns=nil)
- if columns.nil?
- if schema_or_raw_table_or_columns[0].is_a?(Column)
- columns = schema_or_raw_table_or_columns
- fields = columns.collect(&:field)
+
+ # Creates a new {Arrow::Table}.
+ #
+ # @overload initialize(columns)
+ #
+ # @param columns [::Array<Arrow::Column>] The columns of the table.
+ #
+ # @example Create a table from columns
+ # count_field = Arrow::Field.new("count", :uint32)
+ # count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
+ # count_column = Arrow::Column.new(count_field, count_array)
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
+ # visible_column = Arrow::Column.new(visible_field, visible_array)
+ # Arrow::Table.new([count_column, visible_column])
+ #
+ # @overload initialize(raw_table)
+ #
+ # @param raw_table [Hash<String, Arrow::Array>]
+ # The pairs of column name and values of the table. Each column's
+ # values are an `Arrow::Array`.
+ #
+ # @example Create a table from column name and values
+ # Arrow::Table.new("count" => Arrow::UInt32Array.new([0, 2, nil, 4]),
+ # "visible" => Arrow::BooleanArray.new([true, nil, nil, false]))
+ #
+ # @overload initialize(raw_table)
+ #
+ # @param raw_table [Hash<String, Arrow::ChunkedArray>]
+ # The pairs of column name and values of the table. Each column's
+ # values are an `Arrow::ChunkedArray`.
+ #
+ # @example Create a table from column name and values
+ # count_chunks = [
+ # Arrow::UInt32Array.new([0, 2]),
+ # Arrow::UInt32Array.new([nil, 4]),
+ # ]
+ # visible_chunks = [
+ # Arrow::BooleanArray.new([true]),
+ # Arrow::BooleanArray.new([nil, nil, false]),
+ # ]
+ # Arrow::Table.new("count" => Arrow::ChunkedArray.new(count_chunks),
+ # "visible" => Arrow::ChunkedArray.new(visible_chunks))
+ #
+ # @overload initialize(schema, columns)
+ #
+ # @param schema [Arrow::Schema] The schema of the table.
+ # You can also specify schema as primitive Ruby objects.
+ # See {Arrow::Schema#initialize} for details.
+ #
+ # @param columns [::Array<Arrow::Column>] The data of the table.
+ #
+ # @example Create a table from schema and columns
+ # count_field = Arrow::Field.new("count", :uint32)
+ # count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
+ # count_column = Arrow::Column.new(count_field, count_array)
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
+ # visible_column = Arrow::Column.new(visible_field, visible_array)
+ # Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
+ # [count_column, visible_column])
+ #
+ # @overload initialize(schema, arrays)
+ #
+ # @param schema [Arrow::Schema] The schema of the table.
+ # You can also specify schema as primitive Ruby objects.
+ # See {Arrow::Schema#initialize} for details.
+ #
+ # @param arrays [::Array<Arrow::Array>] The data of the table.
+ #
+ # @example Create a table from schema and arrays
+ # count_field = Arrow::Field.new("count", :uint32)
+ # count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
+ # Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
+ # [count_array, visible_array])
+ #
+ # @overload initialize(schema, record_batches)
+ #
+ # @param schema [Arrow::Schema] The schema of the table.
+ # You can also specify schema as primitive Ruby objects.
+ # See {Arrow::Schema#initialize} for details.
+ #
+ # @param record_batches [::Array<Arrow::RecordBatch>] The data of the table.
+ #
+ # @example Create a table from schema and record batches
+ # count_field = Arrow::Field.new("count", :uint32)
+ # visible_field = Arrow::Field.new("visible", :boolean)
+ # schema = Arrow::Schema.new([count_field, visible_field])
+ # record_batches = [
+ # Arrow::RecordBatch.new(schema, [[0, true], [2, nil], [nil, nil]]),
+ # Arrow::RecordBatch.new(schema, [[4, false]]),
+ # ]
+ # Arrow::Table.new(schema, record_batches)
+ #
+ # @overload initialize(schema, raw_records)
+ #
+ # @param schema [Arrow::Schema] The schema of the table.
+ # You can also specify schema as primitive Ruby objects.
+ # See {Arrow::Schema#initialize} for details.
+ #
+ # @param raw_records [::Array<::Array>] The data of the table as primitive
+ # Ruby objects.
+ #
+ # @example Create a table from schema and raw records
+ # schema = {
+ # count: :uint32,
+ # visible: :boolean,
+ # }
+ # raw_records = [
+ # [0, true],
+ # [2, nil],
+ # [nil, nil],
+ # [4, false],
+ # ]
+ # Arrow::Table.new(schema, raw_records)
+ def initialize(*args)
+ n_args = args.size
+ case n_args
+ when 1
+ if args[0][0].is_a?(Column)
+ values = args[0]
+ fields = values.collect(&:field)
schema = Schema.new(fields)
else
- raw_table = schema_or_raw_table_or_columns
+ raw_table = args[0]
fields = []
- columns = []
+ values = []
raw_table.each do |name, array|
field = Field.new(name.to_s, array.value_data_type)
fields << field
- columns << Column.new(field, array)
+ values << Column.new(field, array)
end
schema = Schema.new(fields)
end
+ when 2
+ schema = args[0]
+ schema = Schema.new(schema) unless schema.is_a?(Schema)
+ values = args[1]
+ if values[0].is_a?(::Array)
+ values = [RecordBatch.new(schema, values)]
+ end
else
- schema = schema_or_raw_table_or_columns
+ message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
+ raise ArgumentError, message
end
- initialize_raw(schema, columns)
+ initialize_raw(schema, values)
end
def columns
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-basic-arrays.rb b/ruby/red-arrow/test/raw-records/record-batch/test-basic-arrays.rb
deleted file mode 100644
index eee2699..0000000
--- a/ruby/red-arrow/test/raw-records/record-batch/test-basic-arrays.rb
+++ /dev/null
@@ -1,349 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
- test("NullArray") do
- records = [
- [nil],
- [nil],
- [nil],
- [nil],
- ]
- array = Arrow::NullArray.new(records.size)
- schema = Arrow::Schema.new(column: :null)
- record_batch = Arrow::RecordBatch.new(schema,
- records.size,
- [array])
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BooleanArray") do
- records = [
- [true],
- [nil],
- [false],
- ]
- record_batch = Arrow::RecordBatch.new({column: :boolean},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int8Array") do
- records = [
- [-(2 ** 7)],
- [nil],
- [(2 ** 7) - 1],
- ]
- record_batch = Arrow::RecordBatch.new({column: :int8},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt8Array") do
- records = [
- [0],
- [nil],
- [(2 ** 8) - 1],
- ]
- record_batch = Arrow::RecordBatch.new({column: :uint8},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int16Array") do
- records = [
- [-(2 ** 15)],
- [nil],
- [(2 ** 15) - 1],
- ]
- record_batch = Arrow::RecordBatch.new({column: :int16},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt16Array") do
- records = [
- [0],
- [nil],
- [(2 ** 16) - 1],
- ]
- record_batch = Arrow::RecordBatch.new({column: :uint16},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int32Array") do
- records = [
- [-(2 ** 31)],
- [nil],
- [(2 ** 31) - 1],
- ]
- record_batch = Arrow::RecordBatch.new({column: :int32},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt32Array") do
- records = [
- [0],
- [nil],
- [(2 ** 32) - 1],
- ]
- record_batch = Arrow::RecordBatch.new({column: :uint32},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int64Array") do
- records = [
- [-(2 ** 63)],
- [nil],
- [(2 ** 63) - 1],
- ]
- record_batch = Arrow::RecordBatch.new({column: :int64},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt64Array") do
- records = [
- [0],
- [nil],
- [(2 ** 64) - 1],
- ]
- record_batch = Arrow::RecordBatch.new({column: :uint64},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("FloatArray") do
- records = [
- [-1.0],
- [nil],
- [1.0],
- ]
- record_batch = Arrow::RecordBatch.new({column: :float},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DoubleArray") do
- records = [
- [-1.0],
- [nil],
- [1.0],
- ]
- record_batch = Arrow::RecordBatch.new({column: :double},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BinaryArray") do
- records = [
- ["\x00".b],
- [nil],
- ["\xff".b],
- ]
- record_batch = Arrow::RecordBatch.new({column: :binary},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("StringArray") do
- records = [
- ["Ruby"],
- [nil],
- ["\u3042"], # U+3042 HIRAGANA LETTER A
- ]
- record_batch = Arrow::RecordBatch.new({column: :string},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date32Array") do
- records = [
- [Date.new(1960, 1, 1)],
- [nil],
- [Date.new(2017, 8, 23)],
- ]
- record_batch = Arrow::RecordBatch.new({column: :date32},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date64Array") do
- records = [
- [DateTime.new(1960, 1, 1, 2, 9, 30)],
- [nil],
- [DateTime.new(2017, 8, 23, 14, 57, 2)],
- ]
- record_batch = Arrow::RecordBatch.new({column: :date64},
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- sub_test_case("TimestampArray") do
- test("second") do
- records = [
- [Time.parse("1960-01-01T02:09:30Z")],
- [nil],
- [Time.parse("2017-08-23T14:57:02Z")],
- ]
- record_batch = Arrow::RecordBatch.new({
- column: {
- type: :timestamp,
- unit: :second,
- }
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [Time.parse("1960-01-01T02:09:30.123Z")],
- [nil],
- [Time.parse("2017-08-23T14:57:02.987Z")],
- ]
- record_batch = Arrow::RecordBatch.new({
- column: {
- type: :timestamp,
- unit: :milli,
- }
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("micro") do
- records = [
- [Time.parse("1960-01-01T02:09:30.123456Z")],
- [nil],
- [Time.parse("2017-08-23T14:57:02.987654Z")],
- ]
- record_batch = Arrow::RecordBatch.new({
- column: {
- type: :timestamp,
- unit: :micro,
- }
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- [Time.parse("1960-01-01T02:09:30.123456789Z")],
- [nil],
- [Time.parse("2017-08-23T14:57:02.987654321Z")],
- ]
- record_batch = Arrow::RecordBatch.new({
- column: {
- type: :timestamp,
- unit: :nano,
- }
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time32Array") do
- test("second") do
- records = [
- [60 * 10], # 00:10:00
- [nil],
- [60 * 60 * 2 + 9], # 02:00:09
- ]
- record_batch = Arrow::RecordBatch.new({
- column: {
- type: :time32,
- unit: :second,
- }
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [(60 * 10) * 1000 + 123], # 00:10:00.123
- [nil],
- [(60 * 60 * 2 + 9) * 1000 + 987], # 02:00:09.987
- ]
- record_batch = Arrow::RecordBatch.new({
- column: {
- type: :time32,
- unit: :milli,
- }
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time64Array") do
- test("micro") do
- records = [
- [(60 * 10) * 1_000_000 + 123_456], # 00:10:00.123456
- [nil],
- [(60 * 60 * 2 + 9) * 1_000_000 + 987_654], # 02:00:09.987654
- ]
- record_batch = Arrow::RecordBatch.new({
- column: {
- type: :time64,
- unit: :micro,
- }
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- [(60 * 10) * 1_000_000_000 + 123_456_789], # 00:10:00.123456789
- [nil],
- [(60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321], # 02:00:09.987654321
- ]
- record_batch = Arrow::RecordBatch.new({
- column: {
- type: :time64,
- unit: :nano,
- }
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- test("Decimal128Array") do
- records = [
- [BigDecimal("92.92")],
- [nil],
- [BigDecimal("29.29")],
- ]
- record_batch = Arrow::RecordBatch.new({
- column: {
- type: :decimal128,
- precision: 8,
- scale: 2,
- }
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-end
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb
deleted file mode 100644
index 91477fb..0000000
--- a/ruby/red-arrow/test/raw-records/record-batch/test-dense-union-array.rb
+++ /dev/null
@@ -1,482 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
- def fields(type, type_codes)
- field_description = {}
- if type.is_a?(Hash)
- field_description = field_description.merge(type)
- else
- field_description[:type] = type
- end
- {
- column: {
- type: :dense_union,
- fields: [
- field_description.merge(name: "0"),
- field_description.merge(name: "1"),
- ],
- type_codes: type_codes,
- },
- }
- end
-
- # TODO: Use Arrow::RecordBatch.new(fields(type), records)
- def build_record_batch(type, records)
- type_codes = [0, 1]
- schema = Arrow::Schema.new(fields(type, type_codes))
- type_ids = []
- offsets = []
- arrays = schema.fields[0].data_type.fields.collect do |field|
- sub_schema = Arrow::Schema.new([field])
- sub_records = []
- records.each do |record|
- column = record[0]
- next if column.nil?
- next unless column.key?(field.name)
- sub_records << [column[field.name]]
- end
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
- sub_records)
- sub_record_batch.columns[0]
- end
- records.each do |record|
- column = record[0]
- if column.nil?
- type_ids << nil
- offsets << 0
- elsif column.key?("0")
- type_id = type_codes[0]
- type_ids << type_id
- offsets << (type_ids.count(type_id) - 1)
- elsif column.key?("1")
- type_id = type_codes[1]
- type_ids << type_id
- offsets << (type_ids.count(type_id) - 1)
- end
- end
- union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
- Arrow::Int8Array.new(type_ids),
- Arrow::Int32Array.new(offsets),
- arrays)
- schema = Arrow::Schema.new(column: union_array.value_data_type)
- Arrow::RecordBatch.new(schema,
- records.size,
- [union_array])
- end
-
- test("NullArray") do
- records = [
- [{"0" => nil}],
- [nil],
- ]
- record_batch = build_record_batch(:null, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BooleanArray") do
- records = [
- [{"0" => true}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:boolean, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int8Array") do
- records = [
- [{"0" => -(2 ** 7)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:int8, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt8Array") do
- records = [
- [{"0" => (2 ** 8) - 1}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:uint8, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int16Array") do
- records = [
- [{"0" => -(2 ** 15)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:int16, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt16Array") do
- records = [
- [{"0" => (2 ** 16) - 1}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:uint16, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int32Array") do
- records = [
- [{"0" => -(2 ** 31)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:int32, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt32Array") do
- records = [
- [{"0" => (2 ** 32) - 1}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:uint32, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int64Array") do
- records = [
- [{"0" => -(2 ** 63)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:int64, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt64Array") do
- records = [
- [{"0" => (2 ** 64) - 1}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:uint64, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("FloatArray") do
- records = [
- [{"0" => -1.0}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:float, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DoubleArray") do
- records = [
- [{"0" => -1.0}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:double, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BinaryArray") do
- records = [
- [{"0" => "\xff".b}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:binary, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("StringArray") do
- records = [
- [{"0" => "Ruby"}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:string, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date32Array") do
- records = [
- [{"0" => Date.new(1960, 1, 1)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:date32, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date64Array") do
- records = [
- [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:date64, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- sub_test_case("TimestampArray") do
- test("second") do
- records = [
- [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :timestamp,
- unit: :second,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :timestamp,
- unit: :milli,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("micro") do
- records = [
- [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :timestamp,
- unit: :micro,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :timestamp,
- unit: :nano,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time32Array") do
- test("second") do
- records = [
- [{"0" => 60 * 10}], # 00:10:00
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :time32,
- unit: :second,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :time32,
- unit: :milli,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time64Array") do
- test("micro") do
- records = [
- [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :time64,
- unit: :micro,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- # 00:10:00.123456789
- [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :time64,
- unit: :nano,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- test("Decimal128Array") do
- records = [
- [{"0" => BigDecimal("92.92")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :decimal128,
- precision: 8,
- scale: 2,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("ListArray") do
- records = [
- [{"0" => [true, nil, false]}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :list,
- field: {
- name: :sub_element,
- type: :boolean,
- },
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("StructArray") do
- records = [
- [{"0" => {"sub_field" => true}}],
- [nil],
- [{"1" => nil}],
- [{"0" => {"sub_field" => nil}}],
- ]
- record_batch = build_record_batch({
- type: :struct,
- fields: [
- {
- name: :sub_field,
- type: :boolean,
- },
- ],
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("SparseUnionArray") do
- omit("Need to add support for SparseUnionArrayBuilder")
- records = [
- [{"0" => {"field1" => true}}],
- [nil],
- [{"1" => nil}],
- [{"0" => {"field2" => nil}}],
- ]
- record_batch = build_record_batch({
- type: :sparse_union,
- fields: [
- {
- name: :field1,
- type: :boolean,
- },
- {
- name: :field2,
- type: :uint8,
- },
- ],
- type_codes: [0, 1],
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DenseUnionArray") do
- omit("Need to add support for DenseUnionArrayBuilder")
- records = [
- [{"0" => {"field1" => true}}],
- [nil],
- [{"1" => nil}],
- [{"0" => {"field2" => nil}}],
- ]
- record_batch = build_record_batch({
- type: :dense_union,
- fields: [
- {
- name: :field1,
- type: :boolean,
- },
- {
- name: :field2,
- type: :uint8,
- },
- ],
- type_codes: [0, 1],
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DictionaryArray") do
- omit("Need to add support for DictionaryArrayBuilder")
- records = [
- [{"0" => "Ruby"}],
- [nil],
- [{"1" => nil}],
- [{"0" => "GLib"}],
- ]
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
- record_batch = build_record_batch({
- type: :dictionary,
- index_data_type: :int8,
- dictionary: dictionary,
- ordered: true,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-end
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-list-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-list-array.rb
deleted file mode 100644
index 3c4963d..0000000
--- a/ruby/red-arrow/test/raw-records/record-batch/test-list-array.rb
+++ /dev/null
@@ -1,498 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-class RawRecordsRecordBatchListArrayTest < Test::Unit::TestCase
- def fields(type)
- field_description = {
- name: :element,
- }
- if type.is_a?(Hash)
- field_description = field_description.merge(type)
- else
- field_description[:type] = type
- end
- {
- column: {
- type: :list,
- field: field_description,
- },
- }
- end
-
- test("NullArray") do
- records = [
- [[nil, nil, nil]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:null),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BooleanArray") do
- records = [
- [[true, nil, false]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:boolean),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int8Array") do
- records = [
- [[-(2 ** 7), nil, (2 ** 7) - 1]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:int8),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt8Array") do
- records = [
- [[0, nil, (2 ** 8) - 1]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:uint8),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int16Array") do
- records = [
- [[-(2 ** 15), nil, (2 ** 15) - 1]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:int16),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt16Array") do
- records = [
- [[0, nil, (2 ** 16) - 1]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:uint16),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int32Array") do
- records = [
- [[-(2 ** 31), nil, (2 ** 31) - 1]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:int32),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt32Array") do
- records = [
- [[0, nil, (2 ** 32) - 1]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:uint32),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int64Array") do
- records = [
- [[-(2 ** 63), nil, (2 ** 63) - 1]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:int64),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt64Array") do
- records = [
- [[0, nil, (2 ** 64) - 1]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:uint64),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("FloatArray") do
- records = [
- [[-1.0, nil, 1.0]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:float),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DoubleArray") do
- records = [
- [[-1.0, nil, 1.0]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:double),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BinaryArray") do
- records = [
- [["\x00".b, nil, "\xff".b]],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:binary),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("StringArray") do
- records = [
- [
- [
- "Ruby",
- nil,
- "\u3042", # U+3042 HIRAGANA LETTER A
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:string),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date32Array") do
- records = [
- [
- [
- Date.new(1960, 1, 1),
- nil,
- Date.new(2017, 8, 23),
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:date32),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date64Array") do
- records = [
- [
- [
- DateTime.new(1960, 1, 1, 2, 9, 30),
- nil,
- DateTime.new(2017, 8, 23, 14, 57, 2),
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:date64),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- sub_test_case("TimestampArray") do
- test("second") do
- records = [
- [
- [
- Time.parse("1960-01-01T02:09:30Z"),
- nil,
- Time.parse("2017-08-23T14:57:02Z"),
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
- unit: :second),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [
- [
- Time.parse("1960-01-01T02:09:30.123Z"),
- nil,
- Time.parse("2017-08-23T14:57:02.987Z"),
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
- unit: :milli),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("micro") do
- records = [
- [
- [
- Time.parse("1960-01-01T02:09:30.123456Z"),
- nil,
- Time.parse("2017-08-23T14:57:02.987654Z"),
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
- unit: :micro),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- [
- [
- Time.parse("1960-01-01T02:09:30.123456789Z"),
- nil,
- Time.parse("2017-08-23T14:57:02.987654321Z"),
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
- unit: :nano),
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time32Array") do
- test("second") do
- records = [
- [
- [
- 60 * 10, # 00:10:00
- nil,
- 60 * 60 * 2 + 9, # 02:00:09
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :time32,
- unit: :second),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [
- [
- (60 * 10) * 1000 + 123, # 00:10:00.123
- nil,
- (60 * 60 * 2 + 9) * 1000 + 987, # 02:00:09.987
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :time32,
- unit: :milli),
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time64Array") do
- test("micro") do
- records = [
- [
- [
- (60 * 10) * 1_000_000 + 123_456, # 00:10:00.123456
- nil,
- (60 * 60 * 2 + 9) * 1_000_000 + 987_654, # 02:00:09.987654
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :time64,
- unit: :micro),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- [
- [
- (60 * 10) * 1_000_000_000 + 123_456_789, # 00:10:00.123456789
- nil,
- (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321, # 02:00:09.987654321
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :time64,
- unit: :nano),
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- test("Decimal128Array") do
- records = [
- [
- [
- BigDecimal("92.92"),
- nil,
- BigDecimal("29.29"),
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :decimal128,
- precision: 8,
- scale: 2),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("ListArray") do
- records = [
- [
- [
- [
- true,
- nil,
- ],
- nil,
- [
- nil,
- false,
- ],
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :list,
- field: {
- name: :sub_element,
- type: :boolean,
- }),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("StructArray") do
- records = [
- [
- [
- {"field" => true},
- nil,
- {"field" => nil},
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :struct,
- fields: [
- {
- name: :field,
- type: :boolean,
- },
- ]),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("SparseUnionArray") do
- omit("Need to add support for SparseUnionArrayBuilder")
- records = [
- [
- [
- {"field1" => true},
- nil,
- {"field2" => nil},
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :sparse_union,
- fields: [
- {
- name: :field1,
- type: :boolean,
- },
- {
- name: :field2,
- type: :uint8,
- },
- ],
- type_codes: [0, 1]),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DenseUnionArray") do
- omit("Need to add support for DenseUnionArrayBuilder")
- records = [
- [
- [
- {"field1" => true},
- nil,
- {"field2" => nil},
- ],
- ],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :dense_union,
- fields: [
- {
- name: :field1,
- type: :boolean,
- },
- {
- name: :field2,
- type: :uint8,
- },
- ],
- type_codes: [0, 1]),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DictionaryArray") do
- omit("Need to add support for DictionaryArrayBuilder")
- records = [
- [
- [
- "Ruby",
- nil,
- "GLib",
- ],
- ],
- [nil],
- ]
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
- record_batch = Arrow::RecordBatch.new(fields(type: :dictionary,
- index_data_type: :int8,
- dictionary: dictionary,
- ordered: true),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-end
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb
deleted file mode 100644
index c1947b8..0000000
--- a/ruby/red-arrow/test/raw-records/record-batch/test-sparse-union-array.rb
+++ /dev/null
@@ -1,471 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
- def fields(type, type_codes)
- field_description = {}
- if type.is_a?(Hash)
- field_description = field_description.merge(type)
- else
- field_description[:type] = type
- end
- {
- column: {
- type: :sparse_union,
- fields: [
- field_description.merge(name: "0"),
- field_description.merge(name: "1"),
- ],
- type_codes: type_codes,
- },
- }
- end
-
- # TODO: Use Arrow::RecordBatch.new(fields(type), records)
- def build_record_batch(type, records)
- type_codes = [0, 1]
- schema = Arrow::Schema.new(fields(type, type_codes))
- type_ids = []
- arrays = schema.fields[0].data_type.fields.collect do |field|
- sub_schema = Arrow::Schema.new([field])
- sub_records = records.collect do |record|
- [record[0].nil? ? nil : record[0][field.name]]
- end
- sub_record_batch = Arrow::RecordBatch.new(sub_schema,
- sub_records)
- sub_record_batch.columns[0]
- end
- records.each do |record|
- column = record[0]
- if column.nil?
- type_ids << nil
- elsif column.key?("0")
- type_ids << type_codes[0]
- elsif column.key?("1")
- type_ids << type_codes[1]
- end
- end
- union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
- Arrow::Int8Array.new(type_ids),
- arrays)
- schema = Arrow::Schema.new(column: union_array.value_data_type)
- Arrow::RecordBatch.new(schema,
- records.size,
- [union_array])
- end
-
- test("NullArray") do
- records = [
- [{"0" => nil}],
- [nil],
- ]
- record_batch = build_record_batch(:null, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BooleanArray") do
- records = [
- [{"0" => true}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:boolean, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int8Array") do
- records = [
- [{"0" => -(2 ** 7)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:int8, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt8Array") do
- records = [
- [{"0" => (2 ** 8) - 1}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:uint8, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int16Array") do
- records = [
- [{"0" => -(2 ** 15)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:int16, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt16Array") do
- records = [
- [{"0" => (2 ** 16) - 1}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:uint16, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int32Array") do
- records = [
- [{"0" => -(2 ** 31)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:int32, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt32Array") do
- records = [
- [{"0" => (2 ** 32) - 1}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:uint32, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int64Array") do
- records = [
- [{"0" => -(2 ** 63)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:int64, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt64Array") do
- records = [
- [{"0" => (2 ** 64) - 1}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:uint64, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("FloatArray") do
- records = [
- [{"0" => -1.0}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:float, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DoubleArray") do
- records = [
- [{"0" => -1.0}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:double, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BinaryArray") do
- records = [
- [{"0" => "\xff".b}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:binary, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("StringArray") do
- records = [
- [{"0" => "Ruby"}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:string, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date32Array") do
- records = [
- [{"0" => Date.new(1960, 1, 1)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:date32, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date64Array") do
- records = [
- [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch(:date64, records)
- assert_equal(records, record_batch.raw_records)
- end
-
- sub_test_case("TimestampArray") do
- test("second") do
- records = [
- [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :timestamp,
- unit: :second,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :timestamp,
- unit: :milli,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("micro") do
- records = [
- [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :timestamp,
- unit: :micro,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :timestamp,
- unit: :nano,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time32Array") do
- test("second") do
- records = [
- [{"0" => 60 * 10}], # 00:10:00
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :time32,
- unit: :second,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :time32,
- unit: :milli,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time64Array") do
- test("micro") do
- records = [
- [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :time64,
- unit: :micro,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- # 00:10:00.123456789
- [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :time64,
- unit: :nano,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- test("Decimal128Array") do
- records = [
- [{"0" => BigDecimal("92.92")}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :decimal128,
- precision: 8,
- scale: 2,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("ListArray") do
- records = [
- [{"0" => [true, nil, false]}],
- [nil],
- [{"1" => nil}],
- ]
- record_batch = build_record_batch({
- type: :list,
- field: {
- name: :sub_element,
- type: :boolean,
- },
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("StructArray") do
- records = [
- [{"0" => {"sub_field" => true}}],
- [nil],
- [{"1" => nil}],
- [{"0" => {"sub_field" => nil}}],
- ]
- record_batch = build_record_batch({
- type: :struct,
- fields: [
- {
- name: :sub_field,
- type: :boolean,
- },
- ],
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("SparseUnionArray") do
- omit("Need to add support for SparseUnionArrayBuilder")
- records = [
- [{"0" => {"field1" => true}}],
- [nil],
- [{"1" => nil}],
- [{"0" => {"field2" => nil}}],
- ]
- record_batch = build_record_batch({
- type: :sparse_union,
- fields: [
- {
- name: :field1,
- type: :boolean,
- },
- {
- name: :field2,
- type: :uint8,
- },
- ],
- type_codes: [0, 1],
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DenseUnionArray") do
- omit("Need to add support for DenseUnionArrayBuilder")
- records = [
- [{"0" => {"field1" => true}}],
- [nil],
- [{"1" => nil}],
- [{"0" => {"field2" => nil}}],
- ]
- record_batch = build_record_batch({
- type: :dense_union,
- fields: [
- {
- name: :field1,
- type: :boolean,
- },
- {
- name: :field2,
- type: :uint8,
- },
- ],
- type_codes: [0, 1],
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DictionaryArray") do
- omit("Need to add support for DictionaryArrayBuilder")
- records = [
- [{"0" => "Ruby"}],
- [nil],
- [{"1" => nil}],
- [{"0" => "GLib"}],
- ]
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
- record_batch = build_record_batch({
- type: :dictionary,
- index_data_type: :int8,
- dictionary: dictionary,
- ordered: true,
- },
- records)
- assert_equal(records, record_batch.raw_records)
- end
-end
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-struct-array.rb b/ruby/red-arrow/test/raw-records/record-batch/test-struct-array.rb
deleted file mode 100644
index eba6d29..0000000
--- a/ruby/red-arrow/test/raw-records/record-batch/test-struct-array.rb
+++ /dev/null
@@ -1,426 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-class RawRecordsRecordBatchStructArrayTest < Test::Unit::TestCase
- def fields(type)
- field_description = {
- name: :field,
- }
- if type.is_a?(Hash)
- field_description = field_description.merge(type)
- else
- field_description[:type] = type
- end
- {
- column: {
- type: :struct,
- fields: [
- field_description,
- ],
- },
- }
- end
-
- test("NullArray") do
- records = [
- [{"field" => nil}],
- [nil],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:null),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BooleanArray") do
- records = [
- [{"field" => true}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:boolean),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int8Array") do
- records = [
- [{"field" => -(2 ** 7)}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:int8),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt8Array") do
- records = [
- [{"field" => (2 ** 8) - 1}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:uint8),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int16Array") do
- records = [
- [{"field" => -(2 ** 15)}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:int16),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt16Array") do
- records = [
- [{"field" => (2 ** 16) - 1}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:uint16),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int32Array") do
- records = [
- [{"field" => -(2 ** 31)}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:int32),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt32Array") do
- records = [
- [{"field" => (2 ** 32) - 1}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:uint32),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Int64Array") do
- records = [
- [{"field" => -(2 ** 63)}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:int64),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("UInt64Array") do
- records = [
- [{"field" => (2 ** 64) - 1}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:uint64),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("FloatArray") do
- records = [
- [{"field" => -1.0}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:float),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DoubleArray") do
- records = [
- [{"field" => -1.0}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:double),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("BinaryArray") do
- records = [
- [{"field" => "\xff".b}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:binary),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("StringArray") do
- records = [
- [{"field" => "Ruby"}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:string),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date32Array") do
- records = [
- [{"field" => Date.new(1960, 1, 1)}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:date32),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("Date64Array") do
- records = [
- [{"field" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(:date64),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- sub_test_case("TimestampArray") do
- test("second") do
- records = [
- [{"field" => Time.parse("1960-01-01T02:09:30Z")}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
- unit: :second),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [{"field" => Time.parse("1960-01-01T02:09:30.123Z")}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
- unit: :milli),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("micro") do
- records = [
- [{"field" => Time.parse("1960-01-01T02:09:30.123456Z")}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
- unit: :micro),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- [{"field" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
- unit: :nano),
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time32Array") do
- test("second") do
- records = [
- [{"field" => 60 * 10}], # 00:10:00
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :time32,
- unit: :second),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("milli") do
- records = [
- [{"field" => (60 * 10) * 1000 + 123}], # 00:10:00.123
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :time32,
- unit: :milli),
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- sub_test_case("Time64Array") do
- test("micro") do
- records = [
- [{"field" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :time64,
- unit: :micro),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("nano") do
- records = [
- # 00:10:00.123456789
- [{"field" => (60 * 10) * 1_000_000_000 + 123_456_789}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :time64,
- unit: :nano),
- records)
- assert_equal(records, record_batch.raw_records)
- end
- end
-
- test("Decimal128Array") do
- records = [
- [{"field" => BigDecimal("92.92")}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :decimal128,
- precision: 8,
- scale: 2),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("ListArray") do
- records = [
- [{"field" => [true, nil, false]}],
- [nil],
- [{"field" => nil}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :list,
- field: {
- name: :sub_element,
- type: :boolean,
- }),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("StructArray") do
- records = [
- [{"field" => {"sub_field" => true}}],
- [nil],
- [{"field" => nil}],
- [{"field" => {"sub_field" => nil}}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :struct,
- fields: [
- {
- name: :sub_field,
- type: :boolean,
- },
- ]),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("SparseUnionArray") do
- omit("Need to add support for SparseUnionArrayBuilder")
- records = [
- [{"field" => {"field1" => true}}],
- [nil],
- [{"field" => nil}],
- [{"field" => {"field2" => nil}}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :sparse_union,
- fields: [
- {
- name: :field1,
- type: :boolean,
- },
- {
- name: :field2,
- type: :uint8,
- },
- ],
- type_codes: [0, 1]),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DenseUnionArray") do
- omit("Need to add support for DenseUnionArrayBuilder")
- records = [
- [{"field" => {"field1" => true}}],
- [nil],
- [{"field" => nil}],
- [{"field" => {"field2" => nil}}],
- ]
- record_batch = Arrow::RecordBatch.new(fields(type: :dense_union,
- fields: [
- {
- name: :field1,
- type: :boolean,
- },
- {
- name: :field2,
- type: :uint8,
- },
- ],
- type_codes: [0, 1]),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-
- test("DictionaryArray") do
- omit("Need to add support for DictionaryArrayBuilder")
- records = [
- [{"field" => "Ruby"}],
- [nil],
- [{"field" => nil}],
- [{"field" => "GLib"}],
- ]
- dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
- record_batch = Arrow::RecordBatch.new(fields(type: :dictionary,
- index_data_type: :int8,
- dictionary: dictionary,
- ordered: true),
- records)
- assert_equal(records, record_batch.raw_records)
- end
-end
diff --git a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
new file mode 100644
index 0000000..b70c17d
--- /dev/null
+++ b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb
@@ -0,0 +1,340 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module RawRecordsBasicArraysTests
+ def test_null
+ records = [
+ [nil],
+ [nil],
+ [nil],
+ [nil],
+ ]
+ target = build({column: :null}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_boolean
+ records = [
+ [true],
+ [nil],
+ [false],
+ ]
+ target = build({column: :boolean}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int8
+ records = [
+ [-(2 ** 7)],
+ [nil],
+ [(2 ** 7) - 1],
+ ]
+ target = build({column: :int8}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint8
+ records = [
+ [0],
+ [nil],
+ [(2 ** 8) - 1],
+ ]
+ target = build({column: :uint8}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int16
+ records = [
+ [-(2 ** 15)],
+ [nil],
+ [(2 ** 15) - 1],
+ ]
+ target = build({column: :int16}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint16
+ records = [
+ [0],
+ [nil],
+ [(2 ** 16) - 1],
+ ]
+ target = build({column: :uint16}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int32
+ records = [
+ [-(2 ** 31)],
+ [nil],
+ [(2 ** 31) - 1],
+ ]
+ target = build({column: :int32}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint32
+ records = [
+ [0],
+ [nil],
+ [(2 ** 32) - 1],
+ ]
+ target = build({column: :uint32}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int64
+ records = [
+ [-(2 ** 63)],
+ [nil],
+ [(2 ** 63) - 1],
+ ]
+ target = build({column: :int64}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint64
+ records = [
+ [0],
+ [nil],
+ [(2 ** 64) - 1],
+ ]
+ target = build({column: :uint64}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_float
+ records = [
+ [-1.0],
+ [nil],
+ [1.0],
+ ]
+ target = build({column: :float}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_double
+ records = [
+ [-1.0],
+ [nil],
+ [1.0],
+ ]
+ target = build({column: :double}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_binary
+ records = [
+ ["\x00".b],
+ [nil],
+ ["\xff".b],
+ ]
+ target = build({column: :binary}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_string
+ records = [
+ ["Ruby"],
+ [nil],
+ ["\u3042"], # U+3042 HIRAGANA LETTER A
+ ]
+ target = build({column: :string}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_date32
+ records = [
+ [Date.new(1960, 1, 1)],
+ [nil],
+ [Date.new(2017, 8, 23)],
+ ]
+ target = build({column: :date32}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_date64
+ records = [
+ [DateTime.new(1960, 1, 1, 2, 9, 30)],
+ [nil],
+ [DateTime.new(2017, 8, 23, 14, 57, 2)],
+ ]
+ target = build({column: :date64}, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_second
+ records = [
+ [Time.parse("1960-01-01T02:09:30Z")],
+ [nil],
+ [Time.parse("2017-08-23T14:57:02Z")],
+ ]
+ target = build({
+ column: {
+ type: :timestamp,
+ unit: :second,
+ }
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_milli
+ records = [
+ [Time.parse("1960-01-01T02:09:30.123Z")],
+ [nil],
+ [Time.parse("2017-08-23T14:57:02.987Z")],
+ ]
+ target = build({
+ column: {
+ type: :timestamp,
+ unit: :milli,
+ }
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_micro
+ records = [
+ [Time.parse("1960-01-01T02:09:30.123456Z")],
+ [nil],
+ [Time.parse("2017-08-23T14:57:02.987654Z")],
+ ]
+ target = build({
+ column: {
+ type: :timestamp,
+ unit: :micro,
+ }
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_nano
+ records = [
+ [Time.parse("1960-01-01T02:09:30.123456789Z")],
+ [nil],
+ [Time.parse("2017-08-23T14:57:02.987654321Z")],
+ ]
+ target = build({
+ column: {
+ type: :timestamp,
+ unit: :nano,
+ }
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time32_second
+ records = [
+ [60 * 10], # 00:10:00
+ [nil],
+ [60 * 60 * 2 + 9], # 02:00:09
+ ]
+ target = build({
+ column: {
+ type: :time32,
+ unit: :second,
+ }
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time32_milli
+ records = [
+ [(60 * 10) * 1000 + 123], # 00:10:00.123
+ [nil],
+ [(60 * 60 * 2 + 9) * 1000 + 987], # 02:00:09.987
+ ]
+ target = build({
+ column: {
+ type: :time32,
+ unit: :milli,
+ }
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time64_micro
+ records = [
+ [(60 * 10) * 1_000_000 + 123_456], # 00:10:00.123456
+ [nil],
+ [(60 * 60 * 2 + 9) * 1_000_000 + 987_654], # 02:00:09.987654
+ ]
+ target = build({
+ column: {
+ type: :time64,
+ unit: :micro,
+ }
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time64_nano
+ records = [
+ [(60 * 10) * 1_000_000_000 + 123_456_789], # 00:10:00.123456789
+ [nil],
+ [(60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321], # 02:00:09.987654321
+ ]
+ target = build({
+ column: {
+ type: :time64,
+ unit: :nano,
+ }
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_decimal128
+ records = [
+ [BigDecimal("92.92")],
+ [nil],
+ [BigDecimal("29.29")],
+ ]
+ target = build({
+ column: {
+ type: :decimal128,
+ precision: 8,
+ scale: 2,
+ }
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+end
+
+class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
+ include RawRecordsBasicArraysTests
+
+ def build(schema, records)
+ Arrow::RecordBatch.new(schema, records)
+ end
+end
+
+class RawRecordsTableBasicArraysTest < Test::Unit::TestCase
+ include RawRecordsBasicArraysTests
+
+ def build(schema, records)
+ Arrow::Table.new(schema, records)
+ end
+end
diff --git a/ruby/red-arrow/test/raw-records/test-dense-union-array.rb b/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
new file mode 100644
index 0000000..c79c093
--- /dev/null
+++ b/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
@@ -0,0 +1,492 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module RawRecordsDenseUnionArrayTests
+ def build_schema(type, type_codes)
+ field_description = {}
+ if type.is_a?(Hash)
+ field_description = field_description.merge(type)
+ else
+ field_description[:type] = type
+ end
+ {
+ column: {
+ type: :dense_union,
+ fields: [
+ field_description.merge(name: "0"),
+ field_description.merge(name: "1"),
+ ],
+ type_codes: type_codes,
+ },
+ }
+ end
+
+ # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
+ def build_record_batch(type, records)
+ type_codes = [0, 1]
+ schema = Arrow::Schema.new(build_schema(type, type_codes))
+ type_ids = []
+ offsets = []
+ arrays = schema.fields[0].data_type.fields.collect do |field|
+ sub_schema = Arrow::Schema.new([field])
+ sub_records = []
+ records.each do |record|
+ column = record[0]
+ next if column.nil?
+ next unless column.key?(field.name)
+ sub_records << [column[field.name]]
+ end
+ sub_record_batch = Arrow::RecordBatch.new(sub_schema,
+ sub_records)
+ sub_record_batch.columns[0]
+ end
+ records.each do |record|
+ column = record[0]
+ if column.nil?
+ type_ids << nil
+ offsets << 0
+ elsif column.key?("0")
+ type_id = type_codes[0]
+ type_ids << type_id
+ offsets << (type_ids.count(type_id) - 1)
+ elsif column.key?("1")
+ type_id = type_codes[1]
+ type_ids << type_id
+ offsets << (type_ids.count(type_id) - 1)
+ end
+ end
+ union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
+ Arrow::Int8Array.new(type_ids),
+ Arrow::Int32Array.new(offsets),
+ arrays)
+ schema = Arrow::Schema.new(column: union_array.value_data_type)
+ Arrow::RecordBatch.new(schema,
+ records.size,
+ [union_array])
+ end
+
+ def test_null
+ records = [
+ [{"0" => nil}],
+ [nil],
+ ]
+ target = build(:null, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_boolean
+ records = [
+ [{"0" => true}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:boolean, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int8
+ records = [
+ [{"0" => -(2 ** 7)}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:int8, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint8
+ records = [
+ [{"0" => (2 ** 8) - 1}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:uint8, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int16
+ records = [
+ [{"0" => -(2 ** 15)}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:int16, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint16
+ records = [
+ [{"0" => (2 ** 16) - 1}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:uint16, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int32
+ records = [
+ [{"0" => -(2 ** 31)}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:int32, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint32
+ records = [
+ [{"0" => (2 ** 32) - 1}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:uint32, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int64
+ records = [
+ [{"0" => -(2 ** 63)}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:int64, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint64
+ records = [
+ [{"0" => (2 ** 64) - 1}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:uint64, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_float
+ records = [
+ [{"0" => -1.0}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:float, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_double
+ records = [
+ [{"0" => -1.0}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:double, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_binary
+ records = [
+ [{"0" => "\xff".b}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:binary, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_string
+ records = [
+ [{"0" => "Ruby"}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:string, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_date32
+ records = [
+ [{"0" => Date.new(1960, 1, 1)}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:date32, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_date64
+ records = [
+ [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build(:date64, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_second
+ records = [
+ [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :second,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_milli
+ records = [
+ [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :milli,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_micro
+ records = [
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :micro,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_nano
+ records = [
+ [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :nano,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time32_second
+ records = [
+ [{"0" => 60 * 10}], # 00:10:00
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :time32,
+ unit: :second,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time32_milli
+ records = [
+ [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :time32,
+ unit: :milli,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time64_micro
+ records = [
+ [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :time64,
+ unit: :micro,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time64_nano
+ records = [
+ # 00:10:00.123456789
+ [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :time64,
+ unit: :nano,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_decimal128
+ records = [
+ [{"0" => BigDecimal("92.92")}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :decimal128,
+ precision: 8,
+ scale: 2,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_list
+ records = [
+ [{"0" => [true, nil, false]}],
+ [nil],
+ [{"1" => nil}],
+ ]
+ target = build({
+ type: :list,
+ field: {
+ name: :sub_element,
+ type: :boolean,
+ },
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_struct
+ records = [
+ [{"0" => {"sub_field" => true}}],
+ [nil],
+ [{"1" => nil}],
+ [{"0" => {"sub_field" => nil}}],
+ ]
+ target = build({
+ type: :struct,
+ fields: [
+ {
+ name: :sub_field,
+ type: :boolean,
+ },
+ ],
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_sparse_union
+ omit("Need to add support for SparseUnionArrayBuilder")
+ records = [
+ [{"0" => {"field1" => true}}],
+ [nil],
+ [{"1" => nil}],
+ [{"0" => {"field2" => nil}}],
+ ]
+ target = build({
+ type: :sparse_union,
+ fields: [
+ {
+ name: :field1,
+ type: :boolean,
+ },
+ {
+ name: :field2,
+ type: :uint8,
+ },
+ ],
+ type_codes: [0, 1],
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_dense_union
+ omit("Need to add support for DenseUnionArrayBuilder")
+ records = [
+ [{"0" => {"field1" => true}}],
+ [nil],
+ [{"1" => nil}],
+ [{"0" => {"field2" => nil}}],
+ ]
+ target = build({
+ type: :dense_union,
+ fields: [
+ {
+ name: :field1,
+ type: :boolean,
+ },
+ {
+ name: :field2,
+ type: :uint8,
+ },
+ ],
+ type_codes: [0, 1],
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_dictionary
+ omit("Need to add support for DictionaryArrayBuilder")
+ records = [
+ [{"0" => "Ruby"}],
+ [nil],
+ [{"1" => nil}],
+ [{"0" => "GLib"}],
+ ]
+ dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
+ target = build({
+ type: :dictionary,
+ index_data_type: :int8,
+ dictionary: dictionary,
+ ordered: true,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+end
+
+class RawRecordsRecordBatchDenseUnionArrayTest < Test::Unit::TestCase
+ include RawRecordsDenseUnionArrayTests
+
+ def build(type, records)
+ build_record_batch(type, records)
+ end
+end
+
+class RawRecordsTableDenseUnionArrayTest < Test::Unit::TestCase
+ include RawRecordsDenseUnionArrayTests
+
+ def build(type, records)
+ build_record_batch(type, records).to_table
+ end
+end
diff --git a/ruby/red-arrow/test/raw-records/test-list-array.rb b/ruby/red-arrow/test/raw-records/test-list-array.rb
new file mode 100644
index 0000000..ab64bdb
--- /dev/null
+++ b/ruby/red-arrow/test/raw-records/test-list-array.rb
@@ -0,0 +1,520 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module RawRecordsListArrayTests
+ def build_schema(type)
+ field_description = {
+ name: :element,
+ }
+ if type.is_a?(Hash)
+ field_description = field_description.merge(type)
+ else
+ field_description[:type] = type
+ end
+ {
+ column: {
+ type: :list,
+ field: field_description,
+ },
+ }
+ end
+
+ def test_null
+ records = [
+ [[nil, nil, nil]],
+ [nil],
+ ]
+ target = build(:null, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_boolean
+ records = [
+ [[true, nil, false]],
+ [nil],
+ ]
+ target = build(:boolean, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int8
+ records = [
+ [[-(2 ** 7), nil, (2 ** 7) - 1]],
+ [nil],
+ ]
+ target = build(:int8, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint8
+ records = [
+ [[0, nil, (2 ** 8) - 1]],
+ [nil],
+ ]
+ target = build(:uint8, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int16
+ records = [
+ [[-(2 ** 15), nil, (2 ** 15) - 1]],
+ [nil],
+ ]
+ target = build(:int16, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint16
+ records = [
+ [[0, nil, (2 ** 16) - 1]],
+ [nil],
+ ]
+ target = build(:uint16, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int32
+ records = [
+ [[-(2 ** 31), nil, (2 ** 31) - 1]],
+ [nil],
+ ]
+ target = build(:int32, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint32
+ records = [
+ [[0, nil, (2 ** 32) - 1]],
+ [nil],
+ ]
+ target = build(:uint32, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_int64
+ records = [
+ [[-(2 ** 63), nil, (2 ** 63) - 1]],
+ [nil],
+ ]
+ target = build(:int64, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_uint64
+ records = [
+ [[0, nil, (2 ** 64) - 1]],
+ [nil],
+ ]
+ target = build(:uint64, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_float
+ records = [
+ [[-1.0, nil, 1.0]],
+ [nil],
+ ]
+ target = build(:float, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_double
+ records = [
+ [[-1.0, nil, 1.0]],
+ [nil],
+ ]
+ target = build(:double, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_binary
+ records = [
+ [["\x00".b, nil, "\xff".b]],
+ [nil],
+ ]
+ target = build(:binary, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_string
+ records = [
+ [
+ [
+ "Ruby",
+ nil,
+ "\u3042", # U+3042 HIRAGANA LETTER A
+ ],
+ ],
+ [nil],
+ ]
+ target = build(:string, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_date32
+ records = [
+ [
+ [
+ Date.new(1960, 1, 1),
+ nil,
+ Date.new(2017, 8, 23),
+ ],
+ ],
+ [nil],
+ ]
+ target = build(:date32, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_date64
+ records = [
+ [
+ [
+ DateTime.new(1960, 1, 1, 2, 9, 30),
+ nil,
+ DateTime.new(2017, 8, 23, 14, 57, 2),
+ ],
+ ],
+ [nil],
+ ]
+ target = build(:date64, records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_second
+ records = [
+ [
+ [
+ Time.parse("1960-01-01T02:09:30Z"),
+ nil,
+ Time.parse("2017-08-23T14:57:02Z"),
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :second,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_milli
+ records = [
+ [
+ [
+ Time.parse("1960-01-01T02:09:30.123Z"),
+ nil,
+ Time.parse("2017-08-23T14:57:02.987Z"),
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :milli,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_micro
+ records = [
+ [
+ [
+ Time.parse("1960-01-01T02:09:30.123456Z"),
+ nil,
+ Time.parse("2017-08-23T14:57:02.987654Z"),
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :micro,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_timestamp_nano
+ records = [
+ [
+ [
+ Time.parse("1960-01-01T02:09:30.123456789Z"),
+ nil,
+ Time.parse("2017-08-23T14:57:02.987654321Z"),
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :timestamp,
+ unit: :nano,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time32_second
+ records = [
+ [
+ [
+ 60 * 10, # 00:10:00
+ nil,
+ 60 * 60 * 2 + 9, # 02:00:09
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :time32,
+ unit: :second,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time32_milli
+ records = [
+ [
+ [
+ (60 * 10) * 1000 + 123, # 00:10:00.123
+ nil,
+ (60 * 60 * 2 + 9) * 1000 + 987, # 02:00:09.987
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :time32,
+ unit: :milli,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time64_micro
+ records = [
+ [
+ [
+ (60 * 10) * 1_000_000 + 123_456, # 00:10:00.123456
+ nil,
+ (60 * 60 * 2 + 9) * 1_000_000 + 987_654, # 02:00:09.987654
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :time64,
+ unit: :micro,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_time64_nano
+ records = [
+ [
+ [
+ (60 * 10) * 1_000_000_000 + 123_456_789, # 00:10:00.123456789
+ nil,
+ (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321, # 02:00:09.987654321
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :time64,
+ unit: :nano,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_decimal128
+ records = [
+ [
+ [
+ BigDecimal("92.92"),
+ nil,
+ BigDecimal("29.29"),
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :decimal128,
+ precision: 8,
+ scale: 2,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_list
+ records = [
+ [
+ [
+ [
+ true,
+ nil,
+ ],
+ nil,
+ [
+ nil,
+ false,
+ ],
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :list,
+ field: {
+ name: :sub_element,
+ type: :boolean,
+ },
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_struct
+ records = [
+ [
+ [
+ {"field" => true},
+ nil,
+ {"field" => nil},
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :struct,
+ fields: [
+ {
+ name: :field,
+ type: :boolean,
+ },
+ ],
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_sparse_union
+ omit("Need to add support for SparseUnionArrayBuilder")
+ records = [
+ [
+ [
+ {"field1" => true},
+ nil,
+ {"field2" => nil},
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :sparse_union,
+ fields: [
+ {
+ name: :field1,
+ type: :boolean,
+ },
+ {
+ name: :field2,
+ type: :uint8,
+ },
+ ],
+ type_codes: [0, 1],
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_dense_union
+ omit("Need to add support for DenseUnionArrayBuilder")
+ records = [
+ [
+ [
+ {"field1" => true},
+ nil,
+ {"field2" => nil},
+ ],
+ ],
+ [nil],
+ ]
+ target = build({
+ type: :dense_union,
+ fields: [
+ {
+ name: :field1,
+ type: :boolean,
+ },
+ {
+ name: :field2,
+ type: :uint8,
+ },
+ ],
+ type_codes: [0, 1],
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+
+ def test_dictionary
+ omit("Need to add support for DictionaryArrayBuilder")
+ records = [
+ [
+ [
+ "Ruby",
+ nil,
+ "GLib",
+ ],
+ ],
+ [nil],
+ ]
+ dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
+ target = build({
+ type: :dictionary,
+ index_data_type: :int8,
+ dictionary: dictionary,
+ ordered: true,
+ },
+ records)
+ assert_equal(records, target.raw_records)
+ end
+end
+
+class RawRecordsRecordBatchListArrayTest < Test::Unit::TestCase
+ include RawRecordsListArrayTests
+
+ def build(type, records)
+ Arrow::RecordBatch.new(build_schema(type), records)
+ end
+end
+
+class RawRecordsTableListArrayTest < Test::Unit::TestCase
+ include RawRecordsListArrayTests
+
+ def build(type, records)
+ Arrow::Table.new(build_schema(type), records)
+ end
+end
diff --git a/ruby/red-arrow/test/raw-records/record-batch/test-multiple-columns.rb b/ruby/red-arrow/test/raw-records/test-multiple-columns.rb
similarity index 52%
rename from ruby/red-arrow/test/raw-records/record-batch/test-multiple-columns.rb
rename to ruby/red-arrow/test/raw-records/test-multiple-columns.rb
index c0e3631..50dff67 100644
--- a/ruby/red-arrow/test/raw-records/record-batch/test-multiple-columns.rb
+++ b/ruby/red-arrow/test/raw-records/test-multiple-columns.rb
@@ -15,35 +15,51 @@
# specific language governing permissions and limitations
# under the License.
-class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase
- test("3 elements") do
+module RawRecordsMultipleColumnsTests
+ def test_3_elements
records = [
[true, nil, "Ruby"],
[nil, 0, "GLib"],
[false, 2 ** 8 - 1, nil],
]
- record_batch = Arrow::RecordBatch.new([
- {name: :column0, type: :boolean},
- {name: :column1, type: :uint8},
- {name: :column2, type: :string},
- ],
- records)
- assert_equal(records, record_batch.raw_records)
+ target = build([
+ {name: :column0, type: :boolean},
+ {name: :column1, type: :uint8},
+ {name: :column2, type: :string},
+ ],
+ records)
+ assert_equal(records, target.raw_records)
end
- test("4 elements") do
+ def test_4_elements
records = [
[true, nil, "Ruby", -(2 ** 63)],
[nil, 0, "GLib", nil],
[false, 2 ** 8 - 1, nil, (2 ** 63) - 1],
]
- record_batch = Arrow::RecordBatch.new([
- {name: :column0, type: :boolean},
- {name: :column1, type: :uint8},
- {name: :column2, type: :string},
- {name: :column3, type: :int64},
- ],
- records)
- assert_equal(records, record_batch.raw_records)
+ target = build([
+ {name: :column0, type: :boolean},
+ {name: :column1, type: :uint8},
+ {name: :column2, type: :string},
+ {name: :column3, type: :int64},
+ ],
+ records)
+ assert_equal(records, target.raw_records)
+ end
+end
+
+class RawRecordsRecordBatchMultipleColumnsTest < Test::Unit::TestCase
+ include RawRecordsMultipleColumnsTests
+
+ def build(schema, records)
+ Arrow::RecordBatch.new(schema, records)
+ end
+end
+
+class RawRecordsTableMultipleColumnsTest < Test::Unit::TestCase
+ include RawRecordsMultipleColumnsTests
+
+ def build(schema, records)
+ Arrow::Table.new(schema, records)
end
end
diff --git a/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb b/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
new file mode 100644
index 0000000..f80592f
--- /dev/null
+++ b/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
@@ -0,0 +1,480 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Shared raw_records round-trip tests for a single sparse union column.
+#
+# Including test cases must define build(type, records), which returns the
+# object under test (anything that responds to raw_records).
+module RawRecordsSparseUnionArrayTests
+  # Returns a schema description with one sparse union column named
+  # "column" whose two children, "0" and "1", both use `type`.
+  #
+  # `type` is either a type name such as :int8 or a Hash field description
+  # such as {type: :timestamp, unit: :second}.
+  def build_schema(type, type_codes)
+    child = type.is_a?(Hash) ? {}.merge(type) : {type: type}
+    {
+      column: {
+        type: :sparse_union,
+        fields: ["0", "1"].collect {|name| child.merge(name: name)},
+        type_codes: type_codes,
+      },
+    }
+  end
+
+  # Builds a record batch that has one sparse union column named "column".
+  #
+  # Each record is [nil], [{"0" => value}] or [{"1" => value}]; the hash key
+  # selects the union child that holds the value.
+  #
+  # Raises ArgumentError when a record's hash has neither a "0" nor a "1"
+  # key, because that row would otherwise get no type ID and the type IDs
+  # would no longer line up with the rows.
+  #
+  # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records)
+  def build_record_batch(type, records)
+    type_codes = [0, 1]
+    union_schema = Arrow::Schema.new(build_schema(type, type_codes))
+    union_type = union_schema.fields[0].data_type
+    # One child array per union field; rows that don't use the field hold nil.
+    arrays = union_type.fields.collect do |field|
+      sub_schema = Arrow::Schema.new([field])
+      sub_records = records.collect do |record|
+        [record[0].nil? ? nil : record[0][field.name]]
+      end
+      Arrow::RecordBatch.new(sub_schema, sub_records).columns[0]
+    end
+    type_ids = records.collect do |record|
+      column = record[0]
+      next nil if column.nil?
+      # "0" wins when both keys are present.
+      index = ["0", "1"].find_index {|name| column.key?(name)}
+      if index.nil?
+        raise ArgumentError, "unknown union field: #{column.inspect}"
+      end
+      type_codes[index]
+    end
+    union_array = Arrow::SparseUnionArray.new(union_type,
+                                              Arrow::Int8Array.new(type_ids),
+                                              arrays)
+    batch_schema = Arrow::Schema.new(column: union_array.value_data_type)
+    Arrow::RecordBatch.new(batch_schema,
+                           records.size,
+                           [union_array])
+  end
+
+  def test_null
+    expected = [
+      [{"0" => nil}],
+      [nil],
+    ]
+    assert_equal(expected, build(:null, expected).raw_records)
+  end
+
+  def test_boolean
+    expected = [
+      [{"0" => true}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:boolean, expected).raw_records)
+  end
+
+  def test_int8
+    expected = [
+      [{"0" => -(2 ** 7)}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:int8, expected).raw_records)
+  end
+
+  def test_uint8
+    expected = [
+      [{"0" => (2 ** 8) - 1}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:uint8, expected).raw_records)
+  end
+
+  def test_int16
+    expected = [
+      [{"0" => -(2 ** 15)}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:int16, expected).raw_records)
+  end
+
+  def test_uint16
+    expected = [
+      [{"0" => (2 ** 16) - 1}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:uint16, expected).raw_records)
+  end
+
+  def test_int32
+    expected = [
+      [{"0" => -(2 ** 31)}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:int32, expected).raw_records)
+  end
+
+  def test_uint32
+    expected = [
+      [{"0" => (2 ** 32) - 1}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:uint32, expected).raw_records)
+  end
+
+  def test_int64
+    expected = [
+      [{"0" => -(2 ** 63)}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:int64, expected).raw_records)
+  end
+
+  def test_uint64
+    expected = [
+      [{"0" => (2 ** 64) - 1}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:uint64, expected).raw_records)
+  end
+
+  def test_float
+    expected = [
+      [{"0" => -1.0}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:float, expected).raw_records)
+  end
+
+  def test_double
+    expected = [
+      [{"0" => -1.0}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:double, expected).raw_records)
+  end
+
+  def test_binary
+    expected = [
+      [{"0" => "\xff".b}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:binary, expected).raw_records)
+  end
+
+  def test_string
+    expected = [
+      [{"0" => "Ruby"}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:string, expected).raw_records)
+  end
+
+  def test_date32
+    expected = [
+      [{"0" => Date.new(1960, 1, 1)}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:date32, expected).raw_records)
+  end
+
+  def test_date64
+    expected = [
+      [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    assert_equal(expected, build(:date64, expected).raw_records)
+  end
+
+  def test_timestamp_second
+    expected = [
+      [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :timestamp,
+      unit: :second,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_timestamp_milli
+    expected = [
+      [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :timestamp,
+      unit: :milli,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_timestamp_micro
+    expected = [
+      [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :timestamp,
+      unit: :micro,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_timestamp_nano
+    expected = [
+      [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :timestamp,
+      unit: :nano,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_time32_second
+    expected = [
+      [{"0" => 60 * 10}], # 00:10:00
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :time32,
+      unit: :second,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_time32_milli
+    expected = [
+      [{"0" => (60 * 10) * 1000 + 123}], # 00:10:00.123
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :time32,
+      unit: :milli,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_time64_micro
+    expected = [
+      [{"0" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :time64,
+      unit: :micro,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_time64_nano
+    expected = [
+      [{"0" => (60 * 10) * 1_000_000_000 + 123_456_789}], # 00:10:00.123456789
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :time64,
+      unit: :nano,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_decimal128
+    expected = [
+      [{"0" => BigDecimal("92.92")}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :decimal128,
+      precision: 8,
+      scale: 2,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_list
+    expected = [
+      [{"0" => [true, nil, false]}],
+      [nil],
+      [{"1" => nil}],
+    ]
+    data_type = {
+      type: :list,
+      field: {
+        name: :sub_element,
+        type: :boolean,
+      },
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_struct
+    expected = [
+      [{"0" => {"sub_field" => true}}],
+      [nil],
+      [{"1" => nil}],
+      [{"0" => {"sub_field" => nil}}],
+    ]
+    data_type = {
+      type: :struct,
+      fields: [
+        {
+          name: :sub_field,
+          type: :boolean,
+        },
+      ],
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_sparse_union
+    omit("Need to add support for SparseUnionArrayBuilder")
+    expected = [
+      [{"0" => {"field1" => true}}],
+      [nil],
+      [{"1" => nil}],
+      [{"0" => {"field2" => nil}}],
+    ]
+    data_type = {
+      type: :sparse_union,
+      fields: [
+        {
+          name: :field1,
+          type: :boolean,
+        },
+        {
+          name: :field2,
+          type: :uint8,
+        },
+      ],
+      type_codes: [0, 1],
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_dense_union
+    omit("Need to add support for DenseUnionArrayBuilder")
+    expected = [
+      [{"0" => {"field1" => true}}],
+      [nil],
+      [{"1" => nil}],
+      [{"0" => {"field2" => nil}}],
+    ]
+    data_type = {
+      type: :dense_union,
+      fields: [
+        {
+          name: :field1,
+          type: :boolean,
+        },
+        {
+          name: :field2,
+          type: :uint8,
+        },
+      ],
+      type_codes: [0, 1],
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_dictionary
+    omit("Need to add support for DictionaryArrayBuilder")
+    expected = [
+      [{"0" => "Ruby"}],
+      [nil],
+      [{"1" => nil}],
+      [{"0" => "GLib"}],
+    ]
+    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
+    data_type = {
+      type: :dictionary,
+      index_data_type: :int8,
+      dictionary: dictionary,
+      ordered: true,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+end
+
+# Runs the shared sparse union tests against the record batch returned by
+# build_record_batch.
+class RawRecordsRecordBatchSparseUnionArrayTest < Test::Unit::TestCase
+  include RawRecordsSparseUnionArrayTests
+
+  # Returns the object whose raw_records the shared tests inspect.
+  def build(type, records)
+    record_batch = build_record_batch(type, records)
+    record_batch
+  end
+end
+
+# Runs the shared sparse union tests against a table converted from the
+# record batch returned by build_record_batch.
+class RawRecordsTableSparseUnionArrayTest < Test::Unit::TestCase
+  include RawRecordsSparseUnionArrayTests
+
+  # Returns the object whose raw_records the shared tests inspect.
+  def build(type, records)
+    record_batch = build_record_batch(type, records)
+    record_batch.to_table
+  end
+end
diff --git a/ruby/red-arrow/test/raw-records/test-struct-array.rb b/ruby/red-arrow/test/raw-records/test-struct-array.rb
new file mode 100644
index 0000000..684ecab
--- /dev/null
+++ b/ruby/red-arrow/test/raw-records/test-struct-array.rb
@@ -0,0 +1,448 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Shared raw_records round-trip tests for a single struct column.
+#
+# Including test cases must define build(type, records), which returns the
+# object under test (anything that responds to raw_records).
+module RawRecordsStructArrayTests
+  # Returns a schema description with one struct column named "column"
+  # that has a single member named "field" of the given type.
+  #
+  # `type` is either a type name such as :int8 or a Hash field description
+  # such as {type: :timestamp, unit: :second}.
+  def build_schema(type)
+    overrides = type.is_a?(Hash) ? type : {type: type}
+    {
+      column: {
+        type: :struct,
+        fields: [
+          {name: :field}.merge(overrides),
+        ],
+      },
+    }
+  end
+
+  def test_null
+    expected = [
+      [{"field" => nil}],
+      [nil],
+    ]
+    assert_equal(expected, build(:null, expected).raw_records)
+  end
+
+  def test_boolean
+    expected = [
+      [{"field" => true}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:boolean, expected).raw_records)
+  end
+
+  def test_int8
+    expected = [
+      [{"field" => -(2 ** 7)}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:int8, expected).raw_records)
+  end
+
+  def test_uint8
+    expected = [
+      [{"field" => (2 ** 8) - 1}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:uint8, expected).raw_records)
+  end
+
+  def test_int16
+    expected = [
+      [{"field" => -(2 ** 15)}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:int16, expected).raw_records)
+  end
+
+  def test_uint16
+    expected = [
+      [{"field" => (2 ** 16) - 1}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:uint16, expected).raw_records)
+  end
+
+  def test_int32
+    expected = [
+      [{"field" => -(2 ** 31)}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:int32, expected).raw_records)
+  end
+
+  def test_uint32
+    expected = [
+      [{"field" => (2 ** 32) - 1}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:uint32, expected).raw_records)
+  end
+
+  def test_int64
+    expected = [
+      [{"field" => -(2 ** 63)}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:int64, expected).raw_records)
+  end
+
+  def test_uint64
+    expected = [
+      [{"field" => (2 ** 64) - 1}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:uint64, expected).raw_records)
+  end
+
+  def test_float
+    expected = [
+      [{"field" => -1.0}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:float, expected).raw_records)
+  end
+
+  def test_double
+    expected = [
+      [{"field" => -1.0}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:double, expected).raw_records)
+  end
+
+  def test_binary
+    expected = [
+      [{"field" => "\xff".b}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:binary, expected).raw_records)
+  end
+
+  def test_string
+    expected = [
+      [{"field" => "Ruby"}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:string, expected).raw_records)
+  end
+
+  def test_date32
+    expected = [
+      [{"field" => Date.new(1960, 1, 1)}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:date32, expected).raw_records)
+  end
+
+  def test_date64
+    expected = [
+      [{"field" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    assert_equal(expected, build(:date64, expected).raw_records)
+  end
+
+  def test_timestamp_second
+    expected = [
+      [{"field" => Time.parse("1960-01-01T02:09:30Z")}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :timestamp,
+      unit: :second,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_timestamp_milli
+    expected = [
+      [{"field" => Time.parse("1960-01-01T02:09:30.123Z")}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :timestamp,
+      unit: :milli,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_timestamp_micro
+    expected = [
+      [{"field" => Time.parse("1960-01-01T02:09:30.123456Z")}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :timestamp,
+      unit: :micro,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_timestamp_nano
+    expected = [
+      [{"field" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :timestamp,
+      unit: :nano,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_time32_second
+    expected = [
+      [{"field" => 60 * 10}], # 00:10:00
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :time32,
+      unit: :second,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_time32_milli
+    expected = [
+      [{"field" => (60 * 10) * 1000 + 123}], # 00:10:00.123
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :time32,
+      unit: :milli,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_time64_micro
+    expected = [
+      [{"field" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :time64,
+      unit: :micro,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_time64_nano
+    expected = [
+      # 00:10:00.123456789
+      [{"field" => (60 * 10) * 1_000_000_000 + 123_456_789}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :time64,
+      unit: :nano,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_decimal128
+    expected = [
+      [{"field" => BigDecimal("92.92")}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :decimal128,
+      precision: 8,
+      scale: 2,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_list
+    expected = [
+      [{"field" => [true, nil, false]}],
+      [nil],
+      [{"field" => nil}],
+    ]
+    data_type = {
+      type: :list,
+      field: {
+        name: :sub_element,
+        type: :boolean,
+      },
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_struct
+    expected = [
+      [{"field" => {"sub_field" => true}}],
+      [nil],
+      [{"field" => nil}],
+      [{"field" => {"sub_field" => nil}}],
+    ]
+    data_type = {
+      type: :struct,
+      fields: [
+        {
+          name: :sub_field,
+          type: :boolean,
+        },
+      ],
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_sparse_union
+    omit("Need to add support for SparseUnionArrayBuilder")
+    expected = [
+      [{"field" => {"field1" => true}}],
+      [nil],
+      [{"field" => nil}],
+      [{"field" => {"field2" => nil}}],
+    ]
+    data_type = {
+      type: :sparse_union,
+      fields: [
+        {
+          name: :field1,
+          type: :boolean,
+        },
+        {
+          name: :field2,
+          type: :uint8,
+        },
+      ],
+      type_codes: [0, 1],
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_dense_union
+    omit("Need to add support for DenseUnionArrayBuilder")
+    expected = [
+      [{"field" => {"field1" => true}}],
+      [nil],
+      [{"field" => nil}],
+      [{"field" => {"field2" => nil}}],
+    ]
+    data_type = {
+      type: :dense_union,
+      fields: [
+        {
+          name: :field1,
+          type: :boolean,
+        },
+        {
+          name: :field2,
+          type: :uint8,
+        },
+      ],
+      type_codes: [0, 1],
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+
+  def test_dictionary
+    omit("Need to add support for DictionaryArrayBuilder")
+    expected = [
+      [{"field" => "Ruby"}],
+      [nil],
+      [{"field" => nil}],
+      [{"field" => "GLib"}],
+    ]
+    dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
+    data_type = {
+      type: :dictionary,
+      index_data_type: :int8,
+      dictionary: dictionary,
+      ordered: true,
+    }
+    assert_equal(expected, build(data_type, expected).raw_records)
+  end
+end
+
+# Runs the shared struct column tests against Arrow::RecordBatch.
+class RawRecordsRecordBatchStructArrayTest < Test::Unit::TestCase
+  include RawRecordsStructArrayTests
+
+  # Returns the object whose raw_records the shared tests inspect.
+  def build(type, records)
+    schema = build_schema(type)
+    Arrow::RecordBatch.new(schema, records)
+  end
+end
+
+# Runs the shared struct column tests against Arrow::Table.
+class RawRecordsTableStructArrayTest < Test::Unit::TestCase
+  include RawRecordsStructArrayTests
+
+  # Returns the object whose raw_records the shared tests inspect.
+  def build(type, records)
+    schema = build_schema(type)
+    Arrow::Table.new(schema, records)
+  end
+end
diff --git a/ruby/red-arrow/test/raw-records/test-table.rb b/ruby/red-arrow/test/raw-records/test-table.rb
new file mode 100644
index 0000000..ae90217
--- /dev/null
+++ b/ruby/red-arrow/test/raw-records/test-table.rb
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Checks Arrow::Table#raw_records for a table built from more than one
+# record batch.
+class RawRecordsTableTest < Test::Unit::TestCase
+  test("2 arrays") do
+    first_batch = [
+      [true, nil, "Ruby"],
+      [nil, 0, "GLib"],
+      [false, 2 ** 8 - 1, nil],
+    ]
+    second_batch = [
+      [nil, 10, "A"],
+      [true, 20, "B"],
+      [false, nil, "C"],
+      [nil, 40, nil],
+    ]
+    schema = [
+      {name: :column0, type: :boolean},
+      {name: :column1, type: :uint8},
+      {name: :column2, type: :string},
+    ]
+    record_batches = [first_batch, second_batch].collect do |rows|
+      Arrow::RecordBatch.new(schema, rows)
+    end
+    table = Arrow::Table.new(schema, record_batches)
+    # The table is expected to yield every batch's rows, in batch order.
+    assert_equal(first_batch + second_batch, table.raw_records)
+  end
+end