You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/05/06 06:12:36 UTC
[arrow-rs] branch master updated: Pretty Print `UnionArray`s (#1648)
This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 922bfe76b Pretty Print `UnionArray`s (#1648)
922bfe76b is described below
commit 922bfe76ba0cb45f4a0ef2f12a870b0aa0e2a74d
Author: Trent Feda <36...@users.noreply.github.com>
AuthorDate: Fri May 6 02:12:31 2022 -0400
Pretty Print `UnionArray`s (#1648)
* Add Union support to pretty/display
* Add inner null to nested Union test, Add type id to error print
---
arrow/src/util/display.rs | 42 ++++++++++++-
arrow/src/util/pretty.rs | 157 +++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 190 insertions(+), 9 deletions(-)
diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs
index 743f7f483..b0493b6ce 100644
--- a/arrow/src/util/display.rs
+++ b/arrow/src/util/display.rs
@@ -23,8 +23,9 @@ use std::sync::Arc;
use crate::array::Array;
use crate::datatypes::{
- ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type,
- Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+ ArrowNativeType, ArrowPrimitiveType, DataType, Field, Int16Type, Int32Type,
+ Int64Type, Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+ UnionMode,
};
use crate::{array, datatypes::IntervalUnit};
@@ -395,6 +396,7 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
Ok(s)
}
+ DataType::Union(field_vec, mode) => union_to_string(column, row, field_vec, mode),
_ => Err(ArrowError::InvalidArgumentError(format!(
"Pretty printing not implemented for {:?} type",
column.data_type()
@@ -402,6 +404,42 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
}
}
+/// Converts the value of the union array at `row` to a String of the form `{name=value}`, where `name` is the name of the entry in `fields` selected by the row's type id and `value` is that child's value rendered by `array_value_to_string`; returns `ArrowError::InvalidArgumentError` if `column` is not a `UnionArray` or `fields` has no entry for the type id, and propagates any error from rendering the child value.
+fn union_to_string(
+ column: &array::ArrayRef,
+ row: usize,
+ fields: &[Field],
+ mode: &UnionMode,
+) -> Result<String> {
+ let list = column // NOTE: despite its name, `list` is the column viewed as a `UnionArray`
+ .as_any()
+ .downcast_ref::<array::UnionArray>()
+ .ok_or_else(|| {
+ ArrowError::InvalidArgumentError(
+ "Repl error: could not convert union column to union array.".to_string(),
+ )
+ })?;
+ let type_id = list.type_id(row); // selects which child array holds this row's value
+ let name = fields
+ .get(type_id as usize) // the type id is used directly as an index into `fields`
+ .ok_or_else(|| {
+ ArrowError::InvalidArgumentError(format!(
+ "Repl error: could not get field name for type id: {} in union array.",
+ type_id,
+ ))
+ })?
+ .name();
+
+ let value = array_value_to_string(
+ &list.child(type_id),
+ match mode {
+ UnionMode::Dense => list.value_offset(row) as usize, // dense: the index into the child comes from `value_offset`
+ UnionMode::Sparse => row, // sparse: the child is indexed by the same row as the union
+ },
+ )?;
+
+ Ok(format!("{{{}={}}}", name, value)) // `{{` and `}}` are escaped braces, so the output is `{name=value}`
+}
/// Converts the value of the dictionary array at `row` to a String
fn dict_array_value_to_string<K: ArrowPrimitiveType>(
colum: &array::ArrayRef,
diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs
index f7e05bc07..3fa2729ba 100644
--- a/arrow/src/util/pretty.rs
+++ b/arrow/src/util/pretty.rs
@@ -109,17 +109,18 @@ mod tests {
use crate::{
array::{
self, new_null_array, Array, Date32Array, Date64Array,
- FixedSizeBinaryBuilder, Float16Array, PrimitiveBuilder, StringArray,
- StringBuilder, StringDictionaryBuilder, StructArray, Time32MillisecondArray,
- Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
- TimestampMicrosecondArray, TimestampMillisecondArray,
- TimestampNanosecondArray, TimestampSecondArray,
+ FixedSizeBinaryBuilder, Float16Array, Int32Array, PrimitiveBuilder,
+ StringArray, StringBuilder, StringDictionaryBuilder, StructArray,
+ Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
+ Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
+ TimestampNanosecondArray, TimestampSecondArray, UnionArray, UnionBuilder,
},
- datatypes::{DataType, Field, Int32Type, Schema},
+ buffer::Buffer,
+ datatypes::{DataType, Field, Float64Type, Int32Type, Schema, UnionMode},
};
use super::*;
- use crate::array::{DecimalArray, FixedSizeListBuilder, Int32Array};
+ use crate::array::{DecimalArray, FixedSizeListBuilder};
use std::fmt::Write;
use std::sync::Arc;
@@ -647,6 +648,148 @@ mod tests {
Ok(())
}
+ #[test]
+ fn test_pretty_format_dense_union() -> Result<()> { // a dense union column should print each row as `{field=value}`
+ let mut builder = UnionBuilder::new_dense(4);
+ builder.append::<Int32Type>("a", 1).unwrap();
+ builder.append::<Float64Type>("b", 3.2234).unwrap();
+ builder.append_null::<Float64Type>("b").unwrap(); // null under "b"; expected to print as `{b=}`
+ builder.append_null::<Int32Type>("a").unwrap(); // null under "a"; expected to print as `{a=}`
+ let union = builder.build().unwrap();
+
+ let schema = Schema::new(vec![Field::new(
+ "Teamsters",
+ DataType::Union(
+ vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Float64, false),
+ ],
+ UnionMode::Dense,
+ ),
+ false,
+ )]);
+
+ let batch =
+ RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
+ let table = pretty_format_batches(&[batch])?.to_string();
+ let actual: Vec<&str> = table.lines().collect();
+ let expected = vec![ // NOTE(review): rows are narrower than the border as shown here; cell padding looks collapsed in this rendering - confirm against the repository
+ "+------------+",
+ "| Teamsters |",
+ "+------------+",
+ "| {a=1} |",
+ "| {b=3.2234} |",
+ "| {b=} |",
+ "| {a=} |",
+ "+------------+",
+ ];
+
+ assert_eq!(expected, actual);
+ Ok(())
+ }
+
+ #[test]
+ fn test_pretty_format_sparse_union() -> Result<()> { // a sparse union column should print each row as `{field=value}`
+ let mut builder = UnionBuilder::new_sparse(4);
+ builder.append::<Int32Type>("a", 1).unwrap();
+ builder.append::<Float64Type>("b", 3.2234).unwrap();
+ builder.append_null::<Float64Type>("b").unwrap(); // null under "b"; expected to print as `{b=}`
+ builder.append_null::<Int32Type>("a").unwrap(); // null under "a"; expected to print as `{a=}`
+ let union = builder.build().unwrap();
+
+ let schema = Schema::new(vec![Field::new(
+ "Teamsters",
+ DataType::Union(
+ vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Float64, false),
+ ],
+ UnionMode::Sparse,
+ ),
+ false,
+ )]);
+
+ let batch =
+ RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
+ let table = pretty_format_batches(&[batch])?.to_string();
+ let actual: Vec<&str> = table.lines().collect();
+ let expected = vec![ // NOTE(review): rows are narrower than the border as shown here; cell padding looks collapsed in this rendering - confirm against the repository
+ "+------------+",
+ "| Teamsters |",
+ "+------------+",
+ "| {a=1} |",
+ "| {b=3.2234} |",
+ "| {b=} |",
+ "| {a=} |",
+ "+------------+",
+ ];
+
+ assert_eq!(expected, actual);
+ Ok(())
+ }
+
+ #[test]
+ fn test_pretty_format_nested_union() -> Result<()> { // a union nested inside another union should print as `{outer_field={inner_field=value}}`
+ // Inner UnionArray: a dense union over "b" (Int32) and "c" (Float64) with five rows, the last three of them nulls
+ let mut builder = UnionBuilder::new_dense(5);
+ builder.append::<Int32Type>("b", 1).unwrap();
+ builder.append::<Float64Type>("c", 3.2234).unwrap();
+ builder.append_null::<Float64Type>("c").unwrap();
+ builder.append_null::<Int32Type>("b").unwrap();
+ builder.append_null::<Float64Type>("c").unwrap();
+ let inner = builder.build().unwrap();
+
+ let inner_field = Field::new(
+ "European Union",
+ DataType::Union(
+ vec![
+ Field::new("b", DataType::Int32, false),
+ Field::new("c", DataType::Float64, false),
+ ],
+ UnionMode::Dense,
+ ),
+ false,
+ );
+
+ // Can't use UnionBuilder with non-primitive types, so manually build the outer UnionArray
+ let a_array = Int32Array::from(vec![None, None, None, Some(1234), Some(23)]); // only rows 2 and 3 are selected by the type ids below
+ let type_ids = Buffer::from_slice_ref(&[1_i8, 1, 0, 0, 1]); // per row: 0 selects "a", 1 selects the inner union
+
+ let children: Vec<(Field, Arc<dyn Array>)> = vec![
+ (Field::new("a", DataType::Int32, true), Arc::new(a_array)),
+ (inner_field.clone(), Arc::new(inner)),
+ ];
+
+ let outer = UnionArray::try_new(type_ids, None, children).unwrap(); // second argument is `None`; presumably the offsets buffer, absent for a sparse union - confirm
+
+ let schema = Schema::new(vec![Field::new(
+ "Teamsters",
+ DataType::Union(
+ vec![Field::new("a", DataType::Int32, true), inner_field],
+ UnionMode::Sparse,
+ ),
+ false,
+ )]);
+
+ let batch =
+ RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap();
+ let table = pretty_format_batches(&[batch])?.to_string();
+ let actual: Vec<&str> = table.lines().collect();
+ let expected = vec![ // NOTE(review): rows are narrower than the border as shown here; cell padding looks collapsed in this rendering - confirm against the repository
+ "+-----------------------------+",
+ "| Teamsters |",
+ "+-----------------------------+",
+ "| {European Union={b=1}} |",
+ "| {European Union={c=3.2234}} |",
+ "| {a=} |",
+ "| {a=1234} |",
+ "| {European Union={c=}} |",
+ "+-----------------------------+",
+ ];
+ assert_eq!(expected, actual);
+ Ok(())
+ }
+
#[test]
fn test_writing_formatted_batches() -> Result<()> {
// define a schema.