You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/05/06 06:12:36 UTC

[arrow-rs] branch master updated: Pretty Print `UnionArray`s (#1648)

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 922bfe76b Pretty Print `UnionArray`s (#1648)
922bfe76b is described below

commit 922bfe76ba0cb45f4a0ef2f12a870b0aa0e2a74d
Author: Trent Feda <36...@users.noreply.github.com>
AuthorDate: Fri May 6 02:12:31 2022 -0400

    Pretty Print `UnionArray`s (#1648)
    
    * Add Union support to pretty/display
    
    * Add inner null to nested Union test, Add type id to error print
---
 arrow/src/util/display.rs |  42 ++++++++++++-
 arrow/src/util/pretty.rs  | 157 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 190 insertions(+), 9 deletions(-)

diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs
index 743f7f483..b0493b6ce 100644
--- a/arrow/src/util/display.rs
+++ b/arrow/src/util/display.rs
@@ -23,8 +23,9 @@ use std::sync::Arc;
 
 use crate::array::Array;
 use crate::datatypes::{
-    ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type,
-    Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+    ArrowNativeType, ArrowPrimitiveType, DataType, Field, Int16Type, Int32Type,
+    Int64Type, Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+    UnionMode,
 };
 use crate::{array, datatypes::IntervalUnit};
 
@@ -395,6 +396,7 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
 
             Ok(s)
         }
+        DataType::Union(field_vec, mode) => union_to_string(column, row, field_vec, mode),
         _ => Err(ArrowError::InvalidArgumentError(format!(
             "Pretty printing not implemented for {:?} type",
             column.data_type()
@@ -402,6 +404,42 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
     }
 }
 
+/// Converts the value of the union array at `row` to a String of the form `{field_name=value}`
+fn union_to_string(
+    column: &array::ArrayRef,
+    row: usize,
+    fields: &[Field],
+    mode: &UnionMode,
+) -> Result<String> {
+    let list = column
+        .as_any()
+        .downcast_ref::<array::UnionArray>()
+        .ok_or_else(|| {
+            ArrowError::InvalidArgumentError(
+                "Repl error: could not convert union column to union array.".to_string(),
+            )
+        })?;
+    let type_id = list.type_id(row);
+    let name = fields
+        .get(type_id as usize)
+        .ok_or_else(|| {
+            ArrowError::InvalidArgumentError(format!(
+                "Repl error: could not get field name for type id: {} in union array.",
+                type_id,
+            ))
+        })?
+        .name();
+
+    let value = array_value_to_string(
+        &list.child(type_id),
+        match mode {
+            UnionMode::Dense => list.value_offset(row) as usize,
+            UnionMode::Sparse => row,
+        },
+    )?;
+
+    Ok(format!("{{{}={}}}", name, value))
+}
 /// Converts the value of the dictionary array at `row` to a String
 fn dict_array_value_to_string<K: ArrowPrimitiveType>(
     colum: &array::ArrayRef,
diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs
index f7e05bc07..3fa2729ba 100644
--- a/arrow/src/util/pretty.rs
+++ b/arrow/src/util/pretty.rs
@@ -109,17 +109,18 @@ mod tests {
     use crate::{
         array::{
             self, new_null_array, Array, Date32Array, Date64Array,
-            FixedSizeBinaryBuilder, Float16Array, PrimitiveBuilder, StringArray,
-            StringBuilder, StringDictionaryBuilder, StructArray, Time32MillisecondArray,
-            Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
-            TimestampMicrosecondArray, TimestampMillisecondArray,
-            TimestampNanosecondArray, TimestampSecondArray,
+            FixedSizeBinaryBuilder, Float16Array, Int32Array, PrimitiveBuilder,
+            StringArray, StringBuilder, StringDictionaryBuilder, StructArray,
+            Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
+            Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
+            TimestampNanosecondArray, TimestampSecondArray, UnionArray, UnionBuilder,
         },
-        datatypes::{DataType, Field, Int32Type, Schema},
+        buffer::Buffer,
+        datatypes::{DataType, Field, Float64Type, Int32Type, Schema, UnionMode},
     };
 
     use super::*;
-    use crate::array::{DecimalArray, FixedSizeListBuilder, Int32Array};
+    use crate::array::{DecimalArray, FixedSizeListBuilder};
     use std::fmt::Write;
     use std::sync::Arc;
 
@@ -647,6 +648,148 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn test_pretty_format_dense_union() -> Result<()> {
+        let mut builder = UnionBuilder::new_dense(4);
+        builder.append::<Int32Type>("a", 1).unwrap();
+        builder.append::<Float64Type>("b", 3.2234).unwrap();
+        builder.append_null::<Float64Type>("b").unwrap();
+        builder.append_null::<Int32Type>("a").unwrap();
+        let union = builder.build().unwrap();
+
+        let schema = Schema::new(vec![Field::new(
+            "Teamsters",
+            DataType::Union(
+                vec![
+                    Field::new("a", DataType::Int32, false),
+                    Field::new("b", DataType::Float64, false),
+                ],
+                UnionMode::Dense,
+            ),
+            false,
+        )]);
+
+        let batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
+        let table = pretty_format_batches(&[batch])?.to_string();
+        let actual: Vec<&str> = table.lines().collect();
+        let expected = vec![
+            "+------------+",
+            "| Teamsters  |",
+            "+------------+",
+            "| {a=1}      |",
+            "| {b=3.2234} |",
+            "| {b=}       |",
+            "| {a=}       |",
+            "+------------+",
+        ];
+
+        assert_eq!(expected, actual);
+        Ok(())
+    }
+
+    #[test]
+    fn test_pretty_format_sparse_union() -> Result<()> {
+        let mut builder = UnionBuilder::new_sparse(4);
+        builder.append::<Int32Type>("a", 1).unwrap();
+        builder.append::<Float64Type>("b", 3.2234).unwrap();
+        builder.append_null::<Float64Type>("b").unwrap();
+        builder.append_null::<Int32Type>("a").unwrap();
+        let union = builder.build().unwrap();
+
+        let schema = Schema::new(vec![Field::new(
+            "Teamsters",
+            DataType::Union(
+                vec![
+                    Field::new("a", DataType::Int32, false),
+                    Field::new("b", DataType::Float64, false),
+                ],
+                UnionMode::Sparse,
+            ),
+            false,
+        )]);
+
+        let batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap();
+        let table = pretty_format_batches(&[batch])?.to_string();
+        let actual: Vec<&str> = table.lines().collect();
+        let expected = vec![
+            "+------------+",
+            "| Teamsters  |",
+            "+------------+",
+            "| {a=1}      |",
+            "| {b=3.2234} |",
+            "| {b=}       |",
+            "| {a=}       |",
+            "+------------+",
+        ];
+
+        assert_eq!(expected, actual);
+        Ok(())
+    }
+
+    #[test]
+    fn test_pretty_format_nested_union() -> Result<()> {
+        // Inner UnionArray
+        let mut builder = UnionBuilder::new_dense(5);
+        builder.append::<Int32Type>("b", 1).unwrap();
+        builder.append::<Float64Type>("c", 3.2234).unwrap();
+        builder.append_null::<Float64Type>("c").unwrap();
+        builder.append_null::<Int32Type>("b").unwrap();
+        builder.append_null::<Float64Type>("c").unwrap();
+        let inner = builder.build().unwrap();
+
+        let inner_field = Field::new(
+            "European Union",
+            DataType::Union(
+                vec![
+                    Field::new("b", DataType::Int32, false),
+                    Field::new("c", DataType::Float64, false),
+                ],
+                UnionMode::Dense,
+            ),
+            false,
+        );
+
+        // Can't use UnionBuilder with non-primitive types, so manually build outer UnionArray
+        let a_array = Int32Array::from(vec![None, None, None, Some(1234), Some(23)]);
+        let type_ids = Buffer::from_slice_ref(&[1_i8, 1, 0, 0, 1]);
+
+        let children: Vec<(Field, Arc<dyn Array>)> = vec![
+            (Field::new("a", DataType::Int32, true), Arc::new(a_array)),
+            (inner_field.clone(), Arc::new(inner)),
+        ];
+
+        let outer = UnionArray::try_new(type_ids, None, children).unwrap();
+
+        let schema = Schema::new(vec![Field::new(
+            "Teamsters",
+            DataType::Union(
+                vec![Field::new("a", DataType::Int32, true), inner_field],
+                UnionMode::Sparse,
+            ),
+            false,
+        )]);
+
+        let batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap();
+        let table = pretty_format_batches(&[batch])?.to_string();
+        let actual: Vec<&str> = table.lines().collect();
+        let expected = vec![
+            "+-----------------------------+",
+            "| Teamsters                   |",
+            "+-----------------------------+",
+            "| {European Union={b=1}}      |",
+            "| {European Union={c=3.2234}} |",
+            "| {a=}                        |",
+            "| {a=1234}                    |",
+            "| {European Union={c=}}       |",
+            "+-----------------------------+",
+        ];
+        assert_eq!(expected, actual);
+        Ok(())
+    }
+
     #[test]
     fn test_writing_formatted_batches() -> Result<()> {
         // define a schema.