You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2024/02/16 13:07:05 UTC

(arrow-datafusion) branch main updated: Add example for `ScalarStructBuilder::new_null`, fix display for `null` `ScalarValue::Struct` (#9238)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 14cb962718 Add example for `ScalarStructBuilder::new_null`, fix display for `null` `ScalarValue::Struct` (#9238)
14cb962718 is described below

commit 14cb9627184bc1680cac8965533ee8f0b06973b2
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Fri Feb 16 08:06:59 2024 -0500

    Add example for `ScalarStructBuilder::new_null`, fix display for `null` `ScalarValue::Struct` (#9238)
    
    * Minor: Add example for `ScalarStructBuilder::new_null`
    
    * Fix null display
    
    * fix docs
    
    * tweak
---
 datafusion/common/src/scalar/mod.rs            | 61 +++++++++++++++++++++++++-
 datafusion/common/src/scalar/struct_builder.rs | 36 ++++++++++++++-
 2 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs
index 29107ab10e..7e53415090 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -83,7 +83,7 @@ pub use struct_builder::ScalarStructBuilder;
 ///
 /// In general, performance will be better using arrow [`Array`]s rather than
 /// [`ScalarValue`], as it is far more efficient to process multiple values at
-/// once (vecctorized processing).
+/// once (vectorized processing).
 ///
 /// # Example
 /// ```
@@ -3103,6 +3103,11 @@ impl fmt::Display for ScalarValue {
                 // ScalarValue Struct should always have a single element
                 assert_eq!(struct_arr.len(), 1);
 
+                if struct_arr.null_count() == struct_arr.len() {
+                    write!(f, "NULL")?;
+                    return Ok(());
+                }
+
                 let columns = struct_arr.columns();
                 let fields = struct_arr.fields();
                 let nulls = struct_arr.nulls();
@@ -3298,6 +3303,7 @@ mod tests {
         as_string_array, as_struct_array, as_uint32_array, as_uint64_array,
     };
 
+    use crate::assert_batches_eq;
     use arrow::buffer::OffsetBuffer;
     use arrow::compute::{is_null, kernels};
     use arrow::datatypes::{ArrowNumericType, ArrowPrimitiveType};
@@ -5690,6 +5696,59 @@ mod tests {
         check_array(array);
     }
 
+    #[test]
+    fn test_struct_display() {
+        let field_a = Field::new("a", DataType::Int32, true);
+        let field_b = Field::new("b", DataType::Utf8, true);
+
+        let s = ScalarStructBuilder::new()
+            .with_scalar(field_a, ScalarValue::from(1i32))
+            .with_scalar(field_b, ScalarValue::Utf8(None))
+            .build()
+            .unwrap();
+
+        assert_eq!(s.to_string(), "{a:1,b:}");
+
+        let ScalarValue::Struct(arr) = s else {
+            panic!("Expected struct");
+        };
+
+        // verify how arrow itself displays the same struct array
+        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
+        let expected = [
+            "+-------------+",
+            "| s           |",
+            "+-------------+",
+            "| {a: 1, b: } |",
+            "+-------------+",
+        ];
+        assert_batches_eq!(&expected, &[batch]);
+    }
+
+    #[test]
+    fn test_struct_display_null() {
+        let fields = vec![Field::new("a", DataType::Int32, false)];
+        let s = ScalarStructBuilder::new_null(fields);
+        assert_eq!(s.to_string(), "NULL");
+
+        let ScalarValue::Struct(arr) = s else {
+            panic!("Expected struct");
+        };
+
+        // verify how arrow itself displays the same (null) struct array
+        let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
+
+        #[rustfmt::skip]
+        let expected = [
+            "+---+",
+            "| s |",
+            "+---+",
+            "|   |",
+            "+---+",
+        ];
+        assert_batches_eq!(&expected, &[batch]);
+    }
+
     #[test]
     fn test_build_timestamp_millisecond_list() {
         let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
diff --git a/datafusion/common/src/scalar/struct_builder.rs b/datafusion/common/src/scalar/struct_builder.rs
index 926e100417..1192757e89 100644
--- a/datafusion/common/src/scalar/struct_builder.rs
+++ b/datafusion/common/src/scalar/struct_builder.rs
@@ -39,8 +39,40 @@ impl ScalarStructBuilder {
         Self::default()
     }
 
-    /// Return a new [`ScalarValue::Struct`] with the specified fields and a
-    /// single null value
+    /// Return a new [`ScalarValue::Struct`] with a single `null` value.
+    ///
+    /// Note this is different from a struct where each of the specified fields
+    /// is null (e.g. `{a: NULL}`)
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # use arrow::datatypes::{DataType, Field};
+    /// # use datafusion_common::scalar::ScalarStructBuilder;
+    /// let fields = vec![
+    ///    Field::new("a", DataType::Int32, false),
+    /// ];
+    /// let sv = ScalarStructBuilder::new_null(fields);
+    /// // Note this is `NULL`, not `{a: NULL}`
+    /// assert_eq!(format!("{sv}"), "NULL");
+    /// ```
+    ///
+    /// To create a struct where the *fields* are null, use `Self::new()` and
+    /// pass null values for each field:
+    ///
+    /// ```rust
+    /// # use arrow::datatypes::{DataType, Field};
+    /// # use datafusion_common::scalar::{ScalarStructBuilder, ScalarValue};
+    /// // make a nullable field
+    /// let field = Field::new("a", DataType::Int32, true);
+    /// // add a null value for the "a" field
+    /// let sv = ScalarStructBuilder::new()
+    ///   .with_scalar(field, ScalarValue::Int32(None))
+    ///   .build()
+    ///   .unwrap();
+    /// // value is not null, but field is
+    /// assert_eq!(format!("{sv}"), "{a:}");
+    /// ```
     pub fn new_null(fields: impl IntoFields) -> ScalarValue {
         DataType::Struct(fields.into()).try_into().unwrap()
     }