You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2024/02/16 13:07:05 UTC
(arrow-datafusion) branch main updated: Add example for `ScalarStructBuilder::new_null`, fix display for `null` `ScalarValue::Struct` (#9238)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 14cb962718 Add example for `ScalarStructBuilder::new_null`, fix display for `null` `ScalarValue::Struct` (#9238)
14cb962718 is described below
commit 14cb9627184bc1680cac8965533ee8f0b06973b2
Author: Andrew Lamb <an...@nerdnetworks.org>
AuthorDate: Fri Feb 16 08:06:59 2024 -0500
Add example for `ScalarStructBuilder::new_null`, fix display for `null` `ScalarValue::Struct` (#9238)
* Minor: Add example for `ScalarStructBuilder::new_null`
* Fix null display
* fix docs
* tweak
---
datafusion/common/src/scalar/mod.rs | 61 +++++++++++++++++++++++++-
datafusion/common/src/scalar/struct_builder.rs | 36 ++++++++++++++-
2 files changed, 94 insertions(+), 3 deletions(-)
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs
index 29107ab10e..7e53415090 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -83,7 +83,7 @@ pub use struct_builder::ScalarStructBuilder;
///
/// In general, performance will be better using arrow [`Array`]s rather than
/// [`ScalarValue`], as it is far more efficient to process multiple values at
-/// once (vecctorized processing).
+/// once (vectorized processing).
///
/// # Example
/// ```
@@ -3103,6 +3103,11 @@ impl fmt::Display for ScalarValue {
// ScalarValue Struct should always have a single element
assert_eq!(struct_arr.len(), 1);
+ if struct_arr.null_count() == struct_arr.len() {
+ write!(f, "NULL")?;
+ return Ok(());
+ }
+
let columns = struct_arr.columns();
let fields = struct_arr.fields();
let nulls = struct_arr.nulls();
@@ -3298,6 +3303,7 @@ mod tests {
as_string_array, as_struct_array, as_uint32_array, as_uint64_array,
};
+ use crate::assert_batches_eq;
use arrow::buffer::OffsetBuffer;
use arrow::compute::{is_null, kernels};
use arrow::datatypes::{ArrowNumericType, ArrowPrimitiveType};
@@ -5690,6 +5696,59 @@ mod tests {
check_array(array);
}
+ #[test]
+ fn test_struct_display() {
+ let field_a = Field::new("a", DataType::Int32, true);
+ let field_b = Field::new("b", DataType::Utf8, true);
+
+ let s = ScalarStructBuilder::new()
+ .with_scalar(field_a, ScalarValue::from(1i32))
+ .with_scalar(field_b, ScalarValue::Utf8(None))
+ .build()
+ .unwrap();
+
+ assert_eq!(s.to_string(), "{a:1,b:}");
+
+ let ScalarValue::Struct(arr) = s else {
+ panic!("Expected struct");
+ };
+
+ //verify compared to arrow display
+ let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
+ let expected = [
+ "+-------------+",
+ "| s           |",
+ "+-------------+",
+ "| {a: 1, b: } |",
+ "+-------------+",
+ ];
+ assert_batches_eq!(&expected, &[batch]);
+ }
+
+ #[test]
+ fn test_struct_display_null() {
+ let fields = vec![Field::new("a", DataType::Int32, false)];
+ let s = ScalarStructBuilder::new_null(fields);
+ assert_eq!(s.to_string(), "NULL");
+
+ let ScalarValue::Struct(arr) = s else {
+ panic!("Expected struct");
+ };
+
+ //verify compared to arrow display
+ let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
+
+ #[rustfmt::skip]
+ let expected = [
+ "+---+",
+ "| s |",
+ "+---+",
+ "|   |",
+ "+---+",
+ ];
+ assert_batches_eq!(&expected, &[batch]);
+ }
+
#[test]
fn test_build_timestamp_millisecond_list() {
let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)];
diff --git a/datafusion/common/src/scalar/struct_builder.rs b/datafusion/common/src/scalar/struct_builder.rs
index 926e100417..1192757e89 100644
--- a/datafusion/common/src/scalar/struct_builder.rs
+++ b/datafusion/common/src/scalar/struct_builder.rs
@@ -39,8 +39,40 @@ impl ScalarStructBuilder {
Self::default()
}
- /// Return a new [`ScalarValue::Struct`] with the specified fields and a
- /// single null value
+ /// Return a new [`ScalarValue::Struct`] with a single `null` value.
+ ///
+ /// Note this is different from a struct where each of the specified fields
+ /// are null (e.g. `{a: NULL}`)
+ ///
+ /// # Example
+ ///
+ /// ```rust
+ /// # use arrow::datatypes::{DataType, Field};
+ /// # use datafusion_common::scalar::ScalarStructBuilder;
+ /// let fields = vec![
+ /// Field::new("a", DataType::Int32, false),
+ /// ];
+ /// let sv = ScalarStructBuilder::new_null(fields);
+ /// // Note this is `NULL`, not `{a: NULL}`
+ /// assert_eq!(format!("{sv}"), "NULL");
+ ///```
+ ///
+ /// To create a struct where the *fields* are null, use `Self::new()` and
+ /// pass null values for each field:
+ ///
+ /// ```rust
+ /// # use arrow::datatypes::{DataType, Field};
+ /// # use datafusion_common::scalar::{ScalarStructBuilder, ScalarValue};
+ /// // make a nullable field
+ /// let field = Field::new("a", DataType::Int32, true);
+ /// // add a null value for the "a" field
+ /// let sv = ScalarStructBuilder::new()
+ /// .with_scalar(field, ScalarValue::Int32(None))
+ /// .build()
+ /// .unwrap();
+ /// // value is not null, but field is
+ /// assert_eq!(format!("{sv}"), "{a:}");
+ /// ```
pub fn new_null(fields: impl IntoFields) -> ScalarValue {
DataType::Struct(fields.into()).try_into().unwrap()
}