You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/04/28 20:32:46 UTC

[GitHub] [arrow-datafusion] alamb commented on a diff in pull request #2364: introduce `ScalarValue::Null` to df

alamb commented on code in PR #2364:
URL: https://github.com/apache/arrow-datafusion/pull/2364#discussion_r861280076


##########
Cargo.toml:
##########
@@ -38,3 +38,6 @@ exclude = ["datafusion-cli"]
 [profile.release]
 codegen-units = 1
 lto = true
+
+[patch.crates-io]

Review Comment:
   BTW arrow-rs 13.0.0 with these changes should be released sometime early next wee



##########
datafusion/common/src/scalar.rs:
##########
@@ -836,6 +844,10 @@ impl ScalarValue {
                     ScalarValue::iter_to_decimal_array(scalars, precision, scale)?;
                 Arc::new(decimal_array)
             }
+            DataType::Null => {
+                let null_array = ScalarValue::iter_to_null_array(scalars)?;

Review Comment:
   Can you use https://docs.rs/arrow/12.0.0/arrow/array/fn.new_null_array.html instead? 
   
   ```rust
   new_null_array(DataType::Null)
   ``` 
   



##########
datafusion/common/src/scalar.rs:
##########
@@ -39,6 +39,8 @@ use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc};
 /// This is the single-valued counter-part of arrow’s `Array`.
 #[derive(Clone)]
 pub enum ScalarValue {
+    /// represents null
+    Null,

Review Comment:
   👍 



##########
datafusion/common/src/scalar.rs:
##########
@@ -325,6 +329,8 @@ impl std::hash::Hash for ScalarValue {
                 v.hash(state);
                 t.hash(state);
             }
+            // stable hash for Null value

Review Comment:
   👍 



##########
datafusion/common/src/scalar.rs:
##########
@@ -170,6 +172,7 @@ impl PartialEq for ScalarValue {
             (IntervalMonthDayNano(_), _) => false,
             (Struct(v1, t1), Struct(v2, t2)) => v1.eq(v2) && t1.eq(t2),
             (Struct(_, _), _) => false,
+            (Null, _) => false,

Review Comment:
   I think we need to also add the postive case:
   
   ```suggestion
               (Null, Null) => true,
               (Null, _) => false,
   ```



##########
datafusion/common/src/scalar.rs:
##########
@@ -39,6 +39,8 @@ use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc};
 /// This is the single-valued counter-part of arrow’s `Array`.
 #[derive(Clone)]
 pub enum ScalarValue {
+    /// represents null

Review Comment:
   ```suggestion
       /// represents `DataType::Null` (castable to/from any other type)
   ```



##########
datafusion/common/src/scalar.rs:
##########
@@ -1522,6 +1550,7 @@ impl ScalarValue {
                 eq_array_primitive!(array, index, IntervalMonthDayNanoArray, val)
             }
             ScalarValue::Struct(_, _) => unimplemented!(),
+            ScalarValue::Null => false,

Review Comment:
   🤔  



##########
datafusion/expr/src/binary_rule.rs:
##########
@@ -590,8 +590,30 @@ fn eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
     numerical_coercion(lhs_type, rhs_type)
         .or_else(|| dictionary_coercion(lhs_type, rhs_type))
         .or_else(|| temporal_coercion(lhs_type, rhs_type))
+        .or_else(|| null_coercion(lhs_type, rhs_type))
 }
 
+/// coercion rules from NULL type. Since NULL can be casted to most of types in arrow,
+/// either lhs or rhs is NULL, if NULL can be casted to type of the other side, the coecion is valid.
+fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {

Review Comment:
   very cool



##########
datafusion/core/src/sql/planner.rs:
##########
@@ -1445,7 +1445,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                         SQLExpr::Value(Value::Number(n, _)) => parse_sql_number(&n),
                         SQLExpr::Value(Value::SingleQuotedString(s)) => Ok(lit(s)),
                         SQLExpr::Value(Value::Null) => {
-                            Ok(Expr::Literal(ScalarValue::Utf8(None)))
+                            Ok(Expr::Literal(ScalarValue::Null))

Review Comment:
   🎉 



##########
datafusion/physical-expr/src/expressions/nullif.rs:
##########
@@ -17,7 +17,7 @@
 
 use std::sync::Arc;
 
-use crate::expressions::binary::{eq_decimal, eq_decimal_scalar};
+use crate::expressions::binary::{eq_decimal, eq_decimal_scalar, eq_null};

Review Comment:
   is this needed?



##########
datafusion/common/src/scalar.rs:
##########
@@ -270,6 +273,7 @@ impl PartialOrd for ScalarValue {
                 }
             }
             (Struct(_, _), _) => None,
+            (Null, _) => None,

Review Comment:
   Two nulls are always equal I think
   
   ```suggestion
               (Null, Null) => Some(Equal),
               (Null, _) => None,
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org