You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by xu...@apache.org on 2022/04/09 10:57:01 UTC

[arrow-datafusion] branch master updated: fix 'not' expression will 'NULL' constants (#2144)

This is an automated email from the ASF dual-hosted git repository.

xudong963 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 2d908405f fix 'not' expression will 'NULL' constants (#2144)
2d908405f is described below

commit 2d908405f177947663322a6a8c300fd2781a0e01
Author: DuRipeng <45...@qq.com>
AuthorDate: Sat Apr 9 18:56:55 2022 +0800

    fix 'not' expression will 'NULL' constants (#2144)
    
    Co-authored-by: duripeng <du...@baidu.com>
---
 datafusion/core/src/physical_plan/planner.rs    | 12 ++++---
 datafusion/core/tests/sql/expr.rs               | 39 ++++++++++++++++++++++
 datafusion/physical-expr/src/expressions/not.rs | 44 +++++++++----------------
 3 files changed, 61 insertions(+), 34 deletions(-)

diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs
index 273ec0a85..32fa12fb9 100644
--- a/datafusion/core/src/physical_plan/planner.rs
+++ b/datafusion/core/src/physical_plan/planner.rs
@@ -1014,10 +1014,12 @@ pub fn create_physical_expr(
             input_schema,
             data_type.clone(),
         ),
-        Expr::Not(expr) => expressions::not(
-            create_physical_expr(expr, input_dfschema, input_schema, execution_props)?,
+        Expr::Not(expr) => expressions::not(create_physical_expr(
+            expr,
+            input_dfschema,
             input_schema,
-        ),
+            execution_props,
+        )?),
         Expr::Negative(expr) => expressions::negative(
             create_physical_expr(expr, input_dfschema, input_schema, execution_props)?,
             input_schema,
@@ -1096,7 +1098,7 @@ pub fn create_physical_expr(
             );
 
             if *negated {
-                expressions::not(binary_expr?, input_schema)
+                expressions::not(binary_expr?)
             } else {
                 binary_expr
             }
@@ -1535,7 +1537,7 @@ mod tests {
             &schema,
             &make_session_state(),
         )?;
-        let expected = expressions::not(expressions::col("a", &schema)?, &schema)?;
+        let expected = expressions::not(expressions::col("a", &schema)?)?;
 
         assert_eq!(format!("{:?}", expr), format!("{:?}", expected));
 
diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs
index 2b12b3743..565af3203 100644
--- a/datafusion/core/tests/sql/expr.rs
+++ b/datafusion/core/tests/sql/expr.rs
@@ -333,6 +333,45 @@ async fn test_string_concat_operator() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_not_expressions() -> Result<()> {
+    let ctx = SessionContext::new();
+
+    let sql = "SELECT not(true), not(false)";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+-------------------+--------------------+",
+        "| NOT Boolean(true) | NOT Boolean(false) |",
+        "+-------------------+--------------------+",
+        "| false             | true               |",
+        "+-------------------+--------------------+",
+    ];
+    assert_batches_eq!(expected, &actual);
+
+    let sql = "SELECT null, not(null)";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+------------+----------------+",
+        "| Utf8(NULL) | NOT Utf8(NULL) |",
+        "+------------+----------------+",
+        "|            |                |",
+        "+------------+----------------+",
+    ];
+    assert_batches_eq!(expected, &actual);
+
+    let sql = "SELECT NOT('hi')";
+    let result = plan_and_collect(&ctx, sql).await;
+    match result {
+        Ok(_) => panic!("expected error"),
+        Err(e) => {
+            assert_contains!(e.to_string(),
+                             "NOT 'Literal { value: Utf8(\"hi\") }' can't be evaluated because the expression's type is Utf8, not boolean or NULL"
+            );
+        }
+    }
+    Ok(())
+}
+
 #[tokio::test]
 async fn test_boolean_expressions() -> Result<()> {
     test_expression!("true", "true");
diff --git a/datafusion/physical-expr/src/expressions/not.rs b/datafusion/physical-expr/src/expressions/not.rs
index fd0fbd1c6..a7fba60ec 100644
--- a/datafusion/physical-expr/src/expressions/not.rs
+++ b/datafusion/physical-expr/src/expressions/not.rs
@@ -69,8 +69,8 @@ impl PhysicalExpr for NotExpr {
     }
 
     fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
-        let arg = self.arg.evaluate(batch)?;
-        match arg {
+        let evaluate_arg = self.arg.evaluate(batch)?;
+        match evaluate_arg {
             ColumnarValue::Array(array) => {
                 let array =
                     array
@@ -86,6 +86,16 @@ impl PhysicalExpr for NotExpr {
                 )))
             }
             ColumnarValue::Scalar(scalar) => {
+                if scalar.is_null() {
+                    return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
+                }
+                let value_type = scalar.get_datatype();
+                if value_type != DataType::Boolean {
+                    return Err(DataFusionError::Internal(format!(
+                        "NOT '{:?}' can't be evaluated because the expression's type is {:?}, not boolean or NULL",
+                        self.arg, value_type,
+                    )));
+                }
                 let bool_value: bool = scalar.try_into()?;
                 Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(
                     !bool_value,
@@ -96,23 +106,8 @@ impl PhysicalExpr for NotExpr {
 }
 
 /// Creates a unary expression NOT
-///
-/// # Errors
-///
-/// This function errors when the argument's type is not boolean
-pub fn not(
-    arg: Arc<dyn PhysicalExpr>,
-    input_schema: &Schema,
-) -> Result<Arc<dyn PhysicalExpr>> {
-    let data_type = arg.data_type(input_schema)?;
-    if data_type != DataType::Boolean {
-        Err(DataFusionError::Internal(format!(
-            "NOT '{:?}' can't be evaluated because the expression's type is {:?}, not boolean",
-            arg, data_type,
-        )))
-    } else {
-        Ok(Arc::new(NotExpr::new(arg)))
-    }
+pub fn not(arg: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExpr>> {
+    Ok(Arc::new(NotExpr::new(arg)))
 }
 
 #[cfg(test)]
@@ -126,7 +121,7 @@ mod tests {
     fn neg_op() -> Result<()> {
         let schema = Schema::new(vec![Field::new("a", DataType::Boolean, true)]);
 
-        let expr = not(col("a", &schema)?, &schema)?;
+        let expr = not(col("a", &schema)?)?;
         assert_eq!(expr.data_type(&schema)?, DataType::Boolean);
         assert!(expr.nullable(&schema)?);
 
@@ -145,13 +140,4 @@ mod tests {
 
         Ok(())
     }
-
-    /// verify that expression errors when the input expression is not a boolean.
-    #[test]
-    fn neg_op_not_null() {
-        let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
-
-        let expr = not(col("a", &schema).unwrap(), &schema);
-        assert!(expr.is_err());
-    }
 }