You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/12/26 21:18:12 UTC

[arrow-datafusion] branch master updated: Extended datatypes & signatures support for `NULLIF` function (#4737)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new a8f1f8abd Extended datatypes & signatures support for `NULLIF` function (#4737)
a8f1f8abd is described below

commit a8f1f8abd833cf38d536bd6f3c59f073a9964a43
Author: Eduard Karacharov <13...@users.noreply.github.com>
AuthorDate: Tue Dec 27 00:18:08 2022 +0300

    Extended datatypes & signatures support for `NULLIF` function (#4737)
    
    * extended nullif datatypes & signatures support
    
    * sqllogictests & type inheritance
---
 .../core/tests/sqllogictests/test_files/nullif.slt |  98 +++++++++++++
 datafusion/expr/src/nullif.rs                      |   2 +
 datafusion/physical-expr/src/expressions/nullif.rs | 151 +++++++++++++++------
 3 files changed, 207 insertions(+), 44 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/nullif.slt b/datafusion/core/tests/sqllogictests/test_files/nullif.slt
new file mode 100644
index 000000000..d6dd92d33
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/nullif.slt
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+statement ok
+CREATE TABLE test(
+  int_field  INT,
+  bool_field BOOLEAN,
+  text_field TEXT,
+  more_ints  INT
+) as VALUES
+  (1,    true,  'abc',  2),
+  (2,    false, 'def',  2),
+  (3,    NULL,  'ghij', 3),
+  (NULL, NULL,   NULL,  4),
+  (4,    false, 'zxc',  5),
+  (NULL, true,   NULL,  6)
+;
+
+# Arrays tests
+query T
+SELECT NULLIF(int_field, 2) FROM test;
+----
+1
+NULL
+3
+NULL
+4
+NULL
+
+query T
+SELECT NULLIF(bool_field, false) FROM test;
+----
+true
+NULL
+NULL
+NULL
+NULL
+true
+
+query T
+SELECT NULLIF(text_field, 'zxc') FROM test;
+----
+abc
+def
+ghij
+NULL
+NULL
+NULL
+
+query T
+SELECT NULLIF(int_field, more_ints) FROM test;
+----
+1
+NULL
+NULL
+NULL
+4
+NULL
+
+query T
+SELECT NULLIF(3, int_field) FROM test;
+----
+3
+3
+NULL
+3
+3
+3
+
+# Scalar values tests
+query T
+SELECT NULLIF(1, 1);
+----
+NULL
+
+query T
+SELECT NULLIF(1, 3);
+----
+1
+
+query T
+SELECT NULLIF(NULL, NULL);
+----
+NULL
diff --git a/datafusion/expr/src/nullif.rs b/datafusion/expr/src/nullif.rs
index d2ed04206..f17bd793b 100644
--- a/datafusion/expr/src/nullif.rs
+++ b/datafusion/expr/src/nullif.rs
@@ -32,4 +32,6 @@ pub static SUPPORTED_NULLIF_TYPES: &[DataType] = &[
     DataType::Int64,
     DataType::Float32,
     DataType::Float64,
+    DataType::Utf8,
+    DataType::LargeUtf8,
 ];
diff --git a/datafusion/physical-expr/src/expressions/nullif.rs b/datafusion/physical-expr/src/expressions/nullif.rs
index 803068525..73b5a80b6 100644
--- a/datafusion/physical-expr/src/expressions/nullif.rs
+++ b/datafusion/physical-expr/src/expressions/nullif.rs
@@ -15,52 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::sync::Arc;
-
 use arrow::array::Array;
-use arrow::array::*;
 use arrow::compute::eq_dyn;
 use arrow::compute::nullif::nullif;
-use arrow::datatypes::DataType;
-use datafusion_common::{cast::as_boolean_array, DataFusionError, Result};
+use datafusion_common::{cast::as_boolean_array, DataFusionError, Result, ScalarValue};
 use datafusion_expr::ColumnarValue;
 
 use super::binary::array_eq_scalar;
 
-/// Invoke a compute kernel on a primitive array and a Boolean Array
-macro_rules! compute_bool_array_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{
-        let ll = $LEFT
-            .as_any()
-            .downcast_ref::<$DT>()
-            .expect("compute_op failed to downcast array");
-        let rr = as_boolean_array($RIGHT).expect("compute_op failed to downcast array");
-        Ok(Arc::new($OP(&ll, &rr)?) as ArrayRef)
-    }};
-}
-
-/// Binary op between primitive and boolean arrays
-macro_rules! primitive_bool_array_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
-        match $LEFT.data_type() {
-            DataType::Int8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int8Array),
-            DataType::Int16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int16Array),
-            DataType::Int32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int32Array),
-            DataType::Int64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int64Array),
-            DataType::UInt8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt8Array),
-            DataType::UInt16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt16Array),
-            DataType::UInt32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt32Array),
-            DataType::UInt64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt64Array),
-            DataType::Float32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float32Array),
-            DataType::Float64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float64Array),
-            other => Err(DataFusionError::Internal(format!(
-                "Unsupported data type {:?} for NULLIF/primitive/boolean operator",
-                other
-            ))),
-        }
-    }};
-}
-
 /// Implements NULLIF(expr1, expr2)
 /// Args: 0 - left expr is any array
 ///       1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed.
@@ -79,7 +41,7 @@ pub fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
         (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => {
             let cond_array = array_eq_scalar(lhs, rhs)?;
 
-            let array = primitive_bool_array_op!(lhs, &cond_array, nullif)?;
+            let array = nullif(lhs, as_boolean_array(&cond_array)?)?;
 
             Ok(ColumnarValue::Array(array))
         }
@@ -88,17 +50,34 @@ pub fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
             let cond_array = eq_dyn(lhs, rhs)?;
 
             // Now, invoke nullif on the result
-            let array = primitive_bool_array_op!(lhs, &cond_array, nullif)?;
+            let array = nullif(lhs, as_boolean_array(&cond_array)?)?;
+            Ok(ColumnarValue::Array(array))
+        }
+        (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => {
+            // Same as the Array-Array case, except that the ScalarValue is first expanded into an Array of matching length
+            let lhs = lhs.to_array_of_size(rhs.len());
+            let cond_array = eq_dyn(&lhs, rhs)?;
+
+            let array = nullif(&lhs, as_boolean_array(&cond_array)?)?;
             Ok(ColumnarValue::Array(array))
         }
-        _ => Err(DataFusionError::NotImplemented(
-            "nullif does not support a literal as first argument".to_string(),
-        )),
+        (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => {
+            let val: ScalarValue = match lhs.eq(rhs) {
+                true => lhs.get_datatype().try_into()?,
+                false => lhs.clone(),
+            };
+
+            Ok(ColumnarValue::Scalar(val))
+        }
     }
 }
 
 #[cfg(test)]
 mod tests {
+    use std::sync::Arc;
+
+    use arrow::array::*;
+
     use super::*;
     use datafusion_common::{Result, ScalarValue};
 
@@ -162,4 +141,88 @@ mod tests {
         assert_eq!(expected.as_ref(), result.as_ref());
         Ok(())
     }
+
+    #[test]
+    fn nullif_boolean() -> Result<()> {
+        let a = BooleanArray::from(vec![Some(true), Some(false), None]);
+        let a = ColumnarValue::Array(Arc::new(a));
+
+        let lit_array = ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)));
+
+        let result = nullif_func(&[a, lit_array])?;
+        let result = result.into_array(0);
+
+        let expected =
+            Arc::new(BooleanArray::from(vec![Some(true), None, None])) as ArrayRef;
+
+        assert_eq!(expected.as_ref(), result.as_ref());
+        Ok(())
+    }
+
+    #[test]
+    fn nullif_string() -> Result<()> {
+        let a = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
+        let a = ColumnarValue::Array(Arc::new(a));
+
+        let lit_array = ColumnarValue::Scalar(ScalarValue::Utf8(Some("bar".to_string())));
+
+        let result = nullif_func(&[a, lit_array])?;
+        let result = result.into_array(0);
+
+        let expected = Arc::new(StringArray::from(vec![
+            Some("foo"),
+            None,
+            None,
+            Some("baz"),
+        ])) as ArrayRef;
+
+        assert_eq!(expected.as_ref(), result.as_ref());
+        Ok(())
+    }
+
+    #[test]
+    fn nullif_literal_first() -> Result<()> {
+        let a = Int32Array::from(vec![Some(1), Some(2), None, None, Some(3), Some(4)]);
+        let a = ColumnarValue::Array(Arc::new(a));
+
+        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
+
+        let result = nullif_func(&[lit_array, a])?;
+        let result = result.into_array(0);
+
+        let expected = Arc::new(Int32Array::from(vec![
+            Some(2),
+            None,
+            Some(2),
+            Some(2),
+            Some(2),
+            Some(2),
+        ])) as ArrayRef;
+        assert_eq!(expected.as_ref(), result.as_ref());
+        Ok(())
+    }
+
+    #[test]
+    fn nullif_scalar() -> Result<()> {
+        let a_eq = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
+        let b_eq = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
+
+        let result_eq = nullif_func(&[a_eq, b_eq])?;
+        let result_eq = result_eq.into_array(1);
+
+        let expected_eq = Arc::new(Int32Array::from(vec![None])) as ArrayRef;
+
+        assert_eq!(expected_eq.as_ref(), result_eq.as_ref());
+
+        let a_neq = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32)));
+        let b_neq = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32)));
+
+        let result_neq = nullif_func(&[a_neq, b_neq])?;
+        let result_neq = result_neq.into_array(1);
+
+        let expected_neq = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef;
+        assert_eq!(expected_neq.as_ref(), result_neq.as_ref());
+
+        Ok(())
+    }
 }