You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/11/22 08:42:52 UTC

[arrow-rs] branch master updated: Add like_utf8_scalar_dyn kernel (#3146)

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new de05308c6 Add like_utf8_scalar_dyn kernel (#3146)
de05308c6 is described below

commit de05308c68f091271ffb1e96bf0744698082aedc
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Tue Nov 22 00:42:46 2022 -0800

    Add like_utf8_scalar_dyn kernel (#3146)
---
 arrow/src/compute/kernels/comparison.rs | 155 ++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)

diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index 6438acc3b..05c8b7aa6 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -278,6 +278,39 @@ fn like_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
     like_scalar_op(left, right, |x| x)
 }
 
+/// Perform SQL `left LIKE right` between `left` and the scalar pattern `right`,
+/// where `left` is a [`StringArray`], a [`LargeStringArray`], or a
+/// [`DictionaryArray`] with [`StringArray`]/[`LargeStringArray`] values.
+///
+/// See the documentation on [`like_utf8`] for more details.
+pub fn like_utf8_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray> {
+    match left.data_type() {
+        DataType::Utf8 => {
+            let left = as_string_array(left);
+            like_scalar(left, right)
+        }
+        DataType::LargeUtf8 => {
+            let left = as_largestring_array(left);
+            like_scalar(left, right)
+        }
+        DataType::Dictionary(_, _) => {
+            downcast_dictionary_array!(
+                left => {
+                    like_dict_scalar(left, right)
+                }
+                t => Err(ArrowError::ComputeError(format!(
+                    "Should be DictionaryArray but got: {}", t
+                )))
+            )
+        }
+        _ => {
+            Err(ArrowError::ComputeError(
+                "like_utf8_scalar_dyn only supports Utf8, LargeUtf8 or DictionaryArray with Utf8 or LargeUtf8 values".to_string(),
+            ))
+        }
+    }
+}
+
 /// Perform SQL `left LIKE right` operation on [`StringArray`] /
 /// [`LargeStringArray`] and a scalar.
 ///
@@ -4471,6 +4504,14 @@ mod tests {
         vec![true, true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_escape_testing,
+        vec!["varchar(255)", "int(255)", "varchar", "int"],
+        "%(%)%",
+        like_utf8_scalar_dyn,
+        vec![true, true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_escape_regex,
         vec![".*", "a", "*"],
@@ -4479,6 +4520,14 @@ mod tests {
         vec![true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_escape_regex,
+        vec![".*", "a", "*"],
+        ".*",
+        like_utf8_scalar_dyn,
+        vec![true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_escape_regex_dot,
         vec![".", "a", "*"],
@@ -4487,6 +4536,14 @@ mod tests {
         vec![true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_escape_regex_dot,
+        vec![".", "a", "*"],
+        ".",
+        like_utf8_scalar_dyn,
+        vec![true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar,
         vec!["arrow", "parquet", "datafusion", "flight"],
@@ -4494,6 +4551,15 @@ mod tests {
         like_utf8_scalar,
         vec![true, true, false, false]
     );
+
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn,
+        vec!["arrow", "parquet", "datafusion", "flight"],
+        "%ar%",
+        like_utf8_scalar_dyn,
+        vec![true, true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_start,
         vec!["arrow", "parrow", "arrows", "arr"],
@@ -4502,6 +4568,14 @@ mod tests {
         vec![true, false, true, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_start,
+        vec!["arrow", "parrow", "arrows", "arr"],
+        "arrow%",
+        like_utf8_scalar_dyn,
+        vec![true, false, true, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_end,
         vec!["arrow", "parrow", "arrows", "arr"],
@@ -4510,6 +4584,14 @@ mod tests {
         vec![true, true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_end,
+        vec!["arrow", "parrow", "arrows", "arr"],
+        "%arrow",
+        like_utf8_scalar_dyn,
+        vec![true, true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_equals,
         vec!["arrow", "parrow", "arrows", "arr"],
@@ -4518,6 +4600,14 @@ mod tests {
         vec![true, false, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_equals,
+        vec!["arrow", "parrow", "arrows", "arr"],
+        "arrow",
+        like_utf8_scalar_dyn,
+        vec![true, false, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_array_like_scalar_one,
         vec!["arrow", "arrows", "parrow", "arr"],
@@ -4526,6 +4616,14 @@ mod tests {
         vec![false, true, false, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_array_like_scalar_dyn_one,
+        vec!["arrow", "arrows", "parrow", "arr"],
+        "arrow_",
+        like_utf8_scalar_dyn,
+        vec![false, true, false, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_scalar_like_escape,
         vec!["a%", "a\\x"],
@@ -4534,6 +4632,14 @@ mod tests {
         vec![true, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_scalar_like_dyn_escape,
+        vec!["a%", "a\\x"],
+        "a\\%",
+        like_utf8_scalar_dyn,
+        vec![true, false]
+    );
+
     test_utf8_scalar!(
         test_utf8_scalar_like_escape_contains,
         vec!["ba%", "ba\\x"],
@@ -4542,6 +4648,14 @@ mod tests {
         vec![true, false]
     );
 
+    test_utf8_scalar!(
+        test_utf8_scalar_like_dyn_escape_contains,
+        vec!["ba%", "ba\\x"],
+        "%a\\%",
+        like_utf8_scalar_dyn,
+        vec![true, false]
+    );
+
     test_utf8!(
         test_utf8_scalar_ilike_regex,
         vec!["%%%"],
@@ -6138,6 +6252,12 @@ mod tests {
 
         let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
 
+        let data =
+            vec![Some("Earth"), Some("Fire"), Some("Water"), Some("Air"), None, Some("Air")];
+
+        let dict_arrayref: DictionaryArray<Int8Type> = data.into_iter().collect();
+        let dict_arrayref = Arc::new(dict_arrayref) as ArrayRef;
+
         assert_eq!(
             like_dict_scalar(&dict_array, "Air").unwrap(),
             BooleanArray::from(
@@ -6145,6 +6265,13 @@ mod tests {
             ),
         );
 
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "Air").unwrap(),
+            BooleanArray::from(
+                vec![Some(false), Some(false), Some(false), Some(true), None, Some(true)]
+            ),
+        );
+
         assert_eq!(
             like_dict_scalar(&dict_array, "Wa%").unwrap(),
             BooleanArray::from(
@@ -6152,6 +6279,13 @@ mod tests {
             ),
         );
 
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "Wa%").unwrap(),
+            BooleanArray::from(
+                vec![Some(false), Some(false), Some(true), Some(false), None, Some(false)]
+            ),
+        );
+
         assert_eq!(
             like_dict_scalar(&dict_array, "%r").unwrap(),
             BooleanArray::from(
@@ -6159,6 +6293,13 @@ mod tests {
             ),
         );
 
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "%r").unwrap(),
+            BooleanArray::from(
+                vec![Some(false), Some(false), Some(true), Some(true), None, Some(true)]
+            ),
+        );
+
         assert_eq!(
             like_dict_scalar(&dict_array, "%i%").unwrap(),
             BooleanArray::from(
@@ -6166,12 +6307,26 @@ mod tests {
             ),
         );
 
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "%i%").unwrap(),
+            BooleanArray::from(
+                vec![Some(false), Some(true), Some(false), Some(true), None, Some(true)]
+            ),
+        );
+
         assert_eq!(
             like_dict_scalar(&dict_array, "%a%r%").unwrap(),
             BooleanArray::from(
                 vec![Some(true), Some(false), Some(true), Some(false), None, Some(false)]
             ),
         );
+
+        assert_eq!(
+            like_utf8_scalar_dyn(&dict_arrayref, "%a%r%").unwrap(),
+            BooleanArray::from(
+                vec![Some(true), Some(false), Some(true), Some(false), None, Some(false)]
+            ),
+        );
     }
 
     #[test]