You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by vi...@apache.org on 2022/11/22 08:42:52 UTC
[arrow-rs] branch master updated: Add like_utf8_scalar_dyn kernel (#3146)
This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new de05308c6 Add like_utf8_scalar_dyn kernel (#3146)
de05308c6 is described below
commit de05308c68f091271ffb1e96bf0744698082aedc
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Tue Nov 22 00:42:46 2022 -0800
Add like_utf8_scalar_dyn kernel (#3146)
---
arrow/src/compute/kernels/comparison.rs | 155 ++++++++++++++++++++++++++++++++
1 file changed, 155 insertions(+)
diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index 6438acc3b..05c8b7aa6 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -278,6 +278,39 @@ fn like_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
like_scalar_op(left, right, |x| x)
}
+/// Perform SQL `left LIKE right` on a dynamically typed `left` array and a scalar
+/// `right` pattern. `left` is dispatched on its [`DataType`]: `Utf8` and `LargeUtf8`
+/// go through `like_scalar`, `Dictionary` goes through `like_dict_scalar`, and any
+/// other data type is rejected with an [`ArrowError::ComputeError`].
+/// See the documentation on [`like_utf8`] for more details.
+pub fn like_utf8_scalar_dyn(left: &dyn Array, right: &str) -> Result<BooleanArray> {
+ match left.data_type() {
+ DataType::Utf8 => {
+ let left = as_string_array(left);
+ like_scalar(left, right)
+ }
+ DataType::LargeUtf8 => {
+ let left = as_largestring_array(left);
+ like_scalar(left, right)
+ }
+ DataType::Dictionary(_, _) => {
+ downcast_dictionary_array!(
+ left => {
+ like_dict_scalar(left, right)
+ }
+ t => Err(ArrowError::ComputeError(format!(
+ "Should be DictionaryArray but got: {}", t
+ )))
+ )
+ }
+ _ => {
+ Err(ArrowError::ComputeError(
+ "like_utf8_scalar_dyn only supports Utf8, LargeUtf8 or DictionaryArray with Utf8 or LargeUtf8 values".to_string(),
+ ))
+ }
+ }
+}
+
/// Perform SQL `left LIKE right` operation on [`StringArray`] /
/// [`LargeStringArray`] and a scalar.
///
@@ -4471,6 +4504,14 @@ mod tests {
vec![true, true, false, false]
);
+ test_utf8_scalar!(
+ test_utf8_array_like_scalar_dyn_escape_testing,
+ vec!["varchar(255)", "int(255)", "varchar", "int"],
+ "%(%)%",
+ like_utf8_scalar_dyn,
+ vec![true, true, false, false]
+ );
+
test_utf8_scalar!(
test_utf8_array_like_scalar_escape_regex,
vec![".*", "a", "*"],
@@ -4479,6 +4520,14 @@ mod tests {
vec![true, false, false]
);
+ test_utf8_scalar!(
+ test_utf8_array_like_scalar_dyn_escape_regex,
+ vec![".*", "a", "*"],
+ ".*",
+ like_utf8_scalar_dyn,
+ vec![true, false, false]
+ );
+
test_utf8_scalar!(
test_utf8_array_like_scalar_escape_regex_dot,
vec![".", "a", "*"],
@@ -4487,6 +4536,14 @@ mod tests {
vec![true, false, false]
);
+ test_utf8_scalar!(
+ test_utf8_array_like_scalar_dyn_escape_regex_dot,
+ vec![".", "a", "*"],
+ ".",
+ like_utf8_scalar_dyn,
+ vec![true, false, false]
+ );
+
test_utf8_scalar!(
test_utf8_array_like_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
@@ -4494,6 +4551,15 @@ mod tests {
like_utf8_scalar,
vec![true, true, false, false]
);
+
+ test_utf8_scalar!(
+ test_utf8_array_like_scalar_dyn,
+ vec!["arrow", "parquet", "datafusion", "flight"],
+ "%ar%",
+ like_utf8_scalar_dyn,
+ vec![true, true, false, false]
+ );
+
test_utf8_scalar!(
test_utf8_array_like_scalar_start,
vec!["arrow", "parrow", "arrows", "arr"],
@@ -4502,6 +4568,14 @@ mod tests {
vec![true, false, true, false]
);
+ test_utf8_scalar!(
+ test_utf8_array_like_scalar_dyn_start,
+ vec!["arrow", "parrow", "arrows", "arr"],
+ "arrow%",
+ like_utf8_scalar_dyn,
+ vec![true, false, true, false]
+ );
+
test_utf8_scalar!(
test_utf8_array_like_scalar_end,
vec!["arrow", "parrow", "arrows", "arr"],
@@ -4510,6 +4584,14 @@ mod tests {
vec![true, true, false, false]
);
+ test_utf8_scalar!(
+ test_utf8_array_like_scalar_dyn_end,
+ vec!["arrow", "parrow", "arrows", "arr"],
+ "%arrow",
+ like_utf8_scalar_dyn,
+ vec![true, true, false, false]
+ );
+
test_utf8_scalar!(
test_utf8_array_like_scalar_equals,
vec!["arrow", "parrow", "arrows", "arr"],
@@ -4518,6 +4600,14 @@ mod tests {
vec![true, false, false, false]
);
+ test_utf8_scalar!(
+ test_utf8_array_like_scalar_dyn_equals,
+ vec!["arrow", "parrow", "arrows", "arr"],
+ "arrow",
+ like_utf8_scalar_dyn,
+ vec![true, false, false, false]
+ );
+
test_utf8_scalar!(
test_utf8_array_like_scalar_one,
vec!["arrow", "arrows", "parrow", "arr"],
@@ -4526,6 +4616,14 @@ mod tests {
vec![false, true, false, false]
);
+ test_utf8_scalar!(
+ test_utf8_array_like_scalar_dyn_one,
+ vec!["arrow", "arrows", "parrow", "arr"],
+ "arrow_",
+ like_utf8_scalar_dyn,
+ vec![false, true, false, false]
+ );
+
test_utf8_scalar!(
test_utf8_scalar_like_escape,
vec!["a%", "a\\x"],
@@ -4534,6 +4632,14 @@ mod tests {
vec![true, false]
);
+ test_utf8_scalar!(
+ test_utf8_scalar_like_dyn_escape,
+ vec!["a%", "a\\x"],
+ "a\\%",
+ like_utf8_scalar_dyn,
+ vec![true, false]
+ );
+
test_utf8_scalar!(
test_utf8_scalar_like_escape_contains,
vec!["ba%", "ba\\x"],
@@ -4542,6 +4648,14 @@ mod tests {
vec![true, false]
);
+ test_utf8_scalar!(
+ test_utf8_scalar_like_dyn_escape_contains,
+ vec!["ba%", "ba\\x"],
+ "%a\\%",
+ like_utf8_scalar_dyn,
+ vec![true, false]
+ );
+
test_utf8!(
test_utf8_scalar_ilike_regex,
vec!["%%%"],
@@ -6138,6 +6252,12 @@ mod tests {
let dict_array: DictionaryArray<Int8Type> = data.into_iter().collect();
+ let data =
+ vec![Some("Earth"), Some("Fire"), Some("Water"), Some("Air"), None, Some("Air")];
+
+ let dict_arrayref: DictionaryArray<Int8Type> = data.into_iter().collect();
+ let dict_arrayref = Arc::new(dict_arrayref) as ArrayRef;
+
assert_eq!(
like_dict_scalar(&dict_array, "Air").unwrap(),
BooleanArray::from(
@@ -6145,6 +6265,13 @@ mod tests {
),
);
+ assert_eq!(
+ like_utf8_scalar_dyn(&dict_arrayref, "Air").unwrap(),
+ BooleanArray::from(
+ vec![Some(false), Some(false), Some(false), Some(true), None, Some(true)]
+ ),
+ );
+
assert_eq!(
like_dict_scalar(&dict_array, "Wa%").unwrap(),
BooleanArray::from(
@@ -6152,6 +6279,13 @@ mod tests {
),
);
+ assert_eq!(
+ like_utf8_scalar_dyn(&dict_arrayref, "Wa%").unwrap(),
+ BooleanArray::from(
+ vec![Some(false), Some(false), Some(true), Some(false), None, Some(false)]
+ ),
+ );
+
assert_eq!(
like_dict_scalar(&dict_array, "%r").unwrap(),
BooleanArray::from(
@@ -6159,6 +6293,13 @@ mod tests {
),
);
+ assert_eq!(
+ like_utf8_scalar_dyn(&dict_arrayref, "%r").unwrap(),
+ BooleanArray::from(
+ vec![Some(false), Some(false), Some(true), Some(true), None, Some(true)]
+ ),
+ );
+
assert_eq!(
like_dict_scalar(&dict_array, "%i%").unwrap(),
BooleanArray::from(
@@ -6166,12 +6307,26 @@ mod tests {
),
);
+ assert_eq!(
+ like_utf8_scalar_dyn(&dict_arrayref, "%i%").unwrap(),
+ BooleanArray::from(
+ vec![Some(false), Some(true), Some(false), Some(true), None, Some(true)]
+ ),
+ );
+
assert_eq!(
like_dict_scalar(&dict_array, "%a%r%").unwrap(),
BooleanArray::from(
vec![Some(true), Some(false), Some(true), Some(false), None, Some(false)]
),
);
+
+ assert_eq!(
+ like_utf8_scalar_dyn(&dict_arrayref, "%a%r%").unwrap(),
+ BooleanArray::from(
+ vec![Some(true), Some(false), Some(true), Some(false), None, Some(false)]
+ ),
+ );
}
#[test]