You are viewing a plain text version of this content. The canonical (hyperlinked) version is available in the original mailing-list archive.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/11/15 11:27:52 UTC
[arrow-datafusion] branch master updated: Improve error message for regexp_match 'g' flag (#4203)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 2a1c014c1 Improve error message for regexp_match 'g' flag (#4203)
2a1c014c1 is described below
commit 2a1c014c15bf9d60d635925c7e60b9f63c50f448
Author: Jeffrey <22...@users.noreply.github.com>
AuthorDate: Tue Nov 15 22:27:47 2022 +1100
Improve error message for regexp_match 'g' flag (#4203)
---
datafusion/core/tests/dataframe_functions.rs | 5 -----
datafusion/physical-expr/src/regex_expressions.rs | 21 ++++++++++++++++++++-
2 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/datafusion/core/tests/dataframe_functions.rs b/datafusion/core/tests/dataframe_functions.rs
index 276aa4e16..5b643e128 100644
--- a/datafusion/core/tests/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe_functions.rs
@@ -389,15 +389,10 @@ async fn test_fn_md5() -> Result<()> {
Ok(())
}
-// TODO: tobyhede - Issue #1429
-// https://github.com/apache/arrow-datafusion/issues/1429
-// g flag doesn't compile
#[tokio::test]
#[cfg(feature = "unicode_expressions")]
async fn test_fn_regexp_match() -> Result<()> {
let expr = regexp_match(vec![col("a"), lit("[a-z]")]);
- // The below will fail
- // let expr = regexp_match( vec![col("a"), lit("[a-z]"), lit("g")]);
let expected = vec![
"+-----------------------------------+",
diff --git a/datafusion/physical-expr/src/regex_expressions.rs b/datafusion/physical-expr/src/regex_expressions.rs
index b76bb2c45..a0e754664 100644
--- a/datafusion/physical-expr/src/regex_expressions.rs
+++ b/datafusion/physical-expr/src/regex_expressions.rs
@@ -77,7 +77,13 @@ pub fn regexp_match<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
let values = downcast_string_array_arg!(args[0], "string", T);
let regex = downcast_string_array_arg!(args[1], "pattern", T);
let flags = Some(downcast_string_array_arg!(args[2], "flags", T));
- compute::regexp_match(values, regex, flags).map_err(DataFusionError::ArrowError)
+
+ match flags {
+ Some(f) if f.iter().any(|s| s == Some("g")) => {
+ Err(DataFusionError::Plan("regexp_match() does not support the \"global\" option".to_owned()))
+ },
+ _ => compute::regexp_match(values, regex, flags).map_err(DataFusionError::ArrowError),
+ }
}
other => Err(DataFusionError::Internal(format!(
"regexp_match was called with {} arguments. It requires at least 2 and at most 3.",
@@ -397,6 +403,19 @@ mod tests {
assert_eq!(re.as_ref(), &expected);
}
+ #[test]
+ fn test_unsupported_global_flag_regexp_match() {
+ let values = StringArray::from(vec!["abc"]);
+ let patterns = StringArray::from(vec!["^(a)"]);
+ let flags = StringArray::from(vec!["g"]);
+
+ let re_err =
+ regexp_match::<i32>(&[Arc::new(values), Arc::new(patterns), Arc::new(flags)])
+ .expect_err("unsupported flag should have failed");
+
+ assert_eq!(re_err.to_string(), "Error during planning: regexp_match() does not support the \"global\" option");
+ }
+
#[test]
fn test_static_pattern_regexp_replace() {
let values = StringArray::from(vec!["abc"; 5]);