You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/11/15 11:27:52 UTC

[arrow-datafusion] branch master updated: Improve error message for regexp_match 'g' flag (#4203)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 2a1c014c1 Improve error message for regexp_match 'g' flag (#4203)
2a1c014c1 is described below

commit 2a1c014c15bf9d60d635925c7e60b9f63c50f448
Author: Jeffrey <22...@users.noreply.github.com>
AuthorDate: Tue Nov 15 22:27:47 2022 +1100

    Improve error message for regexp_match 'g' flag (#4203)
---
 datafusion/core/tests/dataframe_functions.rs      |  5 -----
 datafusion/physical-expr/src/regex_expressions.rs | 21 ++++++++++++++++++++-
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/datafusion/core/tests/dataframe_functions.rs b/datafusion/core/tests/dataframe_functions.rs
index 276aa4e16..5b643e128 100644
--- a/datafusion/core/tests/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe_functions.rs
@@ -389,15 +389,10 @@ async fn test_fn_md5() -> Result<()> {
     Ok(())
 }
 
-// TODO: tobyhede - Issue #1429
-//       https://github.com/apache/arrow-datafusion/issues/1429
-//       g flag doesn't compile
 #[tokio::test]
 #[cfg(feature = "unicode_expressions")]
 async fn test_fn_regexp_match() -> Result<()> {
     let expr = regexp_match(vec![col("a"), lit("[a-z]")]);
-    // The below will fail
-    // let expr = regexp_match( vec![col("a"), lit("[a-z]"), lit("g")]);
 
     let expected = vec![
         "+-----------------------------------+",
diff --git a/datafusion/physical-expr/src/regex_expressions.rs b/datafusion/physical-expr/src/regex_expressions.rs
index b76bb2c45..a0e754664 100644
--- a/datafusion/physical-expr/src/regex_expressions.rs
+++ b/datafusion/physical-expr/src/regex_expressions.rs
@@ -77,7 +77,13 @@ pub fn regexp_match<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
             let values = downcast_string_array_arg!(args[0], "string", T);
             let regex = downcast_string_array_arg!(args[1], "pattern", T);
             let flags = Some(downcast_string_array_arg!(args[2], "flags", T));
-            compute::regexp_match(values, regex,  flags).map_err(DataFusionError::ArrowError)
+
+            match flags {
+                Some(f) if f.iter().any(|s| s == Some("g")) => {
+                    Err(DataFusionError::Plan("regexp_match() does not support the \"global\" option".to_owned()))
+                },
+                _ => compute::regexp_match(values, regex, flags).map_err(DataFusionError::ArrowError),
+            }
         }
         other => Err(DataFusionError::Internal(format!(
             "regexp_match was called with {} arguments. It requires at least 2 and at most 3.",
@@ -397,6 +403,19 @@ mod tests {
         assert_eq!(re.as_ref(), &expected);
     }
 
+    #[test]
+    fn test_unsupported_global_flag_regexp_match() {
+        let values = StringArray::from(vec!["abc"]);
+        let patterns = StringArray::from(vec!["^(a)"]);
+        let flags = StringArray::from(vec!["g"]);
+
+        let re_err =
+            regexp_match::<i32>(&[Arc::new(values), Arc::new(patterns), Arc::new(flags)])
+                .expect_err("unsupported flag should have failed");
+
+        assert_eq!(re_err.to_string(), "Error during planning: regexp_match() does not support the \"global\" option");
+    }
+
     #[test]
     fn test_static_pattern_regexp_replace() {
         let values = StringArray::from(vec!["abc"; 5]);