You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by yj...@apache.org on 2022/04/18 01:38:45 UTC

[arrow-datafusion] branch master updated: fix string coercion missing in Eq/NotEq operator (#2258)

This is an automated email from the ASF dual-hosted git repository.

yjshen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 22b70b8ee fix string coercion missing in Eq/NotEq operator (#2258)
22b70b8ee is described below

commit 22b70b8eeb10c48e5b853d5cad02bbacd50419ef
Author: DuRipeng <45...@qq.com>
AuthorDate: Mon Apr 18 09:38:40 2022 +0800

    fix string coercion missing in Eq/NotEq operator (#2258)
    
    * fix issue: 'LargeUtf8 = Utf8' can't be evaluated in Eq operator
    
    * add not-eq case
---
 datafusion/core/tests/sql/select.rs                | 38 ++++++++++++++++++++++
 .../physical-expr/src/coercion_rule/binary_rule.rs |  1 +
 2 files changed, 39 insertions(+)

diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs
index b5179d08a..8ce74ebf6 100644
--- a/datafusion/core/tests/sql/select.rs
+++ b/datafusion/core/tests/sql/select.rs
@@ -958,6 +958,44 @@ async fn parallel_query_with_filter() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn query_with_filter_string_type_coercion() {
+    let large_string_array = LargeStringArray::from(vec!["1", "2", "3", "4", "5"]);
+    let schema =
+        Schema::new(vec![Field::new("large_string", DataType::LargeUtf8, false)]);
+    let batch =
+        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(large_string_array)])
+            .unwrap();
+
+    let ctx = SessionContext::new();
+    let table = MemTable::try_new(batch.schema(), vec![vec![batch]]).unwrap();
+    ctx.register_table("t", Arc::new(table)).unwrap();
+    let sql = "select * from t where large_string = '1'";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+--------------+",
+        "| large_string |",
+        "+--------------+",
+        "| 1            |",
+        "+--------------+",
+    ];
+    assert_batches_eq!(expected, &actual);
+
+    let sql = "select * from t where large_string != '1'";
+    let actual = execute_to_batches(&ctx, sql).await;
+    let expected = vec![
+        "+--------------+",
+        "| large_string |",
+        "+--------------+",
+        "| 2            |",
+        "| 3            |",
+        "| 4            |",
+        "| 5            |",
+        "+--------------+",
+    ];
+    assert_batches_eq!(expected, &actual);
+}
+
 #[tokio::test]
 async fn query_empty_table() {
     let ctx = SessionContext::new();
diff --git a/datafusion/physical-expr/src/coercion_rule/binary_rule.rs b/datafusion/physical-expr/src/coercion_rule/binary_rule.rs
index b09fc340f..1233ed980 100644
--- a/datafusion/physical-expr/src/coercion_rule/binary_rule.rs
+++ b/datafusion/physical-expr/src/coercion_rule/binary_rule.rs
@@ -116,6 +116,7 @@ fn comparison_eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<Da
     comparison_binary_numeric_coercion(lhs_type, rhs_type)
         .or_else(|| dictionary_coercion(lhs_type, rhs_type))
         .or_else(|| temporal_coercion(lhs_type, rhs_type))
+        .or_else(|| string_coercion(lhs_type, rhs_type))
 }
 
 fn comparison_order_coercion(