You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ja...@apache.org on 2023/06/15 09:58:03 UTC

[arrow-datafusion] branch main updated: fix: from_plan shouldn't use original schema (#6595)

This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 36123ee018 fix: from_plan shouldn't use original schema (#6595)
36123ee018 is described below

commit 36123ee0183dfc8ce0fcae820a9c69e88c26fcd5
Author: jakevin <ja...@gmail.com>
AuthorDate: Thu Jun 15 17:57:57 2023 +0800

    fix: from_plan shouldn't use original schema (#6595)
---
 datafusion/common/src/dfschema.rs                  |   8 +-
 datafusion/core/tests/sql/expr.rs                  |  15 ++-
 .../core/tests/sqllogictests/test_files/array.slt  | 115 +++++++++++----------
 datafusion/expr/src/utils.rs                       |  11 +-
 4 files changed, 79 insertions(+), 70 deletions(-)

diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index 0416086d81..292c19886b 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -384,8 +384,12 @@ impl DFSchema {
         let self_fields = self.fields().iter();
         let other_fields = other.fields().iter();
         self_fields.zip(other_fields).all(|(f1, f2)| {
-            f1.qualifier() == f2.qualifier()
-                && f1.name() == f2.name()
+            // TODO: resolve field when exist alias
+            // f1.qualifier() == f2.qualifier()
+            //     && f1.name() == f2.name()
+            // column(t1.a) field is "t1"."a"
+            // column(x) as t1.a field is ""."t1.a"
+            f1.qualified_name() == f2.qualified_name()
                 && Self::datatype_is_semantically_equal(f1.data_type(), f2.data_type())
         })
     }
diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs
index 6783670545..53552b1695 100644
--- a/datafusion/core/tests/sql/expr.rs
+++ b/datafusion/core/tests/sql/expr.rs
@@ -512,15 +512,22 @@ async fn test_regex_expressions() -> Result<()> {
 
 #[tokio::test]
 async fn test_cast_expressions() -> Result<()> {
+    test_expression!("CAST('0' AS INT)", "0");
+    test_expression!("CAST(NULL AS INT)", "NULL");
+    test_expression!("TRY_CAST('0' AS INT)", "0");
+    test_expression!("TRY_CAST('x' AS INT)", "NULL");
+    Ok(())
+}
+
+#[tokio::test]
+#[ignore]
+// issue: https://github.com/apache/arrow-datafusion/issues/6596
+async fn test_array_cast_expressions() -> Result<()> {
     test_expression!("CAST([1,2,3,4] AS INT[])", "[1, 2, 3, 4]");
     test_expression!(
         "CAST([1,2,3,4] AS NUMERIC(10,4)[])",
         "[1.0000, 2.0000, 3.0000, 4.0000]"
     );
-    test_expression!("CAST('0' AS INT)", "0");
-    test_expression!("CAST(NULL AS INT)", "NULL");
-    test_expression!("TRY_CAST('0' AS INT)", "0");
-    test_expression!("TRY_CAST('x' AS INT)", "NULL");
     Ok(())
 }
 
diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt
index 459046136b..44453546f3 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -61,17 +61,18 @@ select make_array(make_array()), make_array(make_array(make_array()))
 ----
 [[]] [[[]]]
 
+# TODO issue: https://github.com/apache/arrow-datafusion/issues/6596
 # array_append scalar function #1
-query ? rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\)
 select array_append(make_array(), 4);
-----
-[4]
 
 # array_append scalar function #2
-query ?? rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\)
 select array_append(make_array(), make_array()), array_append(make_array(), make_array(4));
-----
-[[]] [[4]]
 
 # array_append scalar function #3
 query ??? rowsort
@@ -80,16 +81,16 @@ select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3
 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
 # array_prepend scalar function #1
-query ? rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\)
 select array_prepend(4, make_array());
-----
-[4]
 
 # array_prepend scalar function #2
-query ?? rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\)
 select array_prepend(make_array(), make_array()), array_prepend(make_array(4), make_array());
-----
-[[]] [[4]]
 
 # array_prepend scalar function #3
 query ??? rowsort
@@ -98,22 +99,22 @@ select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0,
 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
 # array_fill scalar function #1
-query ??? rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered:  [...]
 select array_fill(11, make_array(1, 2, 3)), array_fill(3, make_array(2, 3)), array_fill(2, make_array(2));
-----
-[[[11, 11, 11], [11, 11, 11]]] [[3, 3, 3], [3, 3, 3]] [2, 2]
 
 # array_fill scalar function #2
-query ?? rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered:  [...]
 select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2, 2, 2));
-----
-[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]]]
 
 # array_fill scalar function #3
-query ?
+query error DataFusion error: SQL error: TokenizerError\("Unterminated string literal at Line: 2, Column 856"\)
+caused by
+Internal error: Optimizer rule 'simplify_expressions' failed, due to generate a different schema, original schema: DFSchema \{ fields: \[DFField \{ qualifier: None, field: Field \{ name: "array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \} \}\], metadata: \{\} \}, new schema: DFSchema \{ fields: \[DF [...]
 select array_fill(1, make_array())
-----
-[]
 
 # array_concat scalar function #1
 query ?? rowsort
@@ -146,10 +147,10 @@ select array_concat(make_array(2, 3), make_array());
 [2, 3]
 
 # array_concat scalar function #6
-query ? rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\)
 select array_concat(make_array(), make_array(2, 3));
-----
-[2, 3]
 
 # array_position scalar function #1
 query III
@@ -164,10 +165,10 @@ select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2,
 4 5 2
 
 # array_positions scalar function
-query III
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8
 select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1);
-----
-[3, 4] [5] [1, 2, 3]
 
 # array_replace scalar function
 query ???
@@ -176,22 +177,22 @@ select array_replace(make_array(1, 2, 3, 4), 2, 3), array_replace(make_array(1,
 [1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3]
 
 # array_to_string scalar function
-query ???
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Arrow error: Cast error: Cannot cast string '1\-2\-3\-4\-5' to value of Int64 type
 select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|');
-----
-h,e,l,l,o 1-2-3-4-5 1|2|3
 
 # array_to_string scalar function #2
-query ???
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Arrow error: Cast error: Cannot cast string '1\+2\+3\+4\+5\+6' to value of Int64 type
 select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_fill(3, [3, 2, 2]), '/\');
-----
-11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3
 
 # array_to_string scalar function #3
-query ?
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert Utf8 to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\)
 select array_to_string(make_array(), ',')
-----
-(empty)
 
 # cardinality scalar function
 query III
@@ -200,10 +201,10 @@ select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinali
 5 3 5
 
 # cardinality scalar function #2
-query II
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered:  [...]
 select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_fill(3, array[3, 2, 3]));
-----
-6 18
 
 # cardinality scalar function #3
 query II
@@ -218,10 +219,10 @@ select trim_array(make_array(1, 2, 3, 4, 5), 2), trim_array(['h', 'e', 'l', 'l',
 [1, 2, 3] [h, e] [1.0]
 
 # trim_array scalar function #2
-query ??
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered:  [...]
 select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3, 4, 2]), 2);
-----
-[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]]
 
 # trim_array scalar function #3
 query ?
@@ -254,10 +255,10 @@ select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2,
 NULL NULL 2
 
 # array_length scalar function #4
-query IIII rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered:  [...]
 select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), array_length(array_fill(3, [3, 2, 5]), 4);
-----
-3 2 5 NULL
 
 # array_length scalar function #5
 query III rowsort
@@ -266,22 +267,22 @@ select array_length(make_array()), array_length(make_array(), 1), array_length(m
 0 0 NULL
 
 # array_dims scalar function
-query III rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8
 select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]]));
-----
-[3] [2, 2] [1, 1, 1, 2, 1]
 
 # array_dims scalar function #2
-query II rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered:  [...]
 select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5, 4]));
-----
-[1, 2, 3] [2, 5, 4]
 
 # array_dims scalar function #3
-query II rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8
 select array_dims(make_array()), array_dims(make_array(make_array()))
-----
-[0] [1, 0]
 
 # array_ndims scalar function
 query III rowsort
@@ -290,10 +291,10 @@ select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4]))
 1 2 5
 
 # array_ndims scalar function #2
-query II rowsort
+query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
+caused by
+Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered:  [...]
 select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]);
-----
-3 21
 
 # array_ndims scalar function #3
 query II rowsort
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index c2eabea857..05ebab3d99 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -730,13 +730,10 @@ pub fn from_plan(
     inputs: &[LogicalPlan],
 ) -> Result<LogicalPlan> {
     match plan {
-        LogicalPlan::Projection(Projection { schema, .. }) => {
-            Ok(LogicalPlan::Projection(Projection::try_new_with_schema(
-                expr.to_vec(),
-                Arc::new(inputs[0].clone()),
-                schema.clone(),
-            )?))
-        }
+        LogicalPlan::Projection(_) => Ok(LogicalPlan::Projection(Projection::try_new(
+            expr.to_vec(),
+            Arc::new(inputs[0].clone()),
+        )?)),
         LogicalPlan::Dml(DmlStatement {
             table_name,
             table_schema,