You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/11/24 08:19:53 UTC

[GitHub] [arrow-datafusion] ygf11 commented on a diff in pull request #4353: Support type coercion for join on columns

ygf11 commented on code in PR #4353:
URL: https://github.com/apache/arrow-datafusion/pull/4353#discussion_r1031174418


##########
datafusion/optimizer/src/type_coercion.rs:
##########
@@ -1114,4 +1293,145 @@ mod test {
         Ok(())
         // TODO add more test for this
     }
+
+    #[test]
+    fn test_column_join_with_different_data_types() -> Result<()> {
+        let t1 = join_test_table_scan_with_name("t1")?;
+        let t2 = join_test_table_scan_with_name("t2")?;
+
+        let join_plan = create_test_join_plan(
+            t1,
+            t2,
+            JoinType::Inner,
+            (vec![col("a")], vec![col("b")]),
+            None,
+        )?;
+
+        let expected = "Inner Join: t1.a#0 = t2.b\
+        \n  Projection: t1.a, t1.b, t1.c, CAST(t1.a AS Int32) AS t1.a#0\
+        \n    TableScan: t1\
+        \n  TableScan: t2";
+        let rule = TypeCoercion::new();
+        let mut config = OptimizerConfig::default();
+        let plan = rule.optimize(&join_plan, &mut config).unwrap();
+        assert_eq!(&format!("{:?}", plan), expected);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_column_join_with_filter_and_type_coercion() -> Result<()> {
+        let t1 = join_test_table_scan_with_name("t1")?;
+        let t2 = join_test_table_scan_with_name("t2")?;
+
+        let join_plan = create_test_join_plan(
+            t1,
+            t2,
+            JoinType::Inner,
+            (vec![col("a")], vec![col("c")]),
+            Some(Expr::Column(Column::new("t1".into(), "a")).lt(lit(10i32))),
+        )?;
+
+        let expected =
+            "Inner Join: t1.a#0 = t2.c Filter: CAST(t1.a AS Int32) < Int32(10)\
+            \n  Projection: t1.a, t1.b, t1.c, CAST(t1.a AS Int64) AS t1.a#0\
+            \n    TableScan: t1\
+            \n  TableScan: t2";
+        let rule = TypeCoercion::new();
+        let mut config = OptimizerConfig::default();
+        let plan = rule.optimize(&join_plan, &mut config).unwrap();
+        assert_eq!(&format!("{:?}", plan), expected);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_expr_join_with_different_data_types() -> Result<()> {
+        let t1 = join_test_table_scan_with_name("t1")?;
+        let t2 = join_test_table_scan_with_name("t2")?;
+
+        let left_keys = vec![col("a") + lit(1)];
+        let right_keys = vec![col("c") * lit(2)];
+        let join_plan = create_test_join_plan(
+            t1,
+            t2,
+            JoinType::Inner,
+            (left_keys, right_keys),
+            None,
+        )?;
+
+        let expected = "Projection: t1.a, t1.b, t1.c, t2.a, t2.b, t2.c\
+        \n  Inner Join: t1.a + Int32(1)#0 = t2.c * Int32(2)\
+        \n    Projection: t1.a, t1.b, t1.c, t1.a + Int32(1), CAST(t1.a + Int32(1) AS Int64) AS t1.a + Int32(1)#0\
+        \n      Projection: t1.a, t1.b, t1.c, CAST(t1.a AS Int32) + Int32(1)\
+        \n        TableScan: t1\
+        \n    Projection: t2.a, t2.b, t2.c, t2.c * CAST(Int32(2) AS Int64)\
+        \n      TableScan: t2";
+        let rule = TypeCoercion::new();
+        let mut config = OptimizerConfig::default();
+        let plan = rule.optimize(&join_plan, &mut config).unwrap();
+        assert_eq!(&format!("{:?}", plan), expected);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_multiple_keys_join_type_coercion() -> Result<()> {
+        let t1 = join_test_table_scan_with_name("t1")?;
+        let t2 = join_test_table_scan_with_name("t2")?;
+
+        let join_plan = create_test_join_plan(
+            t1,
+            t2,
+            JoinType::Inner,
+            (
+                vec![col("a"), col("b"), col("c")],
+                vec![col("b"), col("c"), col("a")],
+            ),
+            None,
+        )?;
+
+        let expected = "Inner Join: t1.a#0 = t2.b, t1.b#1 = t2.c, t1.c = t2.a#2\
+        \n  Projection: t1.a, t1.b, t1.c, CAST(t1.a AS Int32) AS t1.a#0, CAST(t1.b AS Int64) AS t1.b#1\
+        \n    TableScan: t1\
+        \n  Projection: t2.a, t2.b, t2.c, CAST(t2.a AS Int64) AS t2.a#2\
+        \n    TableScan: t2";
+        let rule = TypeCoercion::new();
+        let mut config = OptimizerConfig::default();
+        let plan = rule.optimize(&join_plan, &mut config).unwrap();
+        assert_eq!(&format!("{:?}", plan), expected);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_same_key_multiple_time_join() -> Result<()> {
+        let t1 = join_test_table_scan_with_name("t1")?;
+        let t2 = join_test_table_scan_with_name("t2")?;
+
+        // a * 2 appears twice, and both occurrences need type coercion inserted.
+        let join_plan = create_test_join_plan(
+            t1,
+            t2,
+            JoinType::Inner,
+            (
+                vec![col("a") * lit(2u8), col("a") * lit(2u8)],
+                vec![col("b"), col("c")],
+            ),
+            None,
+        )?;
+
+        let expected = "Projection: t1.a, t1.b, t1.c, t2.a, t2.b, t2.c\
+        \n  Inner Join: t1.a * UInt8(2)#0 = t2.b, t1.a * UInt8(2)#1 = t2.c\
+        \n    Projection: t1.a, t1.b, t1.c, t1.a * UInt8(2), CAST(t1.a * UInt8(2) AS Int32) AS t1.a * UInt8(2)#0, CAST(t1.a * UInt8(2) AS Int64) AS t1.a * UInt8(2)#1\
+        \n      Projection: t1.a, t1.b, t1.c, t1.a * CAST(UInt8(2) AS UInt16)\
+        \n        TableScan: t1\
+        \n    TableScan: t2";
+        let rule = TypeCoercion::new();

Review Comment:
   In this test case, the same join key expression (`t1.a * UInt8(2)`) appears twice and both occurrences need type coercion, so I give each one its own alias (`#0`, `#1`) to distinguish them from each other.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org