You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2023/02/24 18:57:05 UTC
[spark] branch branch-3.3 updated: [SPARK-42286][SPARK-41991][SPARK-42473][3.3][SQL] Fallback to previous codegen code path for complex expr with CAST
This is an automated email from the ASF dual-hosted git repository.
gengliang pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 11eb66e3dbe [SPARK-42286][SPARK-41991][SPARK-42473][3.3][SQL] Fallback to previous codegen code path for complex expr with CAST
11eb66e3dbe is described below
commit 11eb66e3dbe3fdda5c60b31556cd6504b344b84a
Author: RunyaoChen <ru...@databricks.com>
AuthorDate: Fri Feb 24 10:56:50 2023 -0800
[SPARK-42286][SPARK-41991][SPARK-42473][3.3][SQL] Fallback to previous codegen code path for complex expr with CAST
### What changes were proposed in this pull request?
This PR fixes the internal error `Child is not Cast or ExpressionProxy of Cast` for valid `CaseWhen` expr with `Cast` expr in its branches.
Specifically, after SPARK-39865, an improved error msg for overflow exception during table insert was introduced. The improvement covers `Cast` expr and `ExpressionProxy` expr, but `CaseWhen` and other complex ones are not covered. An example below hits an internal error today.
```
create table t1 as select x FROM values (1), (2), (3) as tab(x);
create table t2 (x Decimal(9, 0));
insert into t2 select 0 - (case when x = 1 then 1 else x end) from t1 where x = 1;
```
To fix the query failure, we decide to fall back to the previous handling if the expr is not a simple `Cast` expr or `ExpressionProxy` expr.
### Why are the changes needed?
To fix the query regression introduced in SPARK-39865.
### Does this PR introduce _any_ user-facing change?
No. We just fall back to the previous error msg if the expression involving `Cast` is not a simple one.
### How was this patch tested?
- Added Unit test.
- Removed one test case for the `Child is not Cast or ExpressionProxy of Cast` internal error, as now we do not check if the child has a `Cast` expression and fall back to the previous error message.
Closes #40140 from RunyaoChen/cast_fix_branch_3.3_cp.
Lead-authored-by: RunyaoChen <ru...@databricks.com>
Co-authored-by: Runyao Chen <ru...@databricks.com>
Signed-off-by: Gengliang Wang <ge...@apache.org>
---
.../spark/sql/catalyst/expressions/Cast.scala | 36 +++++++++++++++++-----
.../org/apache/spark/sql/sources/InsertSuite.scala | 10 ++++++
2 files changed, 39 insertions(+), 7 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 737bce492c4..9028fd85b25 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -2357,21 +2357,43 @@ case class UpCast(child: Expression, target: AbstractDataType, walkedTypePath: S
* Casting a numeric value as another numeric type in store assignment. It can capture the
* arithmetic errors and show proper error messages to users.
*/
-case class CheckOverflowInTableInsert(child: AnsiCast, columnName: String) extends UnaryExpression {
- override protected def withNewChildInternal(newChild: Expression): Expression =
- copy(child = newChild.asInstanceOf[AnsiCast])
+case class CheckOverflowInTableInsert(child: Expression, columnName: String)
+ extends UnaryExpression {
+
+ override protected def withNewChildInternal(newChild: Expression): Expression = {
+ copy(child = newChild)
+ }
+
+ private def getCast: Option[AnsiCast] = child match {
+ case c: AnsiCast =>
+ Some(c)
+ case ExpressionProxy(c: AnsiCast, _, _) =>
+ Some(c)
+ case _ => None
+ }
override def eval(input: InternalRow): Any = try {
child.eval(input)
} catch {
case e: SparkArithmeticException =>
- throw QueryExecutionErrors.castingCauseOverflowErrorInTableInsert(
- child.child.dataType,
- child.dataType,
- columnName)
+ getCast match {
+ case Some(cast) =>
+ throw QueryExecutionErrors.castingCauseOverflowErrorInTableInsert(
+ cast.child.dataType,
+ cast.dataType,
+ columnName)
+ case None => throw e
+ }
}
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+ getCast match {
+ case Some(child) => doGenCodeWithBetterErrorMsg(ctx, ev, child)
+ case None => child.genCode(ctx)
+ }
+ }
+
+ def doGenCodeWithBetterErrorMsg(ctx: CodegenContext, ev: ExprCode, child: AnsiCast): ExprCode = {
val childGen = child.genCode(ctx)
val exceptionClass = classOf[SparkArithmeticException].getCanonicalName
val fromDt =
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 679a5eb2661..41ca9903b6e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -1106,6 +1106,16 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
}
}
}
+
+ test("SPARK-42286: Insert into a table select from case when with cast, positive test") {
+ withTable("t1", "t2") {
+ sql("create table t1 (x int) using parquet")
+ sql("insert into t1 values (1), (2)")
+ sql("create table t2 (x Decimal(9, 0)) using parquet")
+ sql("insert into t2 select 0 - (case when x = 1 then 1 else x end) from t1 where x = 1")
+ checkAnswer(spark.table("t2"), Row(-1))
+ }
+ }
}
class FileExistingTestFileSystem extends RawLocalFileSystem {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org