You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2021/07/19 08:23:52 UTC
[spark] branch branch-3.2 updated: [SPARK-36093][SQL]
RemoveRedundantAliases should not change Command's parameter's expression's
name
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 84a6fa2 [SPARK-36093][SQL] RemoveRedundantAliases should not change Command's parameter's expression's name
84a6fa2 is described below
commit 84a6fa22b3f12c5f775459619967b8611f9a5454
Author: Angerszhuuuu <an...@gmail.com>
AuthorDate: Mon Jul 19 16:22:31 2021 +0800
[SPARK-36093][SQL] RemoveRedundantAliases should not change Command's parameter's expression's name
### What changes were proposed in this pull request?
RemoveRedundantAliases may change DataWritingCommand's parameter's attribute name.
In the unit test's case, the partition column in `partitionColumns` is named `CAL_DT` before RemoveRedundantAliases runs; the rule renames it to `cal_dt`, which then causes the incorrect results shown below.
### Why are the changes needed?
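Bug fix: after RemoveRedundantAliases renames the partition column, rows inserted into the partitioned table are not returned by later queries (see the example below).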
### Does this PR introduce _any_ user-facing change?
Yes. Consider the following SQL:
```
sql("create table t1(cal_dt date) using parquet")
sql("insert into t1 values (date'2021-06-27'),(date'2021-06-28'),(date'2021-06-29'),(date'2021-06-30')")
sql("create view t1_v as select * from t1")
sql("CREATE TABLE t2 USING PARQUET PARTITIONED BY (CAL_DT) AS SELECT 1 AS FLAG,CAL_DT FROM t1_v WHERE CAL_DT BETWEEN '2021-06-27' AND '2021-06-28'")
sql("INSERT INTO t2 SELECT 2 AS FLAG,CAL_DT FROM t1_v WHERE CAL_DT BETWEEN '2021-06-29' AND '2021-06-30'")
```
Before this PR:
```
sql("SELECT * FROM t2 WHERE CAL_DT BETWEEN '2021-06-29' AND '2021-06-30'").show
+----+------+
|FLAG|CAL_DT|
+----+------+
+----+------+
sql("SELECT * FROM t2 ").show
+----+----------+
|FLAG| CAL_DT|
+----+----------+
| 1|2021-06-27|
| 1|2021-06-28|
+----+----------+
```
After this PR:
```
sql("SELECT * FROM t2 WHERE CAL_DT BETWEEN '2021-06-29' AND '2021-06-30'").show
+----+----------+
|FLAG|    CAL_DT|
+----+----------+
|   2|2021-06-29|
|   2|2021-06-30|
+----+----------+
sql("SELECT * FROM t2 ").show
+----+----------+
|FLAG| CAL_DT|
+----+----------+
| 1|2021-06-27|
| 1|2021-06-28|
| 2|2021-06-29|
| 2|2021-06-30|
+----+----------+
```
### How was this patch tested?
Added a unit test to SQLQuerySuite.
Closes #33324 from AngersZhuuuu/SPARK-36093.
Authored-by: Angerszhuuuu <an...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 313f3c546016ea87f040e755ac88f7fe58deff95)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../spark/sql/catalyst/optimizer/Optimizer.scala | 2 +-
.../scala/org/apache/spark/sql/SQLQuerySuite.scala | 39 ++++++++++++++++++++--
2 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index c79fd7a..aa2221b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -501,7 +501,7 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] {
// Transform the expressions.
newNode.mapExpressions { expr =>
clean(expr.transform {
- case a: Attribute => mapping.getOrElse(a, a)
+ case a: Attribute => mapping.get(a).map(_.withName(a.name)).getOrElse(a)
})
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 1e23c11..ed3b479 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -33,12 +33,12 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial}
import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, NestedColumnAliasingSuite}
import org.apache.spark.sql.catalyst.plans.logical.{LocalLimit, Project, RepartitionByExpression, Sort}
import org.apache.spark.sql.catalyst.util.StringUtils
-import org.apache.spark.sql.execution.UnionExec
+import org.apache.spark.sql.execution.{CommandResultExec, UnionExec}
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec}
import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
-import org.apache.spark.sql.execution.command.FunctionsCommand
-import org.apache.spark.sql.execution.datasources.LogicalRelation
+import org.apache.spark.sql.execution.command.{DataWritingCommandExec, FunctionsCommand}
+import org.apache.spark.sql.execution.datasources.{InsertIntoHadoopFsRelationCommand, LogicalRelation}
import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
@@ -4169,6 +4169,39 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
Row(1, 2, 1, 2) :: Nil)
}
}
+
+ test("SPARK-36093: RemoveRedundantAliases should not change expression's name") {
+ withTable("t1", "t2") {
+ withView("t1_v") {
+ sql("CREATE TABLE t1(cal_dt DATE) USING PARQUET")
+ sql(
+ """
+ |INSERT INTO t1 VALUES
+ |(date'2021-06-27'),
+ |(date'2021-06-28'),
+ |(date'2021-06-29'),
+ |(date'2021-06-30')""".stripMargin)
+ sql("CREATE VIEW t1_v AS SELECT * FROM t1")
+ sql(
+ """
+ |CREATE TABLE t2(FLAG INT, CAL_DT DATE)
+ |USING PARQUET
+ |PARTITIONED BY (CAL_DT)""".stripMargin)
+ val insert = sql(
+ """
+ |INSERT INTO t2 SELECT 2 AS FLAG,CAL_DT FROM t1_v
+ |WHERE CAL_DT BETWEEN '2021-06-29' AND '2021-06-30'""".stripMargin)
+ insert.queryExecution.executedPlan.collectFirst {
+ case CommandResultExec(_, DataWritingCommandExec(
+ i: InsertIntoHadoopFsRelationCommand, _), _) => i
+ }.get.partitionColumns.map(_.name).foreach(name => assert(name == "CAL_DT"))
+ checkAnswer(sql("SELECT FLAG, CAST(CAL_DT as STRING) FROM t2 "),
+ Row(2, "2021-06-29") :: Row(2, "2021-06-30") :: Nil)
+ checkAnswer(sql("SHOW PARTITIONS t2"),
+ Row("CAL_DT=2021-06-29") :: Row("CAL_DT=2021-06-30") :: Nil)
+ }
+ }
+ }
}
case class Foo(bar: Option[String])
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org