You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2022/06/28 14:41:17 UTC

[spark] branch branch-3.3 updated: [SPARK-39570][SQL] Inline table should allow expressions with alias

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 1f6b142e696 [SPARK-39570][SQL] Inline table should allow expressions with alias
1f6b142e696 is described below

commit 1f6b142e6966cbbda08f1a568974734d2d4f6208
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Tue Jun 28 22:40:41 2022 +0800

    [SPARK-39570][SQL] Inline table should allow expressions with alias
    
    ### What changes were proposed in this pull request?
    
    `ResolveInlineTables` requires the column expressions to be foldable, however, `Alias` is not foldable. Inline-table does not use the names in the column expressions, and we should trim aliases before checking foldable. We did something similar in `ResolvePivot`.
    
    ### Why are the changes needed?
    
    To make inline-table handle more cases, and also fixed a regression caused by https://github.com/apache/spark/pull/31844 . After https://github.com/apache/spark/pull/31844 , we always add an alias for function literals like `current_timestamp`, which breaks inline table.
    
    ### Does this PR introduce _any_ user-facing change?
    
    yea, some failed queries can be run after this PR.
    
    ### How was this patch tested?
    
    new tests
    
    Closes #36967 from cloud-fan/bug.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 1df992f03fd935ac215424576530ab57d1ab939b)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala    |  7 ++-----
 .../sql/catalyst/analysis/ResolveInlineTables.scala      |  5 +++--
 .../sql/catalyst/analysis/ResolveInlineTablesSuite.scala |  6 +++++-
 .../src/test/resources/sql-tests/inputs/inline-table.sql |  6 ++++++
 .../resources/sql-tests/results/inline-table.sql.out     | 16 ++++++++++++++++
 5 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 03f021350a2..37024e15377 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -739,7 +739,7 @@ class Analyzer(override val catalogManager: CatalogManager)
     }
   }
 
-  object ResolvePivot extends Rule[LogicalPlan] {
+  object ResolvePivot extends Rule[LogicalPlan] with AliasHelper {
     def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning(
       _.containsPattern(PIVOT), ruleId) {
       case p: Pivot if !p.childrenResolved || !p.aggregates.forall(_.resolved)
@@ -753,10 +753,7 @@ class Analyzer(override val catalogManager: CatalogManager)
         aggregates.foreach(checkValidAggregateExpression)
         // Check all pivot values are literal and match pivot column data type.
         val evalPivotValues = pivotValues.map { value =>
-          val foldable = value match {
-            case Alias(v, _) => v.foldable
-            case _ => value.foldable
-          }
+          val foldable = trimAliases(value).foldable
           if (!foldable) {
             throw QueryCompilationErrors.nonLiteralPivotValError(value)
           }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
index 91d724dc013..b91745a0cca 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import scala.util.control.NonFatal
 
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.AliasHelper
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.AlwaysProcess
@@ -28,7 +29,7 @@ import org.apache.spark.sql.types.{StructField, StructType}
 /**
  * An analyzer rule that replaces [[UnresolvedInlineTable]] with [[LocalRelation]].
  */
-object ResolveInlineTables extends Rule[LogicalPlan] with CastSupport {
+object ResolveInlineTables extends Rule[LogicalPlan] with CastSupport with AliasHelper {
   override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning(
     AlwaysProcess.fn, ruleId) {
     case table: UnresolvedInlineTable if table.expressionsResolved =>
@@ -65,7 +66,7 @@ object ResolveInlineTables extends Rule[LogicalPlan] with CastSupport {
     table.rows.foreach { row =>
       row.foreach { e =>
         // Note that nondeterministic expressions are not supported since they are not foldable.
-        if (!e.resolved || !e.foldable) {
+        if (!e.resolved || !trimAliases(e).foldable) {
           e.failAnalysis(s"cannot evaluate expression ${e.sql} in inline table definition")
         }
       }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala
index 16d23153c1c..2e6c6e4eaf4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import org.scalatest.BeforeAndAfter
 
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.{Cast, Literal, Rand}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Literal, Rand}
 import org.apache.spark.sql.catalyst.expressions.aggregate.Count
 import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
 import org.apache.spark.sql.types.{LongType, NullType, TimestampType}
@@ -38,6 +38,10 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter {
     ResolveInlineTables.validateInputEvaluable(
       UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)))))
 
+    // Alias is OK
+    ResolveInlineTables.validateInputEvaluable(
+      UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(Alias(lit(1), "a")()))))
+
     // nondeterministic (rand) should not work
     intercept[AnalysisException] {
       ResolveInlineTables.validateInputEvaluable(
diff --git a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql
index b3ec956cd17..fd8bb2d837d 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql
@@ -23,6 +23,12 @@ select * from values ("one", 1), ("two", 2L) as data(a, b);
 -- foldable expressions
 select * from values ("one", 1 + 0), ("two", 1 + 3L) as data(a, b);
 
+-- expressions with alias
+select * from values ("one", 1 as one) as data(a, b);
+
+-- literal functions
+select a from values ("one", current_timestamp) as data(a, b);
+
 -- complex types
 select * from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b);
 
diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
index 401d684a55b..d9aa34da6f1 100644
--- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
@@ -73,6 +73,22 @@ one	1
 two	4
 
 
+-- !query
+select * from values ("one", 1 as one) as data(a, b)
+-- !query schema
+struct<a:string,b:int>
+-- !query output
+one	1
+
+
+-- !query
+select a from values ("one", current_timestamp) as data(a, b)
+-- !query schema
+struct<a:string>
+-- !query output
+one
+
+
 -- !query
 select * from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b)
 -- !query schema


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org