You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2023/06/28 03:02:25 UTC

[spark] branch master updated: [SPARK-44206][SQL] DataSet.selectExpr scope Session.active

This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 1a1ae1f8cf6 [SPARK-44206][SQL] DataSet.selectExpr scope Session.active
1a1ae1f8cf6 is described below

commit 1a1ae1f8cf6c1ba6f8fc40081db4a0782a54dd66
Author: zml1206 <zh...@gmail.com>
AuthorDate: Wed Jun 28 11:02:09 2023 +0800

    [SPARK-44206][SQL] DataSet.selectExpr scope Session.active
    
    ### What changes were proposed in this pull request?
    `Dataset.selectExpr` is now covered by withActive, to scope Session.active.
    
    ### Why are the changes needed?
    [SPARK-30798](https://issues.apache.org/jira/browse/SPARK-30798) mentioned that all SparkSession dataset methods should be covered by withActive, but `selectExpr` is not.
    For example:
    ```
     val clone = spark.cloneSession()
    clone.conf.set("spark.sql.legacy.interval.enabled", "true")
    // sql1
    clone.sql("select '2023-01-01'+ INTERVAL 1 YEAR as b").show()
    // sql2
    clone.sql("select '2023-01-01' as a").selectExpr("a + INTERVAL 1 YEAR as b").show()
    ```
    sql1 can be executed successfully, but sql2 fails.
    Error message:
    ```
    [DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "(a + INTERVAL '1' YEAR)" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("DOUBLE" and "INTERVAL YEAR").; line 1 pos 0;
    'Project [(cast(a#2 as double) + INTERVAL '1' YEAR) AS b#4]
    +- Project [2023-01-01 AS a#2]
       +- OneRowRelation
    
    org.apache.spark.sql.AnalysisException: [DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "(a + INTERVAL '1' YEAR)" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("DOUBLE" and "INTERVAL YEAR").; line 1 pos 0;
    'Project [(cast(a#2 as double) + INTERVAL '1' YEAR) AS b#4]
    +- Project [2023-01-01 AS a#2]
       +- OneRowRelation
    
            at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:73)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6(CheckAnalysis.scala:280)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$6$adapted(CheckAnalysis.scala:267)
            at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:295)
            at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1(TreeNode.scala:294)
            at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$foreachUp$1$adapted(TreeNode.scala:294)
            at scala.collection.Iterator.foreach(Iterator.scala:943)
            at scala.collection.Iterator.foreach$(Iterator.scala:943)
            at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
            at scala.collection.IterableLike.foreach(IterableLike.scala:74)
            at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
            at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
            at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:294)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$5(CheckAnalysis.scala:267)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$5$adapted(CheckAnalysis.scala:267)
            at scala.collection.immutable.Stream.foreach(Stream.scala:533)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2(CheckAnalysis.scala:267)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$2$adapted(CheckAnalysis.scala:182)
            at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:295)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0(CheckAnalysis.scala:182)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis0$(CheckAnalysis.scala:164)
            at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis0(Analyzer.scala:187)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:160)
            at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:150)
            at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:187)
            at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:210)
            at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:330)
            at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:207)
            at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:76)
            at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
            at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
            at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:529)
            at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
            at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:857)
            at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
            at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:76)
            at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:74)
            at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:66)
            at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:91)
            at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:857)
            at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:89)
            at org.apache.spark.sql.Dataset.withPlan(Dataset.scala:4403)
            at org.apache.spark.sql.Dataset.select(Dataset.scala:1624)
            at org.apache.spark.sql.Dataset.selectExpr(Dataset.scala:1658)
            at org.apache.spark.sql.DataFrameSuite.$anonfun$new$729(DataFrameSuite.scala:3632)
            at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
            at org.scalatest.enablers.Timed$$anon$1.timeoutAfter(Timed.scala:127)
            at org.scalatest.concurrent.TimeLimits$.failAfterImpl(TimeLimits.scala:282)
            at org.scalatest.concurrent.TimeLimits.failAfter(TimeLimits.scala:231)
            at org.scalatest.concurrent.TimeLimits.failAfter$(TimeLimits.scala:230)
            at org.apache.spark.SparkFunSuite.failAfter(SparkFunSuite.scala:69)
            at org.apache.spark.SparkFunSuite.$anonfun$test$2(SparkFunSuite.scala:155)
            at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
            at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
            at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
            at org.scalatest.Transformer.apply(Transformer.scala:22)
            at org.scalatest.Transformer.apply(Transformer.scala:20)
            at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:226)
            at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:227)
            at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:224)
            at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:236)
            at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
            at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:236)
            at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:218)
            at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:69)
            at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234)
            at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227)
            at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:69)
            at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:269)
            at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
            at scala.collection.immutable.List.foreach(List.scala:431)
            at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
            at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
            at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
            at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:269)
            at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:268)
            at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1564)
            at org.scalatest.Suite.run(Suite.scala:1114)
            at org.scalatest.Suite.run$(Suite.scala:1096)
            at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1564)
            at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:273)
            at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
            at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:273)
            at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:272)
            at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:69)
            at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
            at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
            at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
            at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:69)
            at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:47)
            at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1321)
            at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1315)
            at scala.collection.immutable.List.foreach(List.scala:431)
            at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1315)
            at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:992)
            at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:970)
            at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1481)
            at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:970)
            at org.scalatest.tools.Runner$.run(Runner.scala:798)
            at org.scalatest.tools.Runner.run(Runner.scala)
            at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2or3(ScalaTestRunner.java:38)
            at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:25)
    ```
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    UT
    
    Closes #41759 from zml1206/SPARK-44206.
    
    Authored-by: zml1206 <zh...@gmail.com>
    Signed-off-by: Kent Yao <ya...@apache.org>
---
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala        | 2 +-
 sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 9f204658175..3e0c692b00f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1654,7 +1654,7 @@ class Dataset[T] private[sql](
    * @since 2.0.0
    */
   @scala.annotation.varargs
-  def selectExpr(exprs: String*): DataFrame = {
+  def selectExpr(exprs: String*): DataFrame = sparkSession.withActive {
     select(exprs.map { expr =>
       Column(sparkSession.sessionState.sqlParser.parseExpression(expr))
     }: _*)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 55ea09b5945..99f2ce9706d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -3624,6 +3624,14 @@ class DataFrameSuite extends QueryTest
     val df = Seq("0.5944910").toDF("a")
     checkAnswer(df.selectExpr("cast(a as decimal(7,7)) div 100"), Row(0))
   }
+
+  test("SPARK-44206: Dataset.selectExpr scope Session.active") {
+    val _spark = spark.newSession()
+    _spark.conf.set("spark.sql.legacy.interval.enabled", "true")
+    val df1 = _spark.sql("select '2023-01-01'+ INTERVAL 1 YEAR as b")
+    val df2 = _spark.sql("select '2023-01-01' as a").selectExpr("a + INTERVAL 1 YEAR as b")
+    checkAnswer(df1, df2)
+  }
 }
 
 case class GroupByKey(a: Int, b: Int)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org