Posted to commits@spark.apache.org by ge...@apache.org on 2022/07/01 04:23:20 UTC

[spark] branch master updated: [SPARK-39643][SQL] Prohibit subquery expressions in DEFAULT values

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 83c5107ba7f [SPARK-39643][SQL] Prohibit subquery expressions in DEFAULT values
83c5107ba7f is described below

commit 83c5107ba7fc3345f8faf1e2a6e0c20c7da013d6
Author: Daniel Tenedorio <da...@databricks.com>
AuthorDate: Thu Jun 30 21:23:06 2022 -0700

    [SPARK-39643][SQL] Prohibit subquery expressions in DEFAULT values
    
    ### What changes were proposed in this pull request?
    
    Prohibit subquery expressions in DEFAULT values.
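    
    For example, each of the following statements is now rejected up front
    (this mirrors the new unit tests in InsertSuite; the spark-shell
    transcript below is illustrative rather than verbatim output, and
    assumes column DEFAULT support is enabled for the data source):
    
        scala> spark.sql("create table t(a string default (select 'abc')) using parquet")
        org.apache.spark.sql.AnalysisException: Failed to execute command because
        subquery expressions are not allowed in DEFAULT values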
    
    ### Why are the changes needed?
    
    Subquery expressions in DEFAULT values were not part of the original
    feature design, and their behavior has so far been untested. This change
    adds an explicit check that prohibits the syntax.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. Specifying a subquery expression in a column DEFAULT value now
    fails with a dedicated AnalysisException ("Failed to execute command
    because subquery expressions are not allowed in DEFAULT values") rather
    than a later, less specific error such as "cannot evaluate expression
    scalarsubquery() in inline table definition".
    
    ### How was this patch tested?
    
    This PR adds new unit tests covering scalar subqueries, EXISTS
    subqueries, and IN subqueries in DEFAULT values, and updates existing
    tests to expect the new error message.
    
    Closes #37035 from dtenedor/no-subquery-exprs-in-defaults.
    
    Authored-by: Daniel Tenedorio <da...@databricks.com>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 .../catalyst/util/ResolveDefaultColumnsUtil.scala  |  5 ++++
 .../spark/sql/errors/QueryCompilationErrors.scala  |  5 ++++
 .../sql/catalyst/catalog/SessionCatalogSuite.scala |  2 +-
 .../org/apache/spark/sql/sources/InsertSuite.scala | 35 ++++++++++++++--------
 4 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
index 2885f986236..2c3b1f35fb4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.{Literal => ExprLiteral}
 import org.apache.spark.sql.catalyst.optimizer.ConstantFolding
 import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
 import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION
 import org.apache.spark.sql.connector.catalog.{CatalogManager, FunctionCatalog, Identifier}
 import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
 import org.apache.spark.sql.errors.QueryCompilationErrors
@@ -141,6 +142,10 @@ object ResolveDefaultColumns {
             s"${field.name} has a DEFAULT value of $colText which fails to parse as a valid " +
             s"expression: ${ex.getMessage}")
     }
+    // Check invariants before moving on to analysis.
+    if (parsed.containsPattern(PLAN_EXPRESSION)) {
+      throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions()
+    }
     // Analyze the parse result.
     val plan = try {
       val analyzer: Analyzer = DefaultColumnAnalyzer
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 7ed5c785771..a909b362f68 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -2476,4 +2476,9 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
       s"Failed to execute command because DEFAULT values are not supported for target data " +
         "source with table provider: \"" + dataSource + "\"")
   }
+
+  def defaultValuesMayNotContainSubQueryExpressions(): Throwable = {
+    new AnalysisException(
+      "Failed to execute command because subquery expressions are not allowed in DEFAULT values")
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index da5e07d33c6..1a3566e0c62 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -164,7 +164,7 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually {
       }.getMessage.contains("fails to parse as a valid expression"))
       assert(intercept[AnalysisException] {
         ResolveDefaultColumns.analyze(columnD, statementType)
-      }.getMessage.contains("fails to resolve as a valid expression"))
+      }.getMessage.contains("subquery expressions are not allowed in DEFAULT values"))
       assert(intercept[AnalysisException] {
         ResolveDefaultColumns.analyze(columnE, statementType)
       }.getMessage.contains("statement provided a value of incompatible type"))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 7370cf4f28b..576611cade5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -1025,7 +1025,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
     object Errors {
       val COMMON_SUBSTRING = " has a DEFAULT value"
       val COLUMN_DEFAULT_NOT_FOUND = "`default` cannot be resolved."
-      val BAD_SUBQUERY = "cannot evaluate expression scalarsubquery() in inline table definition"
+      val BAD_SUBQUERY = "subquery expressions are not allowed in DEFAULT values"
     }
     // The default value fails to analyze.
     withTable("t") {
@@ -1038,21 +1038,20 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
       assert(intercept[AnalysisException] {
         sql("create table t(i boolean, s bigint default (select min(x) from badtable)) " +
           "using parquet")
-      }.getMessage.contains(Errors.COMMON_SUBSTRING))
+      }.getMessage.contains(Errors.BAD_SUBQUERY))
     }
     // The default value parses but refers to a table from the catalog.
     withTable("t", "other") {
       sql("create table other(x string) using parquet")
       assert(intercept[AnalysisException] {
         sql("create table t(i boolean, s bigint default (select min(x) from other)) using parquet")
-      }.getMessage.contains(Errors.COMMON_SUBSTRING))
+      }.getMessage.contains(Errors.BAD_SUBQUERY))
     }
     // The default value has an explicit alias. It fails to evaluate when inlined into the VALUES
     // list at the INSERT INTO time.
     withTable("t") {
-      sql("create table t(i boolean default (select false as alias), s bigint) using parquet")
       assert(intercept[AnalysisException] {
-        sql("insert into t values (default, default)")
+        sql("create table t(i boolean default (select false as alias), s bigint) using parquet")
       }.getMessage.contains(Errors.BAD_SUBQUERY))
     }
     // Explicit default values may not participate in complex expressions in the VALUES list.
@@ -1397,6 +1396,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
   test("SPARK-38811 INSERT INTO on columns added with ALTER TABLE ADD COLUMNS: Negative tests") {
     object Errors {
       val COMMON_SUBSTRING = " has a DEFAULT value"
+      val BAD_SUBQUERY = "subquery expressions are not allowed in DEFAULT values"
     }
     // The default value fails to analyze.
     withTable("t") {
@@ -1410,7 +1410,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
       sql("create table t(i boolean) using parquet")
       assert(intercept[AnalysisException] {
         sql("alter table t add column s bigint default (select min(x) from badtable)")
-      }.getMessage.contains(Errors.COMMON_SUBSTRING))
+      }.getMessage.contains(Errors.BAD_SUBQUERY))
     }
     // The default value parses but refers to a table from the catalog.
     withTable("t", "other") {
@@ -1418,7 +1418,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
       sql("create table t(i boolean) using parquet")
       assert(intercept[AnalysisException] {
         sql("alter table t add column s bigint default (select min(x) from other)")
-      }.getMessage.contains(Errors.COMMON_SUBSTRING))
+      }.getMessage.contains(Errors.BAD_SUBQUERY))
     }
     // The default value parses but the type is not coercible.
     withTable("t") {
@@ -1474,8 +1474,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
   test("SPARK-38838 INSERT INTO with defaults set by ALTER TABLE ALTER COLUMN: negative tests") {
     object Errors {
       val COMMON_SUBSTRING = " has a DEFAULT value"
-      val BAD_SUBQUERY =
-        "cannot evaluate expression CAST(scalarsubquery() AS BIGINT) in inline table definition"
+      val BAD_SUBQUERY = "subquery expressions are not allowed in DEFAULT values"
     }
     val createTable = "create table t(i boolean, s bigint) using parquet"
     val insertDefaults = "insert into t values (default, default)"
@@ -1488,12 +1487,11 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
       // The default value analyzes to a table not in the catalog.
       assert(intercept[AnalysisException] {
         sql("alter table t alter column s set default (select min(x) from badtable)")
-      }.getMessage.contains(Errors.COMMON_SUBSTRING))
+      }.getMessage.contains(Errors.BAD_SUBQUERY))
       // The default value has an explicit alias. It fails to evaluate when inlined into the VALUES
       // list at the INSERT INTO time.
-      sql("alter table t alter column s set default (select 42 as alias)")
       assert(intercept[AnalysisException] {
-        sql(insertDefaults)
+        sql("alter table t alter column s set default (select 42 as alias)")
       }.getMessage.contains(Errors.BAD_SUBQUERY))
       // The default value parses but the type is not coercible.
       assert(intercept[AnalysisException] {
@@ -1673,6 +1671,19 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
     }
   }
 
+  test("SPARK-39643 Prohibit subquery expressions in DEFAULT values") {
+    Seq(
+      "create table t(a string default (select 'abc')) using parquet",
+      "create table t(a string default exists(select 42 where true)) using parquet",
+      "create table t(a string default 1 in (select 1 union all select 2)) using parquet"
+    ).foreach { query =>
+      assert(intercept[AnalysisException] {
+        sql(query)
+      }.getMessage.contains(
+        QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions().getMessage))
+    }
+  }
+
   test("Stop task set if FileAlreadyExistsException was thrown") {
     Seq(true, false).foreach { fastFail =>
       withSQLConf("fs.file.impl" -> classOf[FileExistingTestFileSystem].getName,

