Posted to commits@spark.apache.org by ge...@apache.org on 2023/06/21 17:32:48 UTC

[spark] branch master updated: [SPARK-43205][SQL][FOLLOWUP] remove unnecessary abstraction for `withIdentClause`

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a4fb7cceb44 [SPARK-43205][SQL][FOLLOWUP] remove unnecessary abstraction for `withIdentClause`
a4fb7cceb44 is described below

commit a4fb7cceb441ddd30ce6613a27ba9b62402911fd
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Wed Jun 21 10:32:36 2023 -0700

    [SPARK-43205][SQL][FOLLOWUP] remove unnecessary abstraction for `withIdentClause`
    
    ### What changes were proposed in this pull request?
    
    This is a followup of https://github.com/apache/spark/pull/41385. This PR adds `withFuncIdentClause` for function identifiers, so that the related methods can be simpler.
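
    For context, a minimal, self-contained Scala sketch of the dispatch shape
    the new helper implements. The types below are stand-in stubs, not the
    real ANTLR parser contexts or Catalyst classes, but the
    null-check-and-dispatch logic mirrors the diff:

        object IdentClauseSketch {
          // Stand-in stubs for the real ANTLR parser contexts and Catalyst trees.
          final case class ExpressionContext(sql: String)
          final case class FunctionNameContext(
              expression: ExpressionContext, parts: Seq[String])
          sealed trait Expression
          // Defers identifier resolution when the name comes from IDENTIFIER(<expr>).
          final case class Deferred(
              nameExpr: ExpressionContext,
              builder: Seq[String] => Expression) extends Expression
          final case class UnresolvedFunction(name: Seq[String]) extends Expression

          // Old shape: every caller destructures the context and supplies a thunk.
          def withIdentClause(
              ctx: ExpressionContext,
              getIdent: () => Seq[String],
              builder: Seq[String] => Expression): Expression =
            if (ctx != null) Deferred(ctx, builder) else builder(getIdent())

          // New shape: the overload takes the function-name context directly and
          // owns the destructuring, so call sites shrink to two arguments.
          def withFuncIdentClause(
              ctx: FunctionNameContext,
              builder: Seq[String] => Expression): Expression = {
            val exprCtx = ctx.expression
            if (exprCtx != null) Deferred(exprCtx, builder)
            else builder(ctx.parts)
          }

          def main(args: Array[String]): Unit = {
            val funcCtx = FunctionNameContext(expression = null, parts = Seq("abs"))
            // Before: three arguments, including a fallback thunk.
            val before = withIdentClause(
              funcCtx.expression, () => funcCtx.parts, UnresolvedFunction(_))
            // After: two arguments; the helper owns the destructuring.
            val after = withFuncIdentClause(funcCtx, UnresolvedFunction(_))
            assert(before == after)
            println(after) // UnresolvedFunction(List(abs))
          }
        }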
    
    ### Why are the changes needed?
    
    code cleanup
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    existing tests
    
    Closes #41631 from cloud-fan/followup.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Gengliang Wang <ge...@apache.org>
---
 .../spark/sql/catalyst/parser/AstBuilder.scala     | 70 ++++++++++++----------
 .../analyzer-results/identifier-clause.sql.out     | 32 +++++-----
 .../sql-tests/inputs/identifier-clause.sql         | 14 ++---
 .../sql-tests/results/identifier-clause.sql.out    | 14 ++---
 4 files changed, 70 insertions(+), 60 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index abfe64f72e7..07721424a86 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -66,31 +66,44 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
   protected def withIdentClause(
       ctx: IdentifierReferenceContext,
       builder: Seq[String] => LogicalPlan): LogicalPlan = {
-    withIdentClause(
-      ctx.expression,
-      () => visitMultipartIdentifier(ctx.multipartIdentifier),
-      builder)
+    val exprCtx = ctx.expression
+    if (exprCtx != null) {
+      PlanWithUnresolvedIdentifier(withOrigin(exprCtx) { expression(exprCtx) }, builder)
+    } else {
+      builder.apply(visitMultipartIdentifier(ctx.multipartIdentifier))
+    }
   }
 
   protected def withIdentClause(
-      ctx: ExpressionContext,
-      getIdent: () => Seq[String],
+      ctx: IdentifierReferenceContext,
+      builder: Seq[String] => Expression): Expression = {
+    val exprCtx = ctx.expression
+    if (exprCtx != null) {
+      ExpressionWithUnresolvedIdentifier(withOrigin(exprCtx) { expression(exprCtx) }, builder)
+    } else {
+      builder.apply(visitMultipartIdentifier(ctx.multipartIdentifier))
+    }
+  }
+
+  protected def withFuncIdentClause(
+      ctx: FunctionNameContext,
       builder: Seq[String] => LogicalPlan): LogicalPlan = {
-    if (ctx != null) {
-      PlanWithUnresolvedIdentifier(withOrigin(ctx) { expression(ctx) }, builder)
+    val exprCtx = ctx.expression
+    if (exprCtx != null) {
+      PlanWithUnresolvedIdentifier(withOrigin(exprCtx) { expression(exprCtx) }, builder)
     } else {
-      builder.apply(getIdent())
+      builder.apply(getFunctionMultiparts(ctx))
     }
   }
 
-  protected def withIdentClause(
-      ctx: ExpressionContext,
-      getIdent: () => Seq[String],
+  protected def withFuncIdentClause(
+      ctx: FunctionNameContext,
       builder: Seq[String] => Expression): Expression = {
-    if (ctx != null) {
-      ExpressionWithUnresolvedIdentifier(withOrigin(ctx) { expression(ctx) }, builder)
+    val exprCtx = ctx.expression
+    if (exprCtx != null) {
+      ExpressionWithUnresolvedIdentifier(withOrigin(exprCtx) { expression(exprCtx) }, builder)
     } else {
-      builder.apply(getIdent())
+      builder.apply(getFunctionMultiparts(ctx))
     }
   }
 
@@ -1538,21 +1551,17 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
       Seq.empty
     }
 
-    withIdentClause(
-      func.functionName.expression,
-      () => getFunctionMultiparts(func.functionName),
-      name => {
-        if (name.length > 1) {
-          throw QueryParsingErrors.invalidTableValuedFunctionNameError(name, ctx)
-        }
+    withFuncIdentClause(func.functionName, ident => {
+      if (ident.length > 1) {
+        throw QueryParsingErrors.invalidTableValuedFunctionNameError(ident, ctx)
+      }
 
-        val tvf = UnresolvedTableValuedFunction(name, func.expression.asScala.map(expression).toSeq)
+      val tvf = UnresolvedTableValuedFunction(ident, func.expression.asScala.map(expression).toSeq)
 
-        val tvfAliases = if (aliases.nonEmpty) UnresolvedTVFAliases(name, tvf, aliases) else tvf
+      val tvfAliases = if (aliases.nonEmpty) UnresolvedTVFAliases(ident, tvf, aliases) else tvf
 
-        tvfAliases.optionalMap(func.tableAlias.strictIdentifier)(aliasPlan)
-      }
-    )
+      tvfAliases.optionalMap(func.tableAlias.strictIdentifier)(aliasPlan)
+    })
   }
 
   /**
@@ -2189,9 +2198,10 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
     val ignoreNulls =
       Option(ctx.nullsOption).map(_.getType == SqlBaseParser.IGNORE).getOrElse(false)
     val funcCtx = ctx.functionName
-    val func = withIdentClause(funcCtx.expression, () => getFunctionMultiparts(funcCtx), ident => {
-      UnresolvedFunction(ident, arguments, isDistinct, filter, ignoreNulls)
-    })
+    val func = withFuncIdentClause(
+      funcCtx,
+      ident => UnresolvedFunction(ident, arguments, isDistinct, filter, ignoreNulls)
+    )
 
     // Check if the function is evaluated in a windowed context.
     ctx.windowSpec match {
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
index 7fc18dfe04e..367d492905d 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out
@@ -69,7 +69,7 @@ CreateNamespace true
 
 
 -- !query
-CREATE TABLE s.tab(c1 INT) USING PARQUET
+CREATE TABLE s.tab(c1 INT) USING CSV
 -- !query analysis
 CreateDataSourceTableCommand `spark_catalog`.`s`.`tab`, false
 
@@ -83,7 +83,7 @@ SetNamespaceCommand [s]
 -- !query
 INSERT INTO IDENTIFIER('ta' || 'b') VALUES(1)
 -- !query analysis
-InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/s.db/tab, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/s.db/tab], Append, `spark_catalog`.`s`.`tab`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/s.db/tab), [c1]
+InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/s.db/tab, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/s.db/tab], Append, `spark_catalog`.`s`.`tab`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/s.db/tab), [c1]
 +- Project [cast(col1#x as int) AS c1#x]
    +- LocalRelation [col1#x]
 
@@ -132,7 +132,7 @@ SELECT * FROM IDENTIFIER('tab')
 -- !query analysis
 Project [c1#x]
 +- SubqueryAlias spark_catalog.s.tab
-   +- Relation spark_catalog.s.tab[c1#x] parquet
+   +- Relation spark_catalog.s.tab[c1#x] csv
 
 
 -- !query
@@ -140,7 +140,7 @@ SELECT * FROM IDENTIFIER('s.tab')
 -- !query analysis
 Project [c1#x]
 +- SubqueryAlias spark_catalog.s.tab
-   +- Relation spark_catalog.s.tab[c1#x] parquet
+   +- Relation spark_catalog.s.tab[c1#x] csv
 
 
 -- !query
@@ -148,7 +148,7 @@ SELECT * FROM IDENTIFIER('`s`.`tab`')
 -- !query analysis
 Project [c1#x]
 +- SubqueryAlias spark_catalog.s.tab
-   +- Relation spark_catalog.s.tab[c1#x] parquet
+   +- Relation spark_catalog.s.tab[c1#x] csv
 
 
 -- !query
@@ -156,7 +156,7 @@ SELECT * FROM IDENTIFIER('t' || 'a' || 'b')
 -- !query analysis
 Project [c1#x]
 +- SubqueryAlias spark_catalog.s.tab
-   +- Relation spark_catalog.s.tab[c1#x] parquet
+   +- Relation spark_catalog.s.tab[c1#x] csv
 
 
 -- !query
@@ -201,7 +201,7 @@ Project [id#xL]
 
 
 -- !query
-CREATE TABLE IDENTIFIER('tab')(c1 INT) USING parquet
+CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV
 -- !query analysis
 CreateDataSourceTableCommand `spark_catalog`.`default`.`tab`, false
 
@@ -228,7 +228,7 @@ SetCatalogAndNamespace
 
 
 -- !query
-CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING parquet
+CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING CSV
 -- !query analysis
 CreateDataSourceTableCommand `spark_catalog`.`identifier_clauses`.`tab`, false
 
@@ -241,13 +241,13 @@ DropTable true, false
 
 
 -- !query
-CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING parquet
+CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV
 -- !query analysis
 CreateDataSourceTableCommand `spark_catalog`.`identifier_clauses`.`tab`, false
 
 
 -- !query
-REPLACE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING parquet
+REPLACE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV
 -- !query analysis
 org.apache.spark.sql.AnalysisException
 {
@@ -265,7 +265,7 @@ CACHE TABLE IDENTIFIER('ta' || 'b')
 -- !query analysis
 CacheTable [tab], false, true
    +- SubqueryAlias spark_catalog.identifier_clauses.tab
-      +- Relation spark_catalog.identifier_clauses.tab[c1#x] parquet
+      +- Relation spark_catalog.identifier_clauses.tab[c1#x] csv
 
 
 -- !query
@@ -273,7 +273,7 @@ UNCACHE TABLE IDENTIFIER('ta' || 'b')
 -- !query analysis
 UncacheTable false, true
    +- SubqueryAlias spark_catalog.identifier_clauses.tab
-      +- Relation spark_catalog.identifier_clauses.tab[c1#x] parquet
+      +- Relation spark_catalog.identifier_clauses.tab[c1#x] csv
 
 
 -- !query
@@ -298,7 +298,7 @@ DropNamespace false, false
 
 
 -- !query
-CREATE TABLE tab(c1 INT) USING parquet
+CREATE TABLE tab(c1 INT) USING CSV
 -- !query analysis
 CreateDataSourceTableCommand `spark_catalog`.`default`.`tab`, false
 
@@ -306,7 +306,7 @@ CreateDataSourceTableCommand `spark_catalog`.`default`.`tab`, false
 -- !query
 INSERT INTO tab VALUES (1)
 -- !query analysis
-InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/tab, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/tab], Append, `spark_catalog`.`default`.`tab`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/tab), [c1]
+InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/tab, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/tab], Append, `spark_catalog`.`default`.`tab`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/tab), [c1]
 +- Project [cast(col1#x as int) AS c1#x]
    +- LocalRelation [col1#x]
 
@@ -316,7 +316,7 @@ SELECT c1 FROM tab
 -- !query analysis
 Project [c1#x]
 +- SubqueryAlias spark_catalog.default.tab
-   +- Relation spark_catalog.default.tab[c1#x] parquet
+   +- Relation spark_catalog.default.tab[c1#x] csv
 
 
 -- !query
@@ -1055,7 +1055,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 
 
 -- !query
-CREATE TABLE tab(IDENTIFIER('c1') INT) USING parquet
+CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV
 -- !query analysis
 org.apache.spark.sql.catalyst.parser.ParseException
 {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
index 93e67411172..a1bd500455d 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql
@@ -17,7 +17,7 @@ SELECT IDENTIFIER('c' || '1') FROM VALUES(1) AS T(c1);
 
 -- Table references
 CREATE SCHEMA IF NOT EXISTS s;
-CREATE TABLE s.tab(c1 INT) USING PARQUET;
+CREATE TABLE s.tab(c1 INT) USING CSV;
 USE SCHEMA s;
 
 INSERT INTO IDENTIFIER('ta' || 'b') VALUES(1);
@@ -40,22 +40,22 @@ SELECT IDENTIFIER('abs')(-1);
 SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1);
 
 -- Table DDL
-CREATE TABLE IDENTIFIER('tab')(c1 INT) USING parquet;
+CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV;
 DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b');
 
 CREATE SCHEMA identifier_clauses;
 USE identifier_clauses;
-CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING parquet;
+CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING CSV;
 DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' || 'tab');
-CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING parquet;
-REPLACE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING parquet;
+CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV;
+REPLACE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV;
 CACHE TABLE IDENTIFIER('ta' || 'b');
 UNCACHE TABLE IDENTIFIER('ta' || 'b');
 DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b');
 USE default;
 DROP SCHEMA identifier_clauses;
 
-CREATE TABLE tab(c1 INT) USING parquet;
+CREATE TABLE tab(c1 INT) USING CSV;
 INSERT INTO tab VALUES (1);
 SELECT c1 FROM tab;
 DESCRIBE IDENTIFIER('ta' || 'b');
@@ -135,7 +135,7 @@ SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS
 WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v);
 INSERT INTO tab(IDENTIFIER('c1')) VALUES(1);
 CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1);
-CREATE TABLE tab(IDENTIFIER('c1') INT) USING parquet;
+CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV;
 
 
 
diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
index 97220606317..9410ccc5e54 100644
--- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out
@@ -72,7 +72,7 @@ struct<>
 
 
 -- !query
-CREATE TABLE s.tab(c1 INT) USING PARQUET
+CREATE TABLE s.tab(c1 INT) USING CSV
 -- !query schema
 struct<>
 -- !query output
@@ -221,7 +221,7 @@ struct<id:bigint>
 
 
 -- !query
-CREATE TABLE IDENTIFIER('tab')(c1 INT) USING parquet
+CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV
 -- !query schema
 struct<>
 -- !query output
@@ -253,7 +253,7 @@ struct<>
 
 
 -- !query
-CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING parquet
+CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING CSV
 -- !query schema
 struct<>
 -- !query output
@@ -269,7 +269,7 @@ struct<>
 
 
 -- !query
-CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING parquet
+CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV
 -- !query schema
 struct<>
 -- !query output
@@ -277,7 +277,7 @@ struct<>
 
 
 -- !query
-REPLACE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING parquet
+REPLACE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV
 -- !query schema
 struct<>
 -- !query output
@@ -333,7 +333,7 @@ struct<>
 
 
 -- !query
-CREATE TABLE tab(c1 INT) USING parquet
+CREATE TABLE tab(c1 INT) USING CSV
 -- !query schema
 struct<>
 -- !query output
@@ -1209,7 +1209,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 
 
 -- !query
-CREATE TABLE tab(IDENTIFIER('c1') INT) USING parquet
+CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV
 -- !query schema
 struct<>
 -- !query output

