You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/06/16 20:30:24 UTC
[spark] branch master updated: [SPARK-44071] Define and use Unresolved[Leaf|Unary]Node traits
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 747953eb5c4 [SPARK-44071] Define and use Unresolved[Leaf|Unary]Node traits
747953eb5c4 is described below
commit 747953eb5c46e121faf476a060049f1423ae7e91
Author: Ryan Johnson <ry...@databricks.com>
AuthorDate: Fri Jun 16 23:30:08 2023 +0300
[SPARK-44071] Define and use Unresolved[Leaf|Unary]Node traits
### What changes were proposed in this pull request?
Looking at [unresolved.scala](https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala), catalyst would benefit from an `UnresolvedNode` trait that various `UnresolvedFoo` classes could inherit:
```scala
trait UnresolvedNode extends LogicalPlan {
  override def output: Seq[Attribute] = Nil
  override lazy val resolved = false
}
```
Today, the code is duplicated in ~20 locations (7 of them in that one file).
### Why are the changes needed?
Reduces redundancy, improves readability, documents programmer intent better.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Mild refactor, existing unit tests suffice.
Closes #41617 from ryan-johnson-databricks/unresolved-node-trait.
Authored-by: Ryan Johnson <ry...@databricks.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../sql/catalyst/analysis/RelationTimeTravel.scala | 8 ++--
.../spark/sql/catalyst/analysis/parameters.scala | 10 ++---
.../spark/sql/catalyst/analysis/unresolved.scala | 48 +++++++++-------------
.../sql/catalyst/analysis/v2ResolutionPlans.scala | 32 +++------------
.../spark/sql/catalyst/catalog/interface.scala | 11 ++---
.../plans/logical/basicLogicalOperators.scala | 6 +--
.../sql/catalyst/analysis/AnalysisErrorSuite.scala | 5 +--
7 files changed, 39 insertions(+), 81 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationTimeTravel.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationTimeTravel.scala
index 4daefa816a5..6e0d0998883 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationTimeTravel.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationTimeTravel.scala
@@ -17,8 +17,8 @@
package org.apache.spark.sql.catalyst.analysis
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
-import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan}
+import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.trees.TreePattern.{RELATION_TIME_TRAVEL, TreePattern}
/**
@@ -29,8 +29,6 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{RELATION_TIME_TRAVEL, Tr
case class RelationTimeTravel(
relation: LogicalPlan,
timestamp: Option[Expression],
- version: Option[String]) extends LeafNode {
- override def output: Seq[Attribute] = Nil
- override lazy val resolved: Boolean = false
+ version: Option[String]) extends UnresolvedLeafNode {
override val nodePatterns: Seq[TreePattern] = Seq(RELATION_TIME_TRAVEL)
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala
index 2a31e90465c..a00f9cec92c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala
@@ -18,8 +18,8 @@
package org.apache.spark.sql.catalyst.analysis
import org.apache.spark.SparkException
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, LeafExpression, Literal, SubqueryExpression, Unevaluable}
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
+import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression, Literal, SubqueryExpression, Unevaluable}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.TreePattern.{PARAMETER, PARAMETERIZED_QUERY, TreePattern, UNRESOLVED_WITH}
import org.apache.spark.sql.errors.QueryErrorsBase
@@ -47,10 +47,10 @@ case class Parameter(name: String) extends LeafExpression with Unevaluable {
* The logical plan representing a parameterized query. It will be removed during analysis after
* the parameters are bind.
*/
-case class ParameterizedQuery(child: LogicalPlan, args: Map[String, Expression]) extends UnaryNode {
+case class ParameterizedQuery(child: LogicalPlan, args: Map[String, Expression])
+ extends UnresolvedUnaryNode {
+
assert(args.nonEmpty)
- override def output: Seq[Attribute] = Nil
- override lazy val resolved = false
final override val nodePatterns: Seq[TreePattern] = Seq(PARAMETERIZED_QUERY)
override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan =
copy(child = newChild)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index 6637f550aa7..079d7564623 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -36,6 +36,18 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
class UnresolvedException(function: String)
extends AnalysisException(s"Invalid call to $function on unresolved object")
+/** Parent trait for unresolved node types */
+trait UnresolvedNode extends LogicalPlan {
+ override def output: Seq[Attribute] = Nil
+ override lazy val resolved: Boolean = false
+}
+
+/** Parent trait for unresolved leaf node types */
+trait UnresolvedLeafNode extends LeafNode with UnresolvedNode
+
+/** Parent trait for unresolved unary node types */
+trait UnresolvedUnaryNode extends UnaryNode with UnresolvedNode
+
/**
* A logical plan placeholder that holds the identifier clause string expression. It will be
* replaced by the actual logical plan with the evaluated identifier string.
@@ -43,9 +55,7 @@ class UnresolvedException(function: String)
case class PlanWithUnresolvedIdentifier(
identifierExpr: Expression,
planBuilder: Seq[String] => LogicalPlan)
- extends LeafNode {
- override def output: Seq[Attribute] = Nil
- override lazy val resolved = false
+ extends UnresolvedLeafNode {
final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_IDENTIFIER)
}
@@ -77,7 +87,7 @@ case class UnresolvedRelation(
multipartIdentifier: Seq[String],
options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty(),
override val isStreaming: Boolean = false)
- extends LeafNode with NamedRelation {
+ extends UnresolvedLeafNode with NamedRelation {
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
/** Returns a `.` separated name for this relation. */
@@ -85,10 +95,6 @@ case class UnresolvedRelation(
override def name: String = tableName
- override def output: Seq[Attribute] = Nil
-
- override lazy val resolved = false
-
final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_RELATION)
}
@@ -115,11 +121,9 @@ object UnresolvedRelation {
case class UnresolvedInlineTable(
names: Seq[String],
rows: Seq[Seq[Expression]])
- extends LeafNode {
+ extends UnresolvedLeafNode {
lazy val expressionsResolved: Boolean = rows.forall(_.forall(_.resolved))
- override lazy val resolved = false
- override def output: Seq[Attribute] = Nil
}
/**
@@ -134,11 +138,7 @@ case class UnresolvedInlineTable(
case class UnresolvedTableValuedFunction(
name: Seq[String],
functionArgs: Seq[Expression])
- extends LeafNode {
-
- override def output: Seq[Attribute] = Nil
-
- override lazy val resolved = false
+ extends UnresolvedLeafNode {
final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_TABLE_VALUED_FUNCTION)
}
@@ -174,11 +174,7 @@ object UnresolvedTableValuedFunction {
case class UnresolvedTVFAliases(
name: Seq[String],
child: LogicalPlan,
- outputNames: Seq[String]) extends UnaryNode {
-
- override def output: Seq[Attribute] = Nil
-
- override lazy val resolved = false
+ outputNames: Seq[String]) extends UnresolvedUnaryNode {
final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_TVF_ALIASES)
@@ -634,11 +630,7 @@ case class UnresolvedAlias(
case class UnresolvedSubqueryColumnAliases(
outputColumnNames: Seq[String],
child: LogicalPlan)
- extends UnaryNode {
-
- override def output: Seq[Attribute] = Nil
-
- override lazy val resolved = false
+ extends UnresolvedUnaryNode {
final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_SUBQUERY_COLUMN_ALIAS)
@@ -717,9 +709,7 @@ case class UnresolvedOrdinal(ordinal: Int)
case class UnresolvedHaving(
havingCondition: Expression,
child: LogicalPlan)
- extends UnaryNode {
- override lazy val resolved: Boolean = false
- override def output: Seq[Attribute] = child.output
+ extends UnresolvedUnaryNode {
override protected def withNewChildInternal(newChild: LogicalPlan): UnresolvedHaving =
copy(child = newChild)
final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_HAVING)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
index 2d26e281607..cbc08f760fb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala
@@ -34,11 +34,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
* Holds the name of a namespace that has yet to be looked up in a catalog. It will be resolved to
* [[ResolvedNamespace]] during analysis.
*/
-case class UnresolvedNamespace(multipartIdentifier: Seq[String]) extends LeafNode {
- override lazy val resolved: Boolean = false
-
- override def output: Seq[Attribute] = Nil
-}
+case class UnresolvedNamespace(multipartIdentifier: Seq[String]) extends UnresolvedLeafNode
/**
* Holds the name of a table that has yet to be looked up in a catalog. It will be resolved to
@@ -47,11 +43,7 @@ case class UnresolvedNamespace(multipartIdentifier: Seq[String]) extends LeafNod
case class UnresolvedTable(
multipartIdentifier: Seq[String],
commandName: String,
- relationTypeMismatchHint: Option[String]) extends LeafNode {
- override lazy val resolved: Boolean = false
-
- override def output: Seq[Attribute] = Nil
-}
+ relationTypeMismatchHint: Option[String]) extends UnresolvedLeafNode
/**
* Holds the name of a view that has yet to be looked up. It will be resolved to
@@ -61,11 +53,7 @@ case class UnresolvedView(
multipartIdentifier: Seq[String],
commandName: String,
allowTemp: Boolean,
- relationTypeMismatchHint: Option[String]) extends LeafNode {
- override lazy val resolved: Boolean = false
-
- override def output: Seq[Attribute] = Nil
-}
+ relationTypeMismatchHint: Option[String]) extends UnresolvedLeafNode
/**
* Holds the name of a table or view that has yet to be looked up in a catalog. It will
@@ -75,10 +63,7 @@ case class UnresolvedView(
case class UnresolvedTableOrView(
multipartIdentifier: Seq[String],
commandName: String,
- allowTempView: Boolean) extends LeafNode {
- override lazy val resolved: Boolean = false
- override def output: Seq[Attribute] = Nil
-}
+ allowTempView: Boolean) extends UnresolvedLeafNode
sealed trait PartitionSpec extends LeafExpression with Unevaluable {
override def dataType: DataType = throw new IllegalStateException(
@@ -127,9 +112,7 @@ case class UnresolvedFunctionName(
commandName: String,
requirePersistent: Boolean,
funcTypeMismatchHint: Option[String],
- possibleQualifiedName: Option[Seq[String]] = None) extends LeafNode {
- override lazy val resolved: Boolean = false
- override def output: Seq[Attribute] = Nil
+ possibleQualifiedName: Option[Seq[String]] = None) extends UnresolvedLeafNode {
final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_FUNC)
}
@@ -138,10 +121,7 @@ case class UnresolvedFunctionName(
* be resolved to [[ResolvedIdentifier]] during analysis.
*/
case class UnresolvedIdentifier(nameParts: Seq[String], allowTemp: Boolean = false)
- extends LeafNode {
- override lazy val resolved: Boolean = false
- override def output: Seq[Attribute] = Nil
-}
+ extends UnresolvedLeafNode
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index fce820c4927..6b72500f3f6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -30,7 +30,7 @@ import org.json4s.jackson.JsonMethods._
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, SQLConfHelper, TableIdentifier}
-import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
+import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, UnresolvedLeafNode}
import org.apache.spark.sql.catalyst.catalog.CatalogTable.VIEW_STORING_ANALYZED_PLAN
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Cast, ExprId, Literal}
import org.apache.spark.sql.catalyst.plans.logical._
@@ -790,10 +790,8 @@ object CatalogTypes {
case class UnresolvedCatalogRelation(
tableMeta: CatalogTable,
options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty(),
- override val isStreaming: Boolean = false) extends LeafNode {
+ override val isStreaming: Boolean = false) extends UnresolvedLeafNode {
assert(tableMeta.identifier.database.isDefined)
- override lazy val resolved: Boolean = false
- override def output: Seq[Attribute] = Nil
}
/**
@@ -803,12 +801,9 @@ case class UnresolvedCatalogRelation(
*/
case class TemporaryViewRelation(
tableMeta: CatalogTable,
- plan: Option[LogicalPlan] = None) extends LeafNode {
+ plan: Option[LogicalPlan] = None) extends UnresolvedLeafNode {
require(plan.isEmpty ||
(plan.get.resolved && tableMeta.properties.contains(VIEW_STORING_ANALYZED_PLAN)))
-
- override lazy val resolved: Boolean = false
- override def output: Seq[Attribute] = Nil
}
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index a90510fe681..e23966775e9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.{AliasIdentifier, SQLConfHelper}
-import org.apache.spark.sql.catalyst.analysis.{AnsiTypeCoercion, MultiInstanceRelation, Resolver, TypeCoercion, TypeCoercionBase}
+import org.apache.spark.sql.catalyst.analysis.{AnsiTypeCoercion, MultiInstanceRelation, Resolver, TypeCoercion, TypeCoercionBase, UnresolvedUnaryNode}
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable}
import org.apache.spark.sql.catalyst.catalog.CatalogTable.VIEW_STORING_ANALYZED_PLAN
import org.apache.spark.sql.catalyst.expressions._
@@ -1498,12 +1498,10 @@ case class Unpivot(
aliases: Option[Seq[Option[String]]],
variableColumnName: String,
valueColumnNames: Seq[String],
- child: LogicalPlan) extends UnaryNode {
+ child: LogicalPlan) extends UnresolvedUnaryNode {
// There should be no code path that creates `Unpivot` with both set None
assert(ids.isDefined || values.isDefined, "at least one of `ids` and `values` must be defined")
- override lazy val resolved = false // Unpivot will be replaced after being resolved.
- override def output: Seq[Attribute] = Nil
override def metadataOutput: Seq[Attribute] = Nil
final override val nodePatterns: Seq[TreePattern] = Seq(UNPIVOT)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index b657dd55eb7..94bbb9e0caa 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -95,10 +95,7 @@ case class TestFunction(
copy(children = newChildren)
}
-case class UnresolvedTestPlan() extends LeafNode {
- override lazy val resolved = false
- override def output: Seq[Attribute] = Nil
-}
+case class UnresolvedTestPlan() extends UnresolvedLeafNode
class AnalysisErrorSuite extends AnalysisTest {
import TestRelations._
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org