You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/03/27 16:37:16 UTC
[spark] branch branch-3.0 updated: [MINOR][DOC] Refine comments of
QueryPlan regarding subquery
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 7435f45 [MINOR][DOC] Refine comments of QueryPlan regarding subquery
7435f45 is described below
commit 7435f4543ea6f2b927da6055c1cfb75f4a62f19d
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Fri Mar 27 09:35:35 2020 -0700
[MINOR][DOC] Refine comments of QueryPlan regarding subquery
### What changes were proposed in this pull request?
The query plan of Spark SQL is a mutually recursive structure: QueryPlan -> Expression (PlanExpression) -> QueryPlan, but the transformations do not take this into account.
This PR refines the comments of `QueryPlan` to highlight this fact.
### Why are the changes needed?
To improve the documentation.
### Does this PR introduce any user-facing change?
no
### How was this patch tested?
N/A
Closes #28050 from cloud-fan/comment.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
(cherry picked from commit 8a5d49610d875c473114781e92300c79e24a53cc)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../spark/sql/catalyst/plans/QueryPlan.scala | 32 ++++++++++++++--------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 1248266..9f86fb2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -23,6 +23,16 @@ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode, TreeNodeTag
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, StructType}
+/**
+ * An abstraction of the Spark SQL query plan tree, which can be logical or physical. This class
+ * defines some basic properties of a query plan node, as well as some new transform APIs to
+ * transform the expressions of the plan node.
+ *
+ * Note that, the query plan is a mutually recursive structure:
+ * QueryPlan -> Expression (subquery) -> QueryPlan
+ * The tree traverse APIs like `transform`, `foreach`, `collect`, etc. that are
+ * inherited from `TreeNode`, do not traverse into query plans inside subqueries.
+ */
abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanType] {
self: PlanType =>
@@ -133,7 +143,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
/**
* Returns the result of running [[transformExpressions]] on this node
- * and all its children.
+ * and all its children. Note that this method skips expressions inside subqueries.
*/
def transformAllExpressions(rule: PartialFunction[Expression, Expression]): this.type = {
transform {
@@ -204,7 +214,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
}
/**
- * All the subqueries of current plan.
+ * All the top-level subqueries of the current plan node. Nested subqueries are not included.
*/
def subqueries: Seq[PlanType] = {
expressions.flatMap(_.collect {
@@ -213,21 +223,21 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
}
/**
- * Returns a sequence containing the result of applying a partial function to all elements in this
- * plan, also considering all the plans in its (nested) subqueries
- */
- def collectInPlanAndSubqueries[B](f: PartialFunction[PlanType, B]): Seq[B] =
- (this +: subqueriesAll).flatMap(_.collect(f))
-
- /**
- * Returns a sequence containing the subqueries in this plan, also including the (nested)
- * subquries in its children
+ * All the subqueries of the current plan node and all its children. Nested subqueries are also
+ * included.
*/
def subqueriesAll: Seq[PlanType] = {
val subqueries = this.flatMap(_.subqueries)
subqueries ++ subqueries.flatMap(_.subqueriesAll)
}
+ /**
+ * Returns a sequence containing the result of applying a partial function to all elements in this
+ * plan, also considering all the plans in its (nested) subqueries
+ */
+ def collectInPlanAndSubqueries[B](f: PartialFunction[PlanType, B]): Seq[B] =
+ (this +: subqueriesAll).flatMap(_.collect(f))
+
override def innerChildren: Seq[QueryPlan[_]] = subqueries
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org