You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/03/27 16:37:16 UTC
[spark] branch branch-3.0 updated: [MINOR][DOC] Refine comments of QueryPlan regarding subquery

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 7435f45  [MINOR][DOC] Refine comments of QueryPlan regarding subquery
7435f45 is described below

commit 7435f4543ea6f2b927da6055c1cfb75f4a62f19d
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Fri Mar 27 09:35:35 2020 -0700

    [MINOR][DOC] Refine comments of QueryPlan regarding subquery
    
    ### What changes were proposed in this pull request?
    
    The query plan of Spark SQL is a mutually recursive structure: QueryPlan -> Expression (PlanExpression) -> QueryPlan, but the transformations do not take this into account.
    
    This PR refines the comments of `QueryPlan` to highlight this fact.
    
    ### Why are the changes needed?
    
    better document.
    
    ### Does this PR introduce any user-facing change?
    
    no
    
    ### How was this patch tested?
    
    N/A
    
    Closes #28050 from cloud-fan/comment.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit 8a5d49610d875c473114781e92300c79e24a53cc)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../spark/sql/catalyst/plans/QueryPlan.scala       | 32 ++++++++++++++--------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 1248266..9f86fb2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -23,6 +23,16 @@ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode, TreeNodeTag
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{DataType, StructType}
 
+/**
+ * An abstraction of the Spark SQL query plan tree, which can be logical or physical. This class
+ * defines some basic properties of a query plan node, as well as some new transform APIs to
+ * transform the expressions of the plan node.
+ *
+ * Note that, the query plan is a mutually recursive structure:
+ *   QueryPlan -> Expression (subquery) -> QueryPlan
+ * The tree traverse APIs like `transform`, `foreach`, `collect`, etc. that are
+ * inherited from `TreeNode`, do not traverse into query plans inside subqueries.
+ */
 abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanType] {
   self: PlanType =>
 
@@ -133,7 +143,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
 
   /**
    * Returns the result of running [[transformExpressions]] on this node
-   * and all its children.
+   * and all its children. Note that this method skips expressions inside subqueries.
    */
   def transformAllExpressions(rule: PartialFunction[Expression, Expression]): this.type = {
     transform {
@@ -204,7 +214,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
   }
 
   /**
-   * All the subqueries of current plan.
+   * All the top-level subqueries of the current plan node. Nested subqueries are not included.
    */
   def subqueries: Seq[PlanType] = {
     expressions.flatMap(_.collect {
@@ -213,21 +223,21 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
   }
 
   /**
-   * Returns a sequence containing the result of applying a partial function to all elements in this
-   * plan, also considering all the plans in its (nested) subqueries
-   */
-  def collectInPlanAndSubqueries[B](f: PartialFunction[PlanType, B]): Seq[B] =
-    (this +: subqueriesAll).flatMap(_.collect(f))
-
-  /**
-   * Returns a sequence containing the subqueries in this plan, also including the (nested)
-   * subquries in its children
+   * All the subqueries of the current plan node and all its children. Nested subqueries are also
+   * included.
    */
   def subqueriesAll: Seq[PlanType] = {
     val subqueries = this.flatMap(_.subqueries)
     subqueries ++ subqueries.flatMap(_.subqueriesAll)
   }
 
+  /**
+   * Returns a sequence containing the result of applying a partial function to all elements in this
+   * plan, also considering all the plans in its (nested) subqueries
+   */
+  def collectInPlanAndSubqueries[B](f: PartialFunction[PlanType, B]): Seq[B] =
+    (this +: subqueriesAll).flatMap(_.collect(f))
+
   override def innerChildren: Seq[QueryPlan[_]] = subqueries
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org