Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2021/03/04 08:26:17 UTC

[GitHub] [spark] viirya commented on a change in pull request #31728: [SPARK-34609][SQL] Unify resolveExpressionBottomUp and resolveExpressionTopDown

viirya commented on a change in pull request #31728:
URL: https://github.com/apache/spark/pull/31728#discussion_r587253516



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
##########
@@ -1914,49 +1857,126 @@ class Analyzer(override val catalogManager: CatalogManager)
   }
 
   /**
-   * Resolves the attribute, column value and extract value expressions(s) by traversing the
-   * input expression in bottom-up manner. In order to resolve the nested complex type fields
-   * correctly, this function makes use of `throws` parameter to control when to raise an
-   * AnalysisException.
+   * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expressions(s) by
+   * traversing the input expression in top-down manner. It must be top-down because we need to
+   * skip over unbound lambda function expression. The lambda expressions are resolved in a
+   * different place [[ResolveLambdaVariables]].
    *
    * Example :
-   * SELECT a.b FROM t ORDER BY b[0].d
+   * SELECT transform(array(1, 2, 3), (x, i) -> x + i)"
    *
-   * In the above example, in b needs to be resolved before d can be resolved. Given we are
-   * doing a bottom up traversal, it will first attempt to resolve d and fail as b has not
-   * been resolved yet. If `throws` is false, this function will handle the exception by
-   * returning the original attribute. In this case `d` will be resolved in subsequent passes
-   * after `b` is resolved.
+   * In the case above, x and i are resolved as lambda variables in [[ResolveLambdaVariables]].
    */
-  protected[sql] def resolveExpressionBottomUp(
+  private def resolveExpression(
       expr: Expression,
       plan: LogicalPlan,
-      throws: Boolean = false): Expression = {
-    if (expr.resolved) return expr
+      resolveColumnByName: Seq[String] => Option[Expression],
+      resolveColumnByOrdinal: Int => Attribute,
+      trimAlias: Boolean,
+      throws: Boolean): Expression = {
     // Resolve expression in one round.
-    // If throws == false or the desired attribute doesn't exist
-    // (like try to resolve `a.b` but `a` doesn't exist), fail and return the origin one.
-    // Else, throw exception.
-    try {
-      expr transformUp {
-        case GetColumnByOrdinal(ordinal, _) => plan.output(ordinal)
+    def innerResolve(e: Expression, isTopLevel: Boolean): Expression = {
+      if (e.resolved) return e
+      e match {
+        case f: LambdaFunction if !f.bound => f
+        case GetColumnByOrdinal(ordinal, _) => resolveColumnByOrdinal(ordinal)
         case u @ UnresolvedAttribute(nameParts) =>
-          val result =
-            withPosition(u) {
-              plan.resolve(nameParts, resolver)
-                .orElse(resolveLiteralFunction(nameParts, u, plan))
-                .getOrElse(u)
-            }
+          val resolved = withPosition(u) {
+            resolveColumnByName(nameParts)
+              .orElse(resolveLiteralFunction(nameParts, u, plan))
+              .getOrElse(u)
+          }
+          val result = resolved match {
+            // When trimAlias = true, we will trim unnecessary alias of `GetStructField` and
+            // we won't trim the alias of top-level `GetStructField`. Since we will call
+            // CleanupAliases later in Analyzer, trim non top-level unnecessary alias of
+            // `GetStructField` here is safe.
+            case Alias(s: GetStructField, _) if trimAlias && !isTopLevel => s
+            case others => others
+          }
           logDebug(s"Resolving $u to $result")
           result
-        case UnresolvedExtractValue(child, fieldName) if child.resolved =>
-          ExtractValue(child, fieldName, resolver)
+        case u @ UnresolvedExtractValue(child, fieldName) =>
+          val newChild = innerResolve(child, isTopLevel = false)
+          if (newChild.resolved) {
+            ExtractValue(newChild, fieldName, resolver)
+          } else {
+            u.copy(child = newChild)
+          }
+        case _ => e.mapChildren(innerResolve(_, isTopLevel = false))
       }
+    }
+
+    try {
+      innerResolve(expr, isTopLevel = true)
     } catch {
-      case a: AnalysisException if !throws => expr
+      case _: AnalysisException if !throws => expr
     }
   }
 
+  /**
+   * Resolves `UnresolvedAttribute`, `GetColumnByOrdinal` and extract value expressions(s) by the
+   * input plan's output attributes. In order to resolve the nested complex type fields correctly,
+   * this function makes use of `throws` parameter to control when to raise an AnalysisException.
+   *
+   * Example :
+   * SELECT a.b FROM t ORDER BY b[0].d
+   *
+   * In the above example, in b needs to be resolved before d can be resolved. Given we are

Review comment:
       `in b` -> `b`?
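
For context, a minimal self-contained sketch (not part of this PR; the object name
and local SparkSession setup are illustrative) of the lambda query shape the new
doc comment describes, where x and i are lambda variables that resolveExpression
must leave alone so that ResolveLambdaVariables can bind them later:

    import org.apache.spark.sql.SparkSession

    object LambdaResolutionSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[*]")
          .appName("lambda-resolution-sketch")
          .getOrCreate()

        // x and i are not attributes of any input relation; the analyzer skips the
        // unbound LambdaFunction here and resolves them in ResolveLambdaVariables.
        spark.sql("SELECT transform(array(1, 2, 3), (x, i) -> x + i) AS shifted")
          .show(truncate = false)
        // Expected: shifted = [1, 3, 5], since transform's index argument starts at 0.

        spark.stop()
      }
    }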


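
Similarly, a sketch (again not part of the PR; the temporary view is illustrative) of
the ORDER BY example from the second doc comment, where the output column b must be
resolved before b[0].d can be, which is why a failed pass with throws = false simply
returns the original expression for a later analyzer pass:

    import org.apache.spark.sql.SparkSession

    object NestedFieldOrderingSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[*]")
          .appName("nested-field-ordering-sketch")
          .getOrCreate()

        // t(a) where a is struct<b: array<struct<d: bigint>>>.
        spark.sql(
          """CREATE OR REPLACE TEMPORARY VIEW t AS
            |SELECT named_struct('b', array(named_struct('d', id))) AS a
            |FROM range(3)""".stripMargin)

        // ORDER BY b[0].d refers to b, the auto-generated alias of a.b in the SELECT
        // list; d can only be resolved once b itself is resolved, so resolution may
        // span more than one analyzer pass.
        spark.sql("SELECT a.b FROM t ORDER BY b[0].d").show(truncate = false)

        spark.stop()
      }
    }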


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org