You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by "zml1206 (via GitHub)" <gi...@apache.org> on 2023/12/08 14:06:30 UTC

Re: [PR] [SPARK-46228][SQL] Insert window group limit node for limit outside of window [spark]

zml1206 commented on code in PR #44145:
URL: https://github.com/apache/spark/pull/44145#discussion_r1420494712


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala:
##########
@@ -68,10 +72,58 @@ object InferWindowGroupLimit extends Rule[LogicalPlan] with PredicateHelper {
     case _ => false
   }
 
+  /**
+   * Decides whether an outer limit of `limit` rows can be applied early to the input of
+   * `window`, so that we can safely do the early stop.
+   *
+   * All of the following must hold:
+   *  - `limit` does not exceed `conf.windowGroupLimitThreshold`;
+   *  - the window child's `maxRows`, when known, is greater than `limit`;
+   *  - the window child is not already a [[WindowGroupLimit]];
+   *  - at least one order expression of the window is not foldable, or every window expression
+   *    uses a row frame from UnboundedPreceding to CurrentRow (see `isRowFrame`);
+   *  - every window expression is an aliased [[WindowExpression]] whose frame is a
+   *    [[SpecifiedWindowFrame]] from UnboundedPreceding to CurrentRow, whose function is not a
+   *    [[SizeBasedWindowFunction]], and for which either `support(windowFunction)` is false or
+   *    the window `partitionSpec` is non-empty.
+   */
+  private def limitSupport(limit: Int, window: Window): Boolean =
+    limit <= conf.windowGroupLimitThreshold && window.child.maxRows.forall(_ > limit) &&
+      !window.child.isInstanceOf[WindowGroupLimit] &&
+      (window.orderSpec.exists(!_.child.foldable) || window.windowExpressions.forall(isRowFrame)) &&
+      window.windowExpressions.forall {
+        case Alias(WindowExpression(windowFunction, WindowSpecDefinition(_, _,
+        SpecifiedWindowFrame(_, UnboundedPreceding, CurrentRow))), _)
+          if !windowFunction.isInstanceOf[SizeBasedWindowFunction] &&
+            // LimitPushDownThroughWindow performs better than WindowGroupLimit when the
+            // window function is rank-like and the Window partitionSpec is empty.
+            (!support(windowFunction) || window.partitionSpec.nonEmpty) => true
+        case _ => false
+      }
+
+  /**
+   * Returns true if `windowExpression` is an aliased [[WindowExpression]] whose frame is a
+   * [[RowFrame]] spanning UnboundedPreceding to CurrentRow; any other expression yields false.
+   */
+  private def isRowFrame(windowExpression: NamedExpression): Boolean = windowExpression match {
+    case Alias(WindowExpression(_, WindowSpecDefinition(_, _,
+    SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))), _) => true
+    case _ => false
+  }

Review Comment:
   removed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org