You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by eyalfa <gi...@git.apache.org> on 2018/02/04 22:19:26 UTC

[GitHub] spark pull request #19054: [SPARK-18067] Avoid shuffling child if join keys ...

Github user eyalfa commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19054#discussion_r165860433
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala ---
    @@ -220,45 +220,99 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] {
         operator.withNewChildren(children)
       }
     
    +  private def isSubset(biggerSet: Seq[Expression], smallerSet: Seq[Expression]): Boolean =
    +    smallerSet.length <= biggerSet.length &&
    +      smallerSet.forall(x => biggerSet.exists(_.semanticEquals(x)))
    +
    +  /**
    +   * Reorders `leftKeys` and `rightKeys` by aligning `currentOrderOfKeys` to be a prefix of
    +   * `expectedOrderOfKeys`
    +   */
       private def reorder(
           leftKeys: Seq[Expression],
           rightKeys: Seq[Expression],
    -      expectedOrderOfKeys: Seq[Expression],
    -      currentOrderOfKeys: Seq[Expression]): (Seq[Expression], Seq[Expression]) = {
    -    val leftKeysBuffer = ArrayBuffer[Expression]()
    -    val rightKeysBuffer = ArrayBuffer[Expression]()
    +      expectedOrderOfKeys: Seq[Expression], // comes from child's output partitioning
    +      currentOrderOfKeys: Seq[Expression]): // comes from join predicate
    +  (Seq[Expression], Seq[Expression], Seq[Expression], Seq[Expression]) = {
    --- End diff --
    
    Can you please add a comment describing the return type? A `Tuple4` is not a very descriptive type :smiley:


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org