You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/05/21 09:19:02 UTC

[GitHub] [spark] francis0407 commented on a change in pull request #24563: [SPARK-27359] [OPTIMIZER] [SQL] Rewrite ArraysOverlap Join

francis0407 commented on a change in pull request #24563: [SPARK-27359] [OPTIMIZER] [SQL] Rewrite ArraysOverlap Join
URL: https://github.com/apache/spark/pull/24563#discussion_r285917693
 
 

 ##########
 File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala
 ##########
 @@ -20,13 +20,51 @@ package org.apache.spark.sql.catalyst.optimizer
 import scala.annotation.tailrec
 
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.planning.ExtractFiltersAndInnerJoins
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.internal.SQLConf
 
+
+/**
+ * Replace an expensive array_overlap join with an equivalent equijoin.
+ */
+object RewriteArraysOverlapJoin extends Rule[LogicalPlan] {
+  private def makePrime(p: LogicalPlan, arr: NamedExpression, alias: String) = {
+    val exploded = Alias(Explode(arr), alias)(explicitMetadata = Some(arr.metadata))
+    val generate = ExtractGenerator(
+      Project(p.output :+ exploded, p)
+    )
+    (generate, generate.output.last)
+  }
+  
+  private def isIn(p: LogicalPlan, e: Expression) = p.output.map(_.expr).contains(e)
+
+  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+    case Join(left, right, joinType, Some(ArraysOverlap(arrA: NamedExpression, arrB: NamedExpression))) =>
+      val (leftArray, rightArray) =
+        if (isIn(left, arrA) && isIn(right, arrB)) {
+          (arrA, arrB)
+        } else { // other cases would be caught be the analyzer
 
 Review comment:
   nit: typo in the comment above: "caught be the analyzer" should be "caught by the analyzer".

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org