Posted to notifications@kyuubi.apache.org by GitBox <gi...@apache.org> on 2022/12/12 09:47:18 UTC

[GitHub] [incubator-kyuubi] cfmcgrady commented on a diff in pull request #3962: Add two conditions to decide whether to add a shuffle before writing

cfmcgrady commented on code in PR #3962:
URL: https://github.com/apache/incubator-kyuubi/pull/3962#discussion_r1045588799


##########
docs/extensions/engines/spark/rules.md:
##########
@@ -68,17 +68,18 @@ Now, you can enjoy the Kyuubi SQL Extension.
 

Review Comment:
   > Kyuubi provides SQL extension out of box. Due to the version compatibility with Apache Spark, currently we only support Apache Spark branch-3.1 (i.e 3.1.1 and 3.1.2).
   
   while we are at it, could you also correct the supported versions at L21?



##########
extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/RepartitionBeforeWritingBase.scala:
##########
@@ -108,14 +108,37 @@ abstract class RepartitionBeforeWritingHiveBase extends RepartitionBuilder {
   }
 }
 
-trait RepartitionBeforeWriteHelper {
-  def canInsertRepartitionByExpression(plan: LogicalPlan): Boolean = plan match {
-    case Project(_, child) => canInsertRepartitionByExpression(child)
-    case SubqueryAlias(_, child) => canInsertRepartitionByExpression(child)
-    case Limit(_, _) => false
-    case _: Sort => false
-    case _: RepartitionByExpression => false
-    case _: Repartition => false
-    case _ => true
+trait RepartitionBeforeWriteHelper extends Rule[LogicalPlan] {
+  private def hasBenefit(plan: LogicalPlan): Boolean = {
+    def probablyHasShuffle: Boolean = plan.find {

Review Comment:
   nit: `plan.find {...}.isDefined` -> `plan.exists {...}`
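   
   For context: `find` materializes an `Option` that is then immediately collapsed to a boolean, while `exists` expresses the same check as a single short-circuiting predicate. A minimal sketch of the idiom in plain Scala (the Catalyst `TreeNode` API mirrors these collection methods, assuming the target Spark version exposes `TreeNode.exists`):
   
   ```scala
   // Hypothetical node names, for illustration only; the real code walks
   // Catalyst LogicalPlan nodes rather than strings.
   val nodes = Seq("Project", "Sort", "RepartitionByExpression")
   
   // Before: build an Option just to test whether it is defined.
   val hasSortVerbose: Boolean = nodes.find(_ == "Sort").isDefined
   
   // After: one boolean predicate, same result, no intermediate Option.
   val hasSort: Boolean = nodes.exists(_ == "Sort")
   
   assert(hasSortVerbose == hasSort)
   ```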



##########
extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/RepartitionBeforeWritingBase.scala:
##########
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand
 import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand
 import org.apache.spark.sql.hive.execution.{CreateHiveTableAsSelectCommand, InsertIntoHiveTable, OptimizedCreateHiveTableAsSelectCommand}
-import org.apache.spark.sql.internal.StaticSQLConf
+import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}

Review Comment:
   nit: unused import (`SQLConf` is added here but never referenced)
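   
   Assuming `SQLConf` is indeed unreferenced in this file, the fix would simply revert the import to its previous form:
   
   ```scala
   import org.apache.spark.sql.internal.StaticSQLConf
   ```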



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@kyuubi.apache.org
For additional commands, e-mail: notifications-help@kyuubi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org