You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ch...@apache.org on 2021/10/12 07:35:59 UTC

[incubator-kyuubi] branch master updated: [KYUUBI #1216] Skip zorder if only requires one column

This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new b26ef2b  [KYUUBI #1216] Skip zorder if only requires one column
b26ef2b is described below

commit b26ef2ba9ef818f2549d7698e5adf6d9be81c9b4
Author: ulysses-you <ul...@gmail.com>
AuthorDate: Tue Oct 12 15:35:50 2021 +0800

    [KYUUBI #1216] Skip zorder if only requires one column
    
    <!--
    Thanks for sending a pull request!
    
    Here are some tips for you:
      1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html
      2. If the PR is related to an issue in https://github.com/apache/incubator-kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
      3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'.
    -->
    
    ### _Why are the changes needed?_
    <!--
    Please clarify why the changes are needed. For instance,
      1. If you add a feature, you can talk about the use case of it.
      2. If you fix a bug, you can clarify why it is a bug.
    -->
    Improve the perf.
    
    If user only requires one column as zorde columns, it has no meaning to wrap a zorder expression.
    
    ### _How was this patch tested?_
    - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [ ] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before make a pull request
    
    Closes #1216 from ulysses-you/improve-zorder.
    
    Closes #1216
    
    9975fccd [ulysses-you] skip zorder if only requires one column
    
    Authored-by: ulysses-you <ul...@gmail.com>
    Signed-off-by: Cheng Pan <ch...@apache.org>
---
 .../sql/zorder/InsertZorderBeforeWriting.scala     |  7 +++++-
 .../kyuubi/sql/zorder/ZorderSqlAstBuilder.scala    |  7 +++++-
 .../scala/org/apache/spark/sql/ZorderSuite.scala   | 25 ++++++++++++++++++----
 3 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/zorder/InsertZorderBeforeWriting.scala b/dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/zorder/InsertZorderBeforeWriting.scala
index 75a3445..0982706 100644
--- a/dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/zorder/InsertZorderBeforeWriting.scala
+++ b/dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/zorder/InsertZorderBeforeWriting.scala
@@ -135,8 +135,13 @@ trait InsertZorderHelper extends Rule[LogicalPlan] {
       plan
     } else {
       val bound = cols.map(attrs(_))
+      val orderExpr = if (bound.length == 1) {
+        bound.head
+      } else {
+        Zorder(bound)
+      }
       Sort(
-        SortOrder(Zorder(bound), Ascending, NullsLast, Seq.empty) :: Nil,
+        SortOrder(orderExpr, Ascending, NullsLast, Seq.empty) :: Nil,
         true,
         plan
       )
diff --git a/dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderSqlAstBuilder.scala b/dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderSqlAstBuilder.scala
index 179ccb3..f9eb7e4 100644
--- a/dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderSqlAstBuilder.scala
+++ b/dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderSqlAstBuilder.scala
@@ -76,9 +76,14 @@ class ZorderSqlAstBuilder extends ZorderSqlExtensionsBaseVisitor[AnyRef] {
       .map(UnresolvedAttribute(_))
       .toSeq
 
+    val orderExpr = if (zorderCols.length == 1) {
+      zorderCols.head
+    } else {
+      Zorder(zorderCols)
+    }
     val query =
       Sort(
-        SortOrder(Zorder(zorderCols), Ascending, NullsLast, Seq.empty) :: Nil,
+        SortOrder(orderExpr, Ascending, NullsLast, Seq.empty) :: Nil,
         true,
         Project(Seq(UnresolvedStar(None)), tableWithFilter))
 
diff --git a/dev/kyuubi-extension-spark-3-1/src/test/scala/org/apache/spark/sql/ZorderSuite.scala b/dev/kyuubi-extension-spark-3-1/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
index a94d2ec..ad74984 100644
--- a/dev/kyuubi-extension-spark-3-1/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
+++ b/dev/kyuubi-extension-spark-3-1/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Expression, ExpressionEvalHelper, Literal, NullsLast, SortOrder}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, AttributeReference, Expression, ExpressionEvalHelper, Literal, NullsLast, SortOrder}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project, Sort}
 import org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand
 import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand
@@ -29,7 +29,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
 import org.apache.kyuubi.sql.{KyuubiSQLConf, KyuubiSQLExtensionException}
-import org.apache.kyuubi.sql.zorder.Zorder
+import org.apache.kyuubi.sql.zorder.{OptimizeZorderCommand, Zorder}
 
 trait ZorderSuite extends KyuubiSparkSQLExtensionTest with ExpressionEvalHelper {
 
@@ -189,9 +189,14 @@ trait ZorderSuite extends KyuubiSparkSQLExtensionTest with ExpressionEvalHelper
     def checkSort(plan: LogicalPlan): Unit = {
       assert(plan.isInstanceOf[Sort] === resHasSort)
       if (plan.isInstanceOf[Sort]) {
-        val refs = plan.asInstanceOf[Sort].order.head
-          .child.asInstanceOf[Zorder].children.map(_.references.head)
         val colArr = cols.split(",")
+        val refs = if (colArr.length == 1) {
+          plan.asInstanceOf[Sort].order.head
+            .child.asInstanceOf[AttributeReference] :: Nil
+        } else {
+          plan.asInstanceOf[Sort].order.head
+            .child.asInstanceOf[Zorder].children.map(_.references.head)
+        }
         assert(refs.size === colArr.size)
         refs.zip(colArr).foreach { case (ref, col) =>
           assert(ref.name === col.trim)
@@ -484,6 +489,18 @@ trait ZorderSuite extends KyuubiSparkSQLExtensionTest with ExpressionEvalHelper
         Row(Short.MaxValue.toLong + 1, Short.MaxValue) :: Nil
     checkSort(df3, expected3, Array(LongType, ShortType))
   }
+
+  test("skip zorder if only requires one column") {
+    withTable("t") {
+      withSQLConf("spark.sql.hive.convertMetastoreParquet" -> "false") {
+        sql("CREATE TABLE t (c1 int, c2 string) stored as parquet")
+        val order1 = sql("OPTIMIZE t ZORDER BY c1").queryExecution.analyzed
+          .asInstanceOf[OptimizeZorderCommand].query.asInstanceOf[Sort].order.head.child
+        assert(!order1.isInstanceOf[Zorder])
+        assert(order1.isInstanceOf[AttributeReference])
+      }
+    }
+  }
 }
 
 class ZorderWithCodegenEnabledSuite extends ZorderSuite {