You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ya...@apache.org on 2022/08/24 06:06:53 UTC

[incubator-kyuubi] branch master updated: [KYUUBI #3128] Support CostMode for PlanOnlyStatement

This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 8870183ed [KYUUBI #3128] Support CostMode for PlanOnlyStatement
8870183ed is described below

commit 8870183ed70f531f73abfd80f15640486a4fabef
Author: wangjunbo <wa...@qiyi.com>
AuthorDate: Wed Aug 24 14:06:40 2022 +0800

    [KYUUBI #3128] Support CostMode for PlanOnlyStatement
    
    ### _Why are the changes needed?_
    close https://github.com/apache/incubator-kyuubi/issues/3128
    
    ### _How was this patch tested?_
    - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [x] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before making a pull request
    
    Closes #3304 from Kwafoor/master.
    
    Closes #3128
    
    88a5ebe9 [wangjunbo] [kyuubi-3128] Support CostMode for PlanOnlyStatement
    
    Authored-by: wangjunbo <wa...@qiyi.com>
    Signed-off-by: Kent Yao <ya...@apache.org>
---
 docs/deployment/settings.md                                |  2 +-
 .../kyuubi/engine/spark/operation/PlanOnlyStatement.scala  | 10 ++++++++++
 .../main/scala/org/apache/kyuubi/config/KyuubiConf.scala   |  9 +++++----
 .../apache/kyuubi/operation/PlanOnlyOperationSuite.scala   | 14 ++++++++++++++
 4 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/docs/deployment/settings.md b/docs/deployment/settings.md
index e7be40852..51e0ca98e 100644
--- a/docs/deployment/settings.md
+++ b/docs/deployment/settings.md
@@ -406,7 +406,7 @@ kyuubi.operation.interrupt.on.cancel|true|When true, all running tasks will be i
 kyuubi.operation.language|SQL|Choose a programing language for the following inputs <ul><li>SQL: (Default) Run all following statements as SQL queries.</li> <li>SCALA: Run all following input a scala codes</li></ul>|string|1.5.0
 kyuubi.operation.log.dir.root|server_operation_logs|Root directory for query operation log at server-side.|string|1.4.0
 kyuubi.operation.plan.only.excludes|ResetCommand,SetCommand,SetNamespaceCommand,UseStatement,SetCatalogAndNamespace|Comma-separated list of query plan names, in the form of simple class names, i.e, for `set abc=xyz`, the value will be `SetCommand`. For those auxiliary plans, such as `switch databases`, `set properties`, or `create temporary view` e.t.c, which are used for setup evaluating environments for analyzing actual queries, we can use this config to exclude them and let them take  [...]
-kyuubi.operation.plan.only.mode|NONE|Whether to perform the statement in a PARSE, ANALYZE, OPTIMIZE, PHYSICAL, EXECUTION only way without executing the query. When it is NONE, the statement will be fully executed|string|1.4.0
+kyuubi.operation.plan.only.mode|NONE|Whether to perform the statement in a PARSE, ANALYZE, OPTIMIZE, OPTIMIZE_WITH_STATS, PHYSICAL, EXECUTION only way without executing the query. When it is NONE, the statement will be fully executed|string|1.4.0
 kyuubi.operation.progress.enabled|false|Whether to enable the operation progress. When true, the operation progress will be returned in `GetOperationStatus`.|boolean|1.6.0
 kyuubi.operation.query.timeout|&lt;undefined&gt;|Timeout for query executions at server-side, take affect with client-side timeout(`java.sql.Statement.setQueryTimeout`) together, a running query will be cancelled automatically if timeout. It's off by default, which means only client-side take fully control whether the query should timeout or not. If set, client-side timeout capped at this point. To cancel the queries right away without waiting task to finish, consider enabling kyuubi.ope [...]
 kyuubi.operation.result.max.rows|0|Max rows of Spark query results. Rows that exceeds the limit would be ignored. By setting this value to 0 to disable the max rows limit.|int|1.6.0
diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/PlanOnlyStatement.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/PlanOnlyStatement.scala
index 0839daf24..a3d6fda85 100644
--- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/PlanOnlyStatement.scala
+++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/PlanOnlyStatement.scala
@@ -72,6 +72,16 @@ class PlanOnlyStatement(
                 spark.sessionState.analyzer.checkAnalysis(analyzed)
                 val optimized = spark.sessionState.optimizer.execute(analyzed)
                 iter = new IterableFetchIterator(Seq(Row(optimized.toString())))
+              case OPTIMIZE_WITH_STATS =>
+                val analyzed = spark.sessionState.analyzer.execute(plan)
+                spark.sessionState.analyzer.checkAnalysis(analyzed)
+                val optimized = spark.sessionState.optimizer.execute(analyzed)
+                optimized.stats
+                iter = new IterableFetchIterator(Seq(Row(optimized.treeString(
+                  verbose = true,
+                  addSuffix = true,
+                  SQLConf.get.maxToStringFields,
+                  printOperatorId = false))))
               case PHYSICAL =>
                 val physical = spark.sql(statement).queryExecution.sparkPlan
                 iter = new IterableFetchIterator(Seq(Row(physical.toString())))
diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
index 7a1538ade..008fdbe93 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
@@ -1639,13 +1639,14 @@ object KyuubiConf {
 
   object OperationModes extends Enumeration with Logging {
     type OperationMode = Value
-    val PARSE, ANALYZE, OPTIMIZE, PHYSICAL, EXECUTION, NONE, UNKNOWN = Value
+    val PARSE, ANALYZE, OPTIMIZE, OPTIMIZE_WITH_STATS, PHYSICAL, EXECUTION, NONE, UNKNOWN = Value
 
     def apply(mode: String): OperationMode = {
       mode.toUpperCase(Locale.ROOT) match {
         case "PARSE" => PARSE
         case "ANALYZE" => ANALYZE
         case "OPTIMIZE" => OPTIMIZE
+        case "OPTIMIZE_WITH_STATS" => OPTIMIZE_WITH_STATS
         case "PHYSICAL" => PHYSICAL
         case "EXECUTION" => EXECUTION
         case "NONE" => NONE
@@ -1658,9 +1659,9 @@ object KyuubiConf {
 
   val OPERATION_PLAN_ONLY_MODE: ConfigEntry[String] =
     buildConf("kyuubi.operation.plan.only.mode")
-      .doc("Whether to perform the statement in a PARSE, ANALYZE, OPTIMIZE, PHYSICAL, EXECUTION " +
-        "only way without executing the query. When it is NONE, the statement will be fully " +
-        "executed")
+      .doc("Whether to perform the statement in a PARSE, ANALYZE, OPTIMIZE, OPTIMIZE_WITH_STATS," +
+        " PHYSICAL, EXECUTION only way without executing the query. When it is NONE, " +
+        "the statement will be fully executed")
       .version("1.4.0")
       .stringConf
       .transform(_.toUpperCase(Locale.ROOT))
diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/PlanOnlyOperationSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/PlanOnlyOperationSuite.scala
index f5d8f0ab2..ab2793655 100644
--- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/PlanOnlyOperationSuite.scala
+++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/PlanOnlyOperationSuite.scala
@@ -114,6 +114,20 @@ class PlanOnlyOperationSuite extends WithKyuubiServer with HiveJDBCTestHelper {
     }
   }
 
+  test("KYUUBI #3128: Support CostMode for PlanOnlyStatement") {
+    withSessionConf()(Map(KyuubiConf.OPERATION_PLAN_ONLY_MODE.key -> OPTIMIZE_WITH_STATS.toString))(
+      Map.empty) {
+      withJdbcStatement() { statement =>
+        val resultSet = statement.executeQuery(
+          "SELECT * FROM VALUES(1),(2),(3) AS t(c1) DISTRIBUTE BY c1")
+        assert(resultSet.next())
+        val operationPlan = resultSet.getString(1)
+        assert(operationPlan.startsWith("RepartitionByExpression")
+          && operationPlan.contains("Statistics"))
+      }
+    }
+  }
+
   test("kyuubi #3214: Plan only mode with an incorrect value") {
     withSessionConf()(Map(KyuubiConf.OPERATION_PLAN_ONLY_MODE.key -> "parse"))(Map.empty) {
       withJdbcStatement() { statement =>