You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ul...@apache.org on 2022/02/11 05:30:44 UTC

[incubator-kyuubi] branch master updated: [KYUUBI #1811] TPC-DS benchmark expose cli args breakdown and results-dir

This is an automated email from the ASF dual-hosted git repository.

ulyssesyou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 68b9245  [KYUUBI #1811] TPC-DS benchmark expose cli args breakdown and results-dir
68b9245 is described below

commit 68b924513c7ea2d388cf32867e4800503bd14ffe
Author: Cheng Pan <ch...@apache.org>
AuthorDate: Fri Feb 11 13:30:37 2022 +0800

    [KYUUBI #1811] TPC-DS benchmark expose cli args breakdown and results-dir
    
    ### _Why are the changes needed?_
    
    Expose `breakdown` and `results-dir` as cli arg in TPC-DS benchmark tool
    
    ### _How was this patch tested?_
    - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [ ] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before making a pull request
    
    Closes #1811 from pan3793/tpcds.
    
    Closes #1811
    
    18637ce7 [Cheng Pan] nit
    cc10a7ea [Cheng Pan] style
    51e07398 [Cheng Pan] expose results-dir
    4bc57995 [Cheng Pan] TPC-DS benchmark expose cli arg breakdown
    
    Authored-by: Cheng Pan <ch...@apache.org>
    Signed-off-by: ulysses-you <ul...@apache.org>
---
 dev/kyuubi-tpcds/README.md                                 | 14 ++++++++------
 .../org/apache/kyuubi/tpcds/benchmark/Benchmark.scala      | 13 +++++--------
 .../org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala   | 14 +++++++++++---
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/dev/kyuubi-tpcds/README.md b/dev/kyuubi-tpcds/README.md
index bcfba85..adffb67 100644
--- a/dev/kyuubi-tpcds/README.md
+++ b/dev/kyuubi-tpcds/README.md
@@ -47,12 +47,14 @@ $SPARK_HOME/bin/spark-submit \
 
 Support options:
 
-| key        | default              |  description                                           |
-|------------|----------------------|--------------------------------------------------------|
-| db         | none(required)       | the TPC-DS database                                    |
-| benchmark  | tpcds-v2.4-benchmark | the name of application                                |
-| iterations | 3                    | the number of iterations to run                        |
-| filter     | a                    | filter on the name of the queries to run, e.g. q1-v2.4 |
+| key         | default                | description                                                   |
+|-------------|------------------------|---------------------------------------------------------------|
+| db          | none(required)         | the TPC-DS database                                           |
+| benchmark   | tpcds-v2.4-benchmark   | the name of application                                       |
+| iterations  | 3                      | the number of iterations to run                               |
+| breakdown   | false                  | whether to record breakdown results of an execution           |
+| filter      | a                      | filter on the name of the queries to run, e.g. q1-v2.4        |
+| results-dir | /spark/sql/performance | dir to store benchmark results, e.g. hdfs://hdfs-nn:9870/pref |
 
 Example: the following command to benchmark TPC-DS sf10 with exists database `tpcds_sf10`.
 
diff --git a/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/Benchmark.scala b/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/Benchmark.scala
index 5645bd5..8071bca 100644
--- a/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/Benchmark.scala
+++ b/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/Benchmark.scala
@@ -36,10 +36,7 @@ abstract class Benchmark(
 
   import Benchmark._
 
-  val resultsLocation: String =
-    sparkSession.conf.get(
-      "spark.sql.perf.results",
-      "/spark/sql/performance")
+  val resultsLocation: String = sparkSession.conf.get("spark.sql.perf.results")
 
   protected def sparkContext = sparkSession.sparkContext
 
@@ -82,7 +79,7 @@ abstract class Benchmark(
       variations: Seq[Variation[_]] = Seq(Variation("StandardRun", Seq("true")) { _ => {} }),
       tags: Map[String, String] = Map.empty,
       timeout: Long = 0L,
-      resultLocation: String = resultsLocation,
+      resultsDir: String = resultsLocation,
       forkThread: Boolean = true): ExperimentStatus = {
 
     new ExperimentStatus(
@@ -92,7 +89,7 @@ abstract class Benchmark(
       variations,
       tags,
       timeout,
-      resultLocation,
+      resultsDir,
       sparkSession,
       currentConfiguration,
       forkThread = forkThread)
@@ -143,7 +140,7 @@ object Benchmark {
       variations: Seq[Variation[_]],
       tags: Map[String, String],
       timeout: Long,
-      resultsLocation: String,
+      resultsDir: String,
       sparkSession: SparkSession,
       currentConfiguration: BenchmarkConfiguration,
       forkThread: Boolean = true) {
@@ -172,7 +169,7 @@ object Benchmark {
     }
 
     val timestamp: Long = System.currentTimeMillis()
-    val resultPath = s"$resultsLocation/timestamp=$timestamp"
+    val resultPath = s"$resultsDir/timestamp=$timestamp"
     val combinations: Seq[List[Int]] =
       cartesianProduct(variations.map(l => l.options.indices.toList).toList)
     val resultsFuture: Future[Unit] = Future {
diff --git a/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala b/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala
index 5e4b1c5..673c9c5 100644
--- a/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala
+++ b/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala
@@ -17,7 +17,6 @@
 
 package org.apache.kyuubi.tpcds.benchmark
 
-import java.io.File
 import java.net.InetAddress
 
 import org.apache.spark.SparkConf
@@ -28,7 +27,9 @@ case class RunConfig(
     db: String = null,
     benchmarkName: String = "tpcds-v2.4-benchmark",
     filter: Option[String] = None,
-    iterations: Int = 3)
+    iterations: Int = 3,
+    breakdown: Boolean = false,
+    resultsDir: String = "/spark/sql/performance")
 
 // scalastyle:off
 /**
@@ -55,9 +56,15 @@ object RunBenchmark {
       opt[String]('f', "filter")
         .action((x, c) => c.copy(filter = Some(x)))
         .text("a filter on the name of the queries to run")
+      opt[Boolean]('B', "breakdown")
+        .action((x, c) => c.copy(breakdown = x))
+        .text("whether to record breakdown results of an execution")
       opt[Int]('i', "iterations")
         .action((x, c) => c.copy(iterations = x))
         .text("the number of iterations to run")
+      opt[String]('r', "results-dir")
+        .action((x, c) => c.copy(resultsDir = x))
+        .text("dir to store benchmark results, e.g. hdfs://hdfs-nn:9870/pref")
       help("help")
         .text("prints this usage text")
     }
@@ -75,7 +82,7 @@ object RunBenchmark {
     val sparkSession = SparkSession.builder.config(conf).enableHiveSupport().getOrCreate()
     import sparkSession.implicits._
 
-    sparkSession.conf.set("spark.sql.perf.results", new File("performance").toURI.toString)
+    sparkSession.conf.set("spark.sql.perf.results", config.resultsDir)
 
     val benchmark = new TPCDS(sparkSession)
 
@@ -94,6 +101,7 @@ object RunBenchmark {
 
     val experiment = benchmark.runExperiment(
       executionsToRun = allQueries,
+      includeBreakdown = config.breakdown,
       iterations = config.iterations,
       tags = Map("host" -> InetAddress.getLocalHost.getHostName))