You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ul...@apache.org on 2022/02/11 05:30:44 UTC
[incubator-kyuubi] branch master updated: [KYUUBI #1811] TPC-DS benchmark expose cli args breakdown and results-dir
This is an automated email from the ASF dual-hosted git repository.
ulyssesyou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 68b9245 [KYUUBI #1811] TPC-DS benchmark expose cli args breakdown and results-dir
68b9245 is described below
commit 68b924513c7ea2d388cf32867e4800503bd14ffe
Author: Cheng Pan <ch...@apache.org>
AuthorDate: Fri Feb 11 13:30:37 2022 +0800
[KYUUBI #1811] TPC-DS benchmark expose cli args breakdown and results-dir
### _Why are the changes needed?_
Expose `breakdown` and `results-dir` as CLI args in the TPC-DS benchmark tool
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before making a pull request
Closes #1811 from pan3793/tpcds.
Closes #1811
18637ce7 [Cheng Pan] nit
cc10a7ea [Cheng Pan] style
51e07398 [Cheng Pan] expose results-dir
4bc57995 [Cheng Pan] TPC-DS benchmark expose cli arg breakdown
Authored-by: Cheng Pan <ch...@apache.org>
Signed-off-by: ulysses-you <ul...@apache.org>
---
dev/kyuubi-tpcds/README.md | 14 ++++++++------
.../org/apache/kyuubi/tpcds/benchmark/Benchmark.scala | 13 +++++--------
.../org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala | 14 +++++++++++---
3 files changed, 24 insertions(+), 17 deletions(-)
diff --git a/dev/kyuubi-tpcds/README.md b/dev/kyuubi-tpcds/README.md
index bcfba85..adffb67 100644
--- a/dev/kyuubi-tpcds/README.md
+++ b/dev/kyuubi-tpcds/README.md
@@ -47,12 +47,14 @@ $SPARK_HOME/bin/spark-submit \
Support options:
-| key | default | description |
-|------------|----------------------|--------------------------------------------------------|
-| db | none(required) | the TPC-DS database |
-| benchmark | tpcds-v2.4-benchmark | the name of application |
-| iterations | 3 | the number of iterations to run |
-| filter | a | filter on the name of the queries to run, e.g. q1-v2.4 |
+| key | default | description |
+|-------------|------------------------|---------------------------------------------------------------|
+| db | none(required) | the TPC-DS database |
+| benchmark | tpcds-v2.4-benchmark | the name of application |
+| iterations | 3 | the number of iterations to run |
+| breakdown | false | whether to record breakdown results of an execution |
+| filter | a | filter on the name of the queries to run, e.g. q1-v2.4 |
+| results-dir | /spark/sql/performance | dir to store benchmark results, e.g. hdfs://hdfs-nn:9870/pref |
Example: the following command benchmarks TPC-DS sf10 against the existing database `tpcds_sf10`.
diff --git a/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/Benchmark.scala b/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/Benchmark.scala
index 5645bd5..8071bca 100644
--- a/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/Benchmark.scala
+++ b/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/Benchmark.scala
@@ -36,10 +36,7 @@ abstract class Benchmark(
import Benchmark._
- val resultsLocation: String =
- sparkSession.conf.get(
- "spark.sql.perf.results",
- "/spark/sql/performance")
+ val resultsLocation: String = sparkSession.conf.get("spark.sql.perf.results")
protected def sparkContext = sparkSession.sparkContext
@@ -82,7 +79,7 @@ abstract class Benchmark(
variations: Seq[Variation[_]] = Seq(Variation("StandardRun", Seq("true")) { _ => {} }),
tags: Map[String, String] = Map.empty,
timeout: Long = 0L,
- resultLocation: String = resultsLocation,
+ resultsDir: String = resultsLocation,
forkThread: Boolean = true): ExperimentStatus = {
new ExperimentStatus(
@@ -92,7 +89,7 @@ abstract class Benchmark(
variations,
tags,
timeout,
- resultLocation,
+ resultsDir,
sparkSession,
currentConfiguration,
forkThread = forkThread)
@@ -143,7 +140,7 @@ object Benchmark {
variations: Seq[Variation[_]],
tags: Map[String, String],
timeout: Long,
- resultsLocation: String,
+ resultsDir: String,
sparkSession: SparkSession,
currentConfiguration: BenchmarkConfiguration,
forkThread: Boolean = true) {
@@ -172,7 +169,7 @@ object Benchmark {
}
val timestamp: Long = System.currentTimeMillis()
- val resultPath = s"$resultsLocation/timestamp=$timestamp"
+ val resultPath = s"$resultsDir/timestamp=$timestamp"
val combinations: Seq[List[Int]] =
cartesianProduct(variations.map(l => l.options.indices.toList).toList)
val resultsFuture: Future[Unit] = Future {
diff --git a/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala b/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala
index 5e4b1c5..673c9c5 100644
--- a/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala
+++ b/dev/kyuubi-tpcds/src/main/scala/org/apache/kyuubi/tpcds/benchmark/RunBenchmark.scala
@@ -17,7 +17,6 @@
package org.apache.kyuubi.tpcds.benchmark
-import java.io.File
import java.net.InetAddress
import org.apache.spark.SparkConf
@@ -28,7 +27,9 @@ case class RunConfig(
db: String = null,
benchmarkName: String = "tpcds-v2.4-benchmark",
filter: Option[String] = None,
- iterations: Int = 3)
+ iterations: Int = 3,
+ breakdown: Boolean = false,
+ resultsDir: String = "/spark/sql/performance")
// scalastyle:off
/**
@@ -55,9 +56,15 @@ object RunBenchmark {
opt[String]('f', "filter")
.action((x, c) => c.copy(filter = Some(x)))
.text("a filter on the name of the queries to run")
+ opt[Boolean]('B', "breakdown")
+ .action((x, c) => c.copy(breakdown = x))
+ .text("whether to record breakdown results of an execution")
opt[Int]('i', "iterations")
.action((x, c) => c.copy(iterations = x))
.text("the number of iterations to run")
+ opt[String]('r', "results-dir")
+ .action((x, c) => c.copy(resultsDir = x)) // later written to spark.sql.perf.results
+ .text("dir to store benchmark results, e.g. hdfs://hdfs-nn:9870/pref")
help("help")
.text("prints this usage text")
}
@@ -75,7 +82,7 @@ object RunBenchmark {
val sparkSession = SparkSession.builder.config(conf).enableHiveSupport().getOrCreate()
import sparkSession.implicits._
- sparkSession.conf.set("spark.sql.perf.results", new File("performance").toURI.toString)
+ sparkSession.conf.set("spark.sql.perf.results", config.resultsDir)
val benchmark = new TPCDS(sparkSession)
@@ -94,6 +101,7 @@ object RunBenchmark {
val experiment = benchmark.runExperiment(
executionsToRun = allQueries,
+ includeBreakdown = config.breakdown,
iterations = config.iterations,
tags = Map("host" -> InetAddress.getLocalHost.getHostName))