You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/11/11 06:53:06 UTC
[spark] branch branch-2.4 updated: [SPARK-33417][SQL][TEST] Correct
the behaviour of query filters in TPCDSQueryBenchmark
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new fece4a3 [SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TPCDSQueryBenchmark
fece4a3 is described below
commit fece4a3a36e23c7b99d6cb64e0c4484c9e17235f
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Wed Nov 11 15:24:05 2020 +0900
[SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TPCDSQueryBenchmark
### What changes were proposed in this pull request?
This PR fixes the behaviour of query filters in `TPCDSQueryBenchmark`. The `--query-filter` option selects which TPCDS queries to run, e.g., `--query-filter q6,q8,q13`. However, the current master handles this option incorrectly. For example, if we pass `--query-filter q6` in order to run only the TPCDS q6, `TPCDSQueryBenchmark` runs both `q6` and `q6-v2.7` because the `filterQueries` method does not respect the name suffix. As a result, there is currently no way to run only the TPCDS q6. With this change, the v2.7 queries are matched against the filter by their suffixed names (e.g., `q6-v2.7`), so `--query-filter q6` selects only the v1.4 query.
### Why are the changes needed?
Bugfix.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Manually checked.
Closes #30324 from maropu/FilterBugInTPCDSQueryBenchmark.
Authored-by: Takeshi Yamamuro <ya...@apache.org>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
(cherry picked from commit 4b367976a877adb981f65d546e1522fdf30d0731)
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
.../execution/benchmark/TPCDSQueryBenchmark.scala | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
index fccee97..1f8b057 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
@@ -90,11 +90,16 @@ object TPCDSQueryBenchmark extends Logging {
}
}
- def filterQueries(
+ private def filterQueries(
origQueries: Seq[String],
- args: TPCDSQueryBenchmarkArguments): Seq[String] = {
- if (args.queryFilter.nonEmpty) {
- origQueries.filter(args.queryFilter.contains)
+ queryFilter: Set[String],
+ nameSuffix: String = ""): Seq[String] = {
+ if (queryFilter.nonEmpty) {
+ if (nameSuffix.nonEmpty) {
+ origQueries.filter { name => queryFilter.contains(s"$name$nameSuffix") }
+ } else {
+ origQueries.filter(queryFilter.contains)
+ }
} else {
origQueries
}
@@ -117,6 +122,7 @@ object TPCDSQueryBenchmark extends Logging {
"q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
// This list only includes TPC-DS v2.7 queries that are different from v1.4 ones
+ val nameSuffixForQueriesV2_7 = "-v2.7"
val tpcdsQueriesV2_7 = Seq(
"q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a",
"q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49",
@@ -124,8 +130,9 @@ object TPCDSQueryBenchmark extends Logging {
"q80a", "q86a", "q98")
// If `--query-filter` defined, filters the queries that this option selects
- val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs)
- val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs)
+ val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs.queryFilter)
+ val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs.queryFilter,
+ nameSuffix = nameSuffixForQueriesV2_7)
if ((queriesV1_4ToRun ++ queriesV2_7ToRun).isEmpty) {
throw new RuntimeException(
@@ -135,6 +142,6 @@ object TPCDSQueryBenchmark extends Logging {
val tableSizes = setupTables(benchmarkArgs.dataLocation)
runTpcdsQueries(queryLocation = "tpcds", queries = queriesV1_4ToRun, tableSizes)
runTpcdsQueries(queryLocation = "tpcds-v2.7.0", queries = queriesV2_7ToRun, tableSizes,
- nameSuffix = "-v2.7")
+ nameSuffix = nameSuffixForQueriesV2_7)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org