You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2020/11/11 06:53:06 UTC

[spark] branch branch-2.4 updated: [SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TPCDSQueryBenchmark

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new fece4a3  [SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TPCDSQueryBenchmark
fece4a3 is described below

commit fece4a3a36e23c7b99d6cb64e0c4484c9e17235f
Author: Takeshi Yamamuro <ya...@apache.org>
AuthorDate: Wed Nov 11 15:24:05 2020 +0900

    [SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TPCDSQueryBenchmark
    
    ### What changes were proposed in this pull request?
    
    This PR intends to fix the behaviour of query filters in `TPCDSQueryBenchmark`. We can use an option `--query-filter` for selecting TPCDS queries to run, e.g., `--query-filter q6,q8,q13`. But, the current master has a weird behaviour about the option. For example, if we pass `--query-filter q6` so as to run the TPCDS q6 only, `TPCDSQueryBenchmark` runs `q6` and `q6-v2.7` because the `filterQueries` method does not respect the name suffix. So, there is no way now to run the TPCDS q6 only.
    
    ### Why are the changes needed?
    
    Bugfix.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Manually checked.
    
    Closes #30324 from maropu/FilterBugInTPCDSQueryBenchmark.
    
    Authored-by: Takeshi Yamamuro <ya...@apache.org>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
    (cherry picked from commit 4b367976a877adb981f65d546e1522fdf30d0731)
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 .../execution/benchmark/TPCDSQueryBenchmark.scala   | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
index fccee97..1f8b057 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
@@ -90,11 +90,16 @@ object TPCDSQueryBenchmark extends Logging {
     }
   }
 
-  def filterQueries(
+  private def filterQueries(
       origQueries: Seq[String],
-      args: TPCDSQueryBenchmarkArguments): Seq[String] = {
-    if (args.queryFilter.nonEmpty) {
-      origQueries.filter(args.queryFilter.contains)
+      queryFilter: Set[String],
+      nameSuffix: String = ""): Seq[String] = {
+    if (queryFilter.nonEmpty) {
+      if (nameSuffix.nonEmpty) {
+        origQueries.filter { name => queryFilter.contains(s"$name$nameSuffix") }
+      } else {
+        origQueries.filter(queryFilter.contains)
+      }
     } else {
       origQueries
     }
@@ -117,6 +122,7 @@ object TPCDSQueryBenchmark extends Logging {
       "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
 
     // This list only includes TPC-DS v2.7 queries that are different from v1.4 ones
+    val nameSuffixForQueriesV2_7 = "-v2.7"
     val tpcdsQueriesV2_7 = Seq(
       "q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a",
       "q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49",
@@ -124,8 +130,9 @@ object TPCDSQueryBenchmark extends Logging {
       "q80a", "q86a", "q98")
 
     // If `--query-filter` defined, filters the queries that this option selects
-    val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs)
-    val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs)
+    val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs.queryFilter)
+    val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs.queryFilter,
+      nameSuffix = nameSuffixForQueriesV2_7)
 
     if ((queriesV1_4ToRun ++ queriesV2_7ToRun).isEmpty) {
       throw new RuntimeException(
@@ -135,6 +142,6 @@ object TPCDSQueryBenchmark extends Logging {
     val tableSizes = setupTables(benchmarkArgs.dataLocation)
     runTpcdsQueries(queryLocation = "tpcds", queries = queriesV1_4ToRun, tableSizes)
     runTpcdsQueries(queryLocation = "tpcds-v2.7.0", queries = queriesV2_7ToRun, tableSizes,
-      nameSuffix = "-v2.7")
+      nameSuffix = nameSuffixForQueriesV2_7)
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org