You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2019/09/12 12:32:58 UTC
[spark] branch master updated: [SPARK-29065][SQL][TEST] Extend `EXTRACT` benchmark
This is an automated email from the ASF dual-hosted git repository.
yamamuro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8e9fafb [SPARK-29065][SQL][TEST] Extend `EXTRACT` benchmark
8e9fafb is described below
commit 8e9fafbb21a26028710df1831fc16b41855f0b4a
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Thu Sep 12 21:32:35 2019 +0900
[SPARK-29065][SQL][TEST] Extend `EXTRACT` benchmark
### What changes were proposed in this pull request?
In this PR, I propose to extend `ExtractBenchmark` and add new benchmarks for:
- `EXTRACT` with a `DATE` input column
- the `DATE_PART` function with `DATE`/`TIMESTAMP` input columns
### Why are the changes needed?
The `EXTRACT` expression was rebased on the `DATE_PART` expression by the PR https://github.com/apache/spark/pull/25410, where some of the sub-expressions take a `DATE` column as the input (`Millennium`, `Year`, etc.) but others require a `TIMESTAMP` column (`Hour`, `Minute`). Separate benchmarks for `DATE` should exclude the overhead of implicit conversions `DATE` <-> `TIMESTAMP`.
### Does this PR introduce any user-facing change?
No, it doesn't.
### How was this patch tested?
- Regenerated results of `ExtractBenchmark`
Closes #25772 from MaxGekk/date_part-benchmark.
Authored-by: Maxim Gekk <ma...@gmail.com>
Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
sql/core/benchmarks/ExtractBenchmark-results.txt | 243 +++++++++------------
.../sql/execution/benchmark/ExtractBenchmark.scala | 95 +++++---
2 files changed, 161 insertions(+), 177 deletions(-)
diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt
index 7ee185e..31ad787 100644
--- a/sql/core/benchmarks/ExtractBenchmark-results.txt
+++ b/sql/core/benchmarks/ExtractBenchmark-results.txt
@@ -1,145 +1,100 @@
-================================================================================================
-Extract
-================================================================================================
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-cast to timestamp wholestage off 407 432 36 24.6 40.7 1.0X
-cast to timestamp wholestage on 348 396 80 28.7 34.8 1.2X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MILLENNIUM of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-MILLENNIUM of timestamp wholestage off 1407 1408 2 7.1 140.7 1.0X
-MILLENNIUM of timestamp wholestage on 1334 1380 81 7.5 133.4 1.1X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-CENTURY of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-CENTURY of timestamp wholestage off 1362 1364 3 7.3 136.2 1.0X
-CENTURY of timestamp wholestage on 1334 1342 8 7.5 133.4 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DECADE of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-DECADE of timestamp wholestage off 1226 1229 4 8.2 122.6 1.0X
-DECADE of timestamp wholestage on 1218 1225 8 8.2 121.8 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-YEAR of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-YEAR of timestamp wholestage off 1207 1210 4 8.3 120.7 1.0X
-YEAR of timestamp wholestage on 1201 1216 17 8.3 120.1 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-ISOYEAR of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-ISOYEAR of timestamp wholestage off 1442 1446 6 6.9 144.2 1.0X
-ISOYEAR of timestamp wholestage on 1315 1336 18 7.6 131.5 1.1X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-QUARTER of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-QUARTER of timestamp wholestage off 1443 1454 16 6.9 144.3 1.0X
-QUARTER of timestamp wholestage on 1429 1442 9 7.0 142.9 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MONTH of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-MONTH of timestamp wholestage off 1196 1200 5 8.4 119.6 1.0X
-MONTH of timestamp wholestage on 1192 1204 10 8.4 119.2 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-WEEK of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-WEEK of timestamp wholestage off 2103 2104 2 4.8 210.3 1.0X
-WEEK of timestamp wholestage on 1798 1804 8 5.6 179.8 1.2X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DAY of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-DAY of timestamp wholestage off 1211 1228 23 8.3 121.1 1.0X
-DAY of timestamp wholestage on 1204 1212 6 8.3 120.4 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DAYOFWEEK of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-DAYOFWEEK of timestamp wholestage off 1387 1389 3 7.2 138.7 1.0X
-DAYOFWEEK of timestamp wholestage on 1353 1360 8 7.4 135.3 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DOW of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-DOW of timestamp wholestage off 1373 1373 0 7.3 137.3 1.0X
-DOW of timestamp wholestage on 1361 1372 15 7.3 136.1 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-ISODOW of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-ISODOW of timestamp wholestage off 1311 1366 77 7.6 131.1 1.0X
-ISODOW of timestamp wholestage on 1307 1314 6 7.7 130.7 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DOY of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-DOY of timestamp wholestage off 1241 1243 2 8.1 124.1 1.0X
-DOY of timestamp wholestage on 1229 1239 9 8.1 122.9 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-HOUR of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-HOUR of timestamp wholestage off 353 358 8 28.3 35.3 1.0X
-HOUR of timestamp wholestage on 358 365 5 27.9 35.8 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MINUTE of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-MINUTE of timestamp wholestage off 353 354 2 28.3 35.3 1.0X
-MINUTE of timestamp wholestage on 362 368 9 27.6 36.2 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-SECOND of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-SECOND of timestamp wholestage off 341 350 13 29.3 34.1 1.0X
-SECOND of timestamp wholestage on 362 368 7 27.6 36.2 0.9X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MILLISECONDS of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-MILLISECONDS of timestamp wholestage off 36785 36808 32 0.3 3678.5 1.0X
-MILLISECONDS of timestamp wholestage on 36644 36760 72 0.3 3664.4 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MICROSECONDS of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-MICROSECONDS of timestamp wholestage off 446 447 0 22.4 44.6 1.0X
-MICROSECONDS of timestamp wholestage on 458 463 4 21.8 45.8 1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-EPOCH of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-EPOCH of timestamp wholestage off 29807 29811 5 0.3 2980.7 1.0X
-EPOCH of timestamp wholestage on 29843 29930 64 0.3 2984.3 1.0X
-
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Invoke extract for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+cast to timestamp 264 281 25 37.9 26.4 1.0X
+MILLENNIUM of timestamp 1187 1198 16 8.4 118.7 0.2X
+CENTURY of timestamp 1122 1124 2 8.9 112.2 0.2X
+DECADE of timestamp 1041 1049 7 9.6 104.1 0.3X
+YEAR of timestamp 1027 1035 6 9.7 102.7 0.3X
+ISOYEAR of timestamp 1155 1171 27 8.7 115.5 0.2X
+QUARTER of timestamp 1181 1192 10 8.5 118.1 0.2X
+MONTH of timestamp 1023 1030 7 9.8 102.3 0.3X
+WEEK of timestamp 1511 1517 5 6.6 151.1 0.2X
+DAY of timestamp 1010 1016 6 9.9 101.0 0.3X
+DAYOFWEEK of timestamp 1127 1129 4 8.9 112.7 0.2X
+DOW of timestamp 1123 1130 6 8.9 112.3 0.2X
+ISODOW of timestamp 1099 1105 6 9.1 109.9 0.2X
+DOY of timestamp 1029 1030 1 9.7 102.9 0.3X
+HOUR of timestamp 415 417 1 24.1 41.5 0.6X
+MINUTE of timestamp 409 418 13 24.4 40.9 0.6X
+SECOND of timestamp 408 413 8 24.5 40.8 0.6X
+MILLISECONDS of timestamp 28956 29040 73 0.3 2895.6 0.0X
+MICROSECONDS of timestamp 504 519 13 19.8 50.4 0.5X
+EPOCH of timestamp 23543 23566 28 0.4 2354.3 0.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Invoke extract for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+cast to date 853 856 6 11.7 85.3 1.0X
+MILLENNIUM of date 1081 1088 6 9.2 108.1 0.8X
+CENTURY of date 1087 1090 4 9.2 108.7 0.8X
+DECADE of date 1018 1019 1 9.8 101.8 0.8X
+YEAR of date 996 1006 9 10.0 99.6 0.9X
+ISOYEAR of date 1133 1147 21 8.8 113.3 0.8X
+QUARTER of date 1246 1254 10 8.0 124.6 0.7X
+MONTH of date 998 1002 4 10.0 99.8 0.9X
+WEEK of date 1483 1490 7 6.7 148.3 0.6X
+DAY of date 992 998 5 10.1 99.2 0.9X
+DAYOFWEEK of date 1121 1128 7 8.9 112.1 0.8X
+DOW of date 1118 1126 8 8.9 111.8 0.8X
+ISODOW of date 1093 1103 9 9.1 109.3 0.8X
+DOY of date 1026 1032 5 9.7 102.6 0.8X
+HOUR of date 1707 1726 24 5.9 170.7 0.5X
+MINUTE of date 1710 1731 19 5.8 171.0 0.5X
+SECOND of date 1701 1720 19 5.9 170.1 0.5X
+MILLISECONDS of date 2256 2272 19 4.4 225.6 0.4X
+MICROSECONDS of date 1801 1810 11 5.6 180.1 0.5X
+EPOCH of date 24848 24860 17 0.4 2484.8 0.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Invoke date_part for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+cast to timestamp 227 253 39 44.0 22.7 1.0X
+MILLENNIUM of timestamp 1121 1127 9 8.9 112.1 0.2X
+CENTURY of timestamp 1084 1094 17 9.2 108.4 0.2X
+DECADE of timestamp 1015 1037 35 9.8 101.5 0.2X
+YEAR of timestamp 1011 1013 4 9.9 101.1 0.2X
+ISOYEAR of timestamp 1121 1126 7 8.9 112.1 0.2X
+QUARTER of timestamp 1243 1247 4 8.0 124.3 0.2X
+MONTH of timestamp 1000 1009 14 10.0 100.0 0.2X
+WEEK of timestamp 1481 1492 11 6.8 148.1 0.2X
+DAY of timestamp 999 1000 1 10.0 99.9 0.2X
+DAYOFWEEK of timestamp 1114 1132 20 9.0 111.4 0.2X
+DOW of timestamp 1135 1147 17 8.8 113.5 0.2X
+ISODOW of timestamp 1070 1079 10 9.3 107.0 0.2X
+DOY of timestamp 1018 1027 8 9.8 101.8 0.2X
+HOUR of timestamp 413 419 9 24.2 41.3 0.6X
+MINUTE of timestamp 411 414 3 24.3 41.1 0.6X
+SECOND of timestamp 410 413 3 24.4 41.0 0.6X
+MILLISECONDS of timestamp 29225 29264 52 0.3 2922.5 0.0X
+MICROSECONDS of timestamp 507 512 7 19.7 50.7 0.4X
+EPOCH of timestamp 23565 23608 56 0.4 2356.5 0.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Invoke date_part for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+cast to date 848 852 3 11.8 84.8 1.0X
+MILLENNIUM of date 1083 1086 3 9.2 108.3 0.8X
+CENTURY of date 1081 1084 5 9.3 108.1 0.8X
+DECADE of date 1010 1014 4 9.9 101.0 0.8X
+YEAR of date 992 1000 9 10.1 99.2 0.9X
+ISOYEAR of date 1111 1116 6 9.0 111.1 0.8X
+QUARTER of date 1230 1249 17 8.1 123.0 0.7X
+MONTH of date 996 1008 11 10.0 99.6 0.9X
+WEEK of date 1487 1516 28 6.7 148.7 0.6X
+DAY of date 993 999 6 10.1 99.3 0.9X
+DAYOFWEEK of date 1113 1118 5 9.0 111.3 0.8X
+DOW of date 1113 1116 2 9.0 111.3 0.8X
+ISODOW of date 1069 1072 3 9.4 106.9 0.8X
+DOY of date 1027 1028 1 9.7 102.7 0.8X
+HOUR of date 1707 1710 3 5.9 170.7 0.5X
+MINUTE of date 1704 1705 2 5.9 170.4 0.5X
+SECOND of date 1701 1705 4 5.9 170.1 0.5X
+MILLISECONDS of date 2229 2238 9 4.5 222.9 0.4X
+MICROSECONDS of date 1801 1808 12 5.6 180.1 0.5X
+EPOCH of date 24783 24817 31 0.4 2478.3 0.0X
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala
index dbbad43..b9086aa 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala
@@ -19,6 +19,11 @@ package org.apache.spark.sql.execution.benchmark
import java.time.Instant
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.internal.SQLConf
+
/**
* Synthetic benchmark for the extract function.
* To run this benchmark:
@@ -31,52 +36,76 @@ import java.time.Instant
* Results will be written to "benchmarks/ExtractBenchmark-results.txt".
* }}}
*/
-object ExtractBenchmark extends SqlBasedBenchmark {
+object ExtractBenchmark extends BenchmarkBase with SQLHelper {
+ private val spark: SparkSession = SparkSession.builder()
+ .master("local[1]")
+ .appName(this.getClass.getCanonicalName)
+ .getOrCreate()
+
private def doBenchmark(cardinality: Long, exprs: String*): Unit = {
val sinceSecond = Instant.parse("2010-01-01T00:00:00Z").getEpochSecond
- spark
- .range(sinceSecond, sinceSecond + cardinality, 1, 1)
- .selectExpr(exprs: _*)
- .write
- .format("noop")
- .save()
+ withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
+ spark
+ .range(sinceSecond, sinceSecond + cardinality, 1, 1)
+ .selectExpr(exprs: _*)
+ .write
+ .format("noop")
+ .save()
+ }
}
- private def run(cardinality: Long, name: String, exprs: String*): Unit = {
- codegenBenchmark(name, cardinality) {
+ private def run(
+ benchmark: Benchmark,
+ cardinality: Long,
+ name: String,
+ exprs: String*): Unit = {
+ benchmark.addCase(name, numIters = 3) { _ =>
doBenchmark(cardinality, exprs: _*)
}
}
- private def run(cardinality: Long, field: String): Unit = {
- codegenBenchmark(s"$field of timestamp", cardinality) {
- doBenchmark(cardinality, s"EXTRACT($field FROM (cast(id as timestamp)))")
+ private def castExpr(from: String): String = from match {
+ case "timestamp" => s"cast(id as timestamp)"
+ case "date" => s"cast(cast(id as timestamp) as date)"
+ case other => throw new IllegalArgumentException(
+ s"Unsupported column type $other. Valid column types are 'timestamp' and 'date'")
+ }
+
+ private def run(
+ benchmark: Benchmark,
+ func: String,
+ cardinality: Long,
+ field: String,
+ from: String): Unit = {
+ val expr = func match {
+ case "extract" => s"EXTRACT($field FROM ${castExpr(from)})"
+ case "date_part" => s"DATE_PART('$field', ${castExpr(from)})"
+ case other => throw new IllegalArgumentException(
+ s"Unsupported function '$other'. Valid functions are 'extract' and 'date_part'.")
+ }
+ benchmark.addCase(s"$field of $from", numIters = 3) { _ =>
+ doBenchmark(cardinality, expr)
}
}
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
val N = 10000000L
- runBenchmark("Extract") {
- run(N, "cast to timestamp", "cast(id as timestamp)")
- run(N, "MILLENNIUM")
- run(N, "CENTURY")
- run(N, "DECADE")
- run(N, "YEAR")
- run(N, "ISOYEAR")
- run(N, "QUARTER")
- run(N, "MONTH")
- run(N, "WEEK")
- run(N, "DAY")
- run(N, "DAYOFWEEK")
- run(N, "DOW")
- run(N, "ISODOW")
- run(N, "DOY")
- run(N, "HOUR")
- run(N, "MINUTE")
- run(N, "SECOND")
- run(N, "MILLISECONDS")
- run(N, "MICROSECONDS")
- run(N, "EPOCH")
+ val fields = Seq(
+ "MILLENNIUM", "CENTURY", "DECADE", "YEAR",
+ "ISOYEAR", "QUARTER", "MONTH", "WEEK",
+ "DAY", "DAYOFWEEK", "DOW", "ISODOW",
+ "DOY", "HOUR", "MINUTE", "SECOND",
+ "MILLISECONDS", "MICROSECONDS", "EPOCH")
+
+ Seq("extract", "date_part").foreach { func =>
+ Seq("timestamp", "date").foreach { dateType =>
+ val benchmark = new Benchmark(s"Invoke $func for $dateType", N, output = output)
+
+ run(benchmark, N, s"cast to $dateType", castExpr(dateType))
+ fields.foreach(run(benchmark, func, N, _, dateType))
+
+ benchmark.run()
+ }
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org