You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2019/09/12 12:32:58 UTC

[spark] branch master updated: [SPARK-29065][SQL][TEST] Extend `EXTRACT` benchmark

This is an automated email from the ASF dual-hosted git repository.

yamamuro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e9fafb  [SPARK-29065][SQL][TEST] Extend `EXTRACT` benchmark
8e9fafb is described below

commit 8e9fafbb21a26028710df1831fc16b41855f0b4a
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Thu Sep 12 21:32:35 2019 +0900

    [SPARK-29065][SQL][TEST] Extend `EXTRACT` benchmark
    
    ### What changes were proposed in this pull request?
    
    In this PR, I propose to extend `ExtractBenchmark` by adding new benchmarks for:
    - `EXTRACT` with a `DATE` input column
    - the `DATE_PART` function with `DATE`/`TIMESTAMP` input columns
    
    ### Why are the changes needed?
    
    The `EXTRACT` expression was rebased on the `DATE_PART` expression by PR https://github.com/apache/spark/pull/25410, where some of the sub-expressions take a `DATE` column as input (`Millennium`, `Year`, etc.) while others require a `TIMESTAMP` column (`Hour`, `Minute`). Separate benchmarks for `DATE` should exclude the overhead of implicit `DATE` <-> `TIMESTAMP` conversions.
    
    ### Does this PR introduce any user-facing change?
    
    No, it doesn't.
    
    ### How was this patch tested?
    - Regenerated results of `ExtractBenchmark`
    
    Closes #25772 from MaxGekk/date_part-benchmark.
    
    Authored-by: Maxim Gekk <ma...@gmail.com>
    Signed-off-by: Takeshi Yamamuro <ya...@apache.org>
---
 sql/core/benchmarks/ExtractBenchmark-results.txt   | 243 +++++++++------------
 .../sql/execution/benchmark/ExtractBenchmark.scala |  95 +++++---
 2 files changed, 161 insertions(+), 177 deletions(-)

diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt
index 7ee185e..31ad787 100644
--- a/sql/core/benchmarks/ExtractBenchmark-results.txt
+++ b/sql/core/benchmarks/ExtractBenchmark-results.txt
@@ -1,145 +1,100 @@
-================================================================================================
-Extract
-================================================================================================
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-cast to timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-cast to timestamp wholestage off                    407            432          36         24.6          40.7       1.0X
-cast to timestamp wholestage on                     348            396          80         28.7          34.8       1.2X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MILLENNIUM of timestamp:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-MILLENNIUM of timestamp wholestage off             1407           1408           2          7.1         140.7       1.0X
-MILLENNIUM of timestamp wholestage on              1334           1380          81          7.5         133.4       1.1X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-CENTURY of timestamp:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-CENTURY of timestamp wholestage off                1362           1364           3          7.3         136.2       1.0X
-CENTURY of timestamp wholestage on                 1334           1342           8          7.5         133.4       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DECADE of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-DECADE of timestamp wholestage off                 1226           1229           4          8.2         122.6       1.0X
-DECADE of timestamp wholestage on                  1218           1225           8          8.2         121.8       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-YEAR of timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-YEAR of timestamp wholestage off                   1207           1210           4          8.3         120.7       1.0X
-YEAR of timestamp wholestage on                    1201           1216          17          8.3         120.1       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-ISOYEAR of timestamp:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-ISOYEAR of timestamp wholestage off                1442           1446           6          6.9         144.2       1.0X
-ISOYEAR of timestamp wholestage on                 1315           1336          18          7.6         131.5       1.1X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-QUARTER of timestamp:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-QUARTER of timestamp wholestage off                1443           1454          16          6.9         144.3       1.0X
-QUARTER of timestamp wholestage on                 1429           1442           9          7.0         142.9       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MONTH of timestamp:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-MONTH of timestamp wholestage off                  1196           1200           5          8.4         119.6       1.0X
-MONTH of timestamp wholestage on                   1192           1204          10          8.4         119.2       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-WEEK of timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-WEEK of timestamp wholestage off                   2103           2104           2          4.8         210.3       1.0X
-WEEK of timestamp wholestage on                    1798           1804           8          5.6         179.8       1.2X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DAY of timestamp:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-DAY of timestamp wholestage off                    1211           1228          23          8.3         121.1       1.0X
-DAY of timestamp wholestage on                     1204           1212           6          8.3         120.4       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DAYOFWEEK of timestamp:                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-DAYOFWEEK of timestamp wholestage off              1387           1389           3          7.2         138.7       1.0X
-DAYOFWEEK of timestamp wholestage on               1353           1360           8          7.4         135.3       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DOW of timestamp:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-DOW of timestamp wholestage off                    1373           1373           0          7.3         137.3       1.0X
-DOW of timestamp wholestage on                     1361           1372          15          7.3         136.1       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-ISODOW of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-ISODOW of timestamp wholestage off                 1311           1366          77          7.6         131.1       1.0X
-ISODOW of timestamp wholestage on                  1307           1314           6          7.7         130.7       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-DOY of timestamp:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-DOY of timestamp wholestage off                    1241           1243           2          8.1         124.1       1.0X
-DOY of timestamp wholestage on                     1229           1239           9          8.1         122.9       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-HOUR of timestamp:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-HOUR of timestamp wholestage off                    353            358           8         28.3          35.3       1.0X
-HOUR of timestamp wholestage on                     358            365           5         27.9          35.8       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MINUTE of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-MINUTE of timestamp wholestage off                  353            354           2         28.3          35.3       1.0X
-MINUTE of timestamp wholestage on                   362            368           9         27.6          36.2       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-SECOND of timestamp:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-SECOND of timestamp wholestage off                  341            350          13         29.3          34.1       1.0X
-SECOND of timestamp wholestage on                   362            368           7         27.6          36.2       0.9X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MILLISECONDS of timestamp:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-MILLISECONDS of timestamp wholestage off          36785          36808          32          0.3        3678.5       1.0X
-MILLISECONDS of timestamp wholestage on           36644          36760          72          0.3        3664.4       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-MICROSECONDS of timestamp:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-MICROSECONDS of timestamp wholestage off            446            447           0         22.4          44.6       1.0X
-MICROSECONDS of timestamp wholestage on             458            463           4         21.8          45.8       1.0X
-
-OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64
-Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-EPOCH of timestamp:                       Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-EPOCH of timestamp wholestage off                 29807          29811           5          0.3        2980.7       1.0X
-EPOCH of timestamp wholestage on                  29843          29930          64          0.3        2984.3       1.0X
-
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Invoke extract for timestamp:             Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+cast to timestamp                                   264            281          25         37.9          26.4       1.0X
+MILLENNIUM of timestamp                            1187           1198          16          8.4         118.7       0.2X
+CENTURY of timestamp                               1122           1124           2          8.9         112.2       0.2X
+DECADE of timestamp                                1041           1049           7          9.6         104.1       0.3X
+YEAR of timestamp                                  1027           1035           6          9.7         102.7       0.3X
+ISOYEAR of timestamp                               1155           1171          27          8.7         115.5       0.2X
+QUARTER of timestamp                               1181           1192          10          8.5         118.1       0.2X
+MONTH of timestamp                                 1023           1030           7          9.8         102.3       0.3X
+WEEK of timestamp                                  1511           1517           5          6.6         151.1       0.2X
+DAY of timestamp                                   1010           1016           6          9.9         101.0       0.3X
+DAYOFWEEK of timestamp                             1127           1129           4          8.9         112.7       0.2X
+DOW of timestamp                                   1123           1130           6          8.9         112.3       0.2X
+ISODOW of timestamp                                1099           1105           6          9.1         109.9       0.2X
+DOY of timestamp                                   1029           1030           1          9.7         102.9       0.3X
+HOUR of timestamp                                   415            417           1         24.1          41.5       0.6X
+MINUTE of timestamp                                 409            418          13         24.4          40.9       0.6X
+SECOND of timestamp                                 408            413           8         24.5          40.8       0.6X
+MILLISECONDS of timestamp                         28956          29040          73          0.3        2895.6       0.0X
+MICROSECONDS of timestamp                           504            519          13         19.8          50.4       0.5X
+EPOCH of timestamp                                23543          23566          28          0.4        2354.3       0.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Invoke extract for date:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+cast to date                                        853            856           6         11.7          85.3       1.0X
+MILLENNIUM of date                                 1081           1088           6          9.2         108.1       0.8X
+CENTURY of date                                    1087           1090           4          9.2         108.7       0.8X
+DECADE of date                                     1018           1019           1          9.8         101.8       0.8X
+YEAR of date                                        996           1006           9         10.0          99.6       0.9X
+ISOYEAR of date                                    1133           1147          21          8.8         113.3       0.8X
+QUARTER of date                                    1246           1254          10          8.0         124.6       0.7X
+MONTH of date                                       998           1002           4         10.0          99.8       0.9X
+WEEK of date                                       1483           1490           7          6.7         148.3       0.6X
+DAY of date                                         992            998           5         10.1          99.2       0.9X
+DAYOFWEEK of date                                  1121           1128           7          8.9         112.1       0.8X
+DOW of date                                        1118           1126           8          8.9         111.8       0.8X
+ISODOW of date                                     1093           1103           9          9.1         109.3       0.8X
+DOY of date                                        1026           1032           5          9.7         102.6       0.8X
+HOUR of date                                       1707           1726          24          5.9         170.7       0.5X
+MINUTE of date                                     1710           1731          19          5.8         171.0       0.5X
+SECOND of date                                     1701           1720          19          5.9         170.1       0.5X
+MILLISECONDS of date                               2256           2272          19          4.4         225.6       0.4X
+MICROSECONDS of date                               1801           1810          11          5.6         180.1       0.5X
+EPOCH of date                                     24848          24860          17          0.4        2484.8       0.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Invoke date_part for timestamp:           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+cast to timestamp                                   227            253          39         44.0          22.7       1.0X
+MILLENNIUM of timestamp                            1121           1127           9          8.9         112.1       0.2X
+CENTURY of timestamp                               1084           1094          17          9.2         108.4       0.2X
+DECADE of timestamp                                1015           1037          35          9.8         101.5       0.2X
+YEAR of timestamp                                  1011           1013           4          9.9         101.1       0.2X
+ISOYEAR of timestamp                               1121           1126           7          8.9         112.1       0.2X
+QUARTER of timestamp                               1243           1247           4          8.0         124.3       0.2X
+MONTH of timestamp                                 1000           1009          14         10.0         100.0       0.2X
+WEEK of timestamp                                  1481           1492          11          6.8         148.1       0.2X
+DAY of timestamp                                    999           1000           1         10.0          99.9       0.2X
+DAYOFWEEK of timestamp                             1114           1132          20          9.0         111.4       0.2X
+DOW of timestamp                                   1135           1147          17          8.8         113.5       0.2X
+ISODOW of timestamp                                1070           1079          10          9.3         107.0       0.2X
+DOY of timestamp                                   1018           1027           8          9.8         101.8       0.2X
+HOUR of timestamp                                   413            419           9         24.2          41.3       0.6X
+MINUTE of timestamp                                 411            414           3         24.3          41.1       0.6X
+SECOND of timestamp                                 410            413           3         24.4          41.0       0.6X
+MILLISECONDS of timestamp                         29225          29264          52          0.3        2922.5       0.0X
+MICROSECONDS of timestamp                           507            512           7         19.7          50.7       0.4X
+EPOCH of timestamp                                23565          23608          56          0.4        2356.5       0.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_202-b08 on Mac OS X 10.14.6
+Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz
+Invoke date_part for date:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+cast to date                                        848            852           3         11.8          84.8       1.0X
+MILLENNIUM of date                                 1083           1086           3          9.2         108.3       0.8X
+CENTURY of date                                    1081           1084           5          9.3         108.1       0.8X
+DECADE of date                                     1010           1014           4          9.9         101.0       0.8X
+YEAR of date                                        992           1000           9         10.1          99.2       0.9X
+ISOYEAR of date                                    1111           1116           6          9.0         111.1       0.8X
+QUARTER of date                                    1230           1249          17          8.1         123.0       0.7X
+MONTH of date                                       996           1008          11         10.0          99.6       0.9X
+WEEK of date                                       1487           1516          28          6.7         148.7       0.6X
+DAY of date                                         993            999           6         10.1          99.3       0.9X
+DAYOFWEEK of date                                  1113           1118           5          9.0         111.3       0.8X
+DOW of date                                        1113           1116           2          9.0         111.3       0.8X
+ISODOW of date                                     1069           1072           3          9.4         106.9       0.8X
+DOY of date                                        1027           1028           1          9.7         102.7       0.8X
+HOUR of date                                       1707           1710           3          5.9         170.7       0.5X
+MINUTE of date                                     1704           1705           2          5.9         170.4       0.5X
+SECOND of date                                     1701           1705           4          5.9         170.1       0.5X
+MILLISECONDS of date                               2229           2238           9          4.5         222.9       0.4X
+MICROSECONDS of date                               1801           1808          12          5.6         180.1       0.5X
+EPOCH of date                                     24783          24817          31          0.4        2478.3       0.0X
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala
index dbbad43..b9086aa 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala
@@ -19,6 +19,11 @@ package org.apache.spark.sql.execution.benchmark
 
 import java.time.Instant
 
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.internal.SQLConf
+
 /**
  * Synthetic benchmark for the extract function.
  * To run this benchmark:
@@ -31,52 +36,76 @@ import java.time.Instant
  *      Results will be written to "benchmarks/ExtractBenchmark-results.txt".
  * }}}
  */
-object ExtractBenchmark extends SqlBasedBenchmark {
+object ExtractBenchmark extends BenchmarkBase with SQLHelper {
+  private val spark: SparkSession = SparkSession.builder()
+    .master("local[1]")
+    .appName(this.getClass.getCanonicalName)
+    .getOrCreate()
+
   private def doBenchmark(cardinality: Long, exprs: String*): Unit = {
     val sinceSecond = Instant.parse("2010-01-01T00:00:00Z").getEpochSecond
-    spark
-      .range(sinceSecond, sinceSecond + cardinality, 1, 1)
-      .selectExpr(exprs: _*)
-      .write
-      .format("noop")
-      .save()
+    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
+      spark
+        .range(sinceSecond, sinceSecond + cardinality, 1, 1)
+        .selectExpr(exprs: _*)
+        .write
+        .format("noop")
+        .save()
+    }
   }
 
-  private def run(cardinality: Long, name: String, exprs: String*): Unit = {
-    codegenBenchmark(name, cardinality) {
+  private def run(
+      benchmark: Benchmark,
+      cardinality: Long,
+      name: String,
+      exprs: String*): Unit = {
+    benchmark.addCase(name, numIters = 3) { _ =>
       doBenchmark(cardinality, exprs: _*)
     }
   }
 
-  private def run(cardinality: Long, field: String): Unit = {
-    codegenBenchmark(s"$field of timestamp", cardinality) {
-      doBenchmark(cardinality, s"EXTRACT($field FROM (cast(id as timestamp)))")
+  private def castExpr(from: String): String = from match {
+    case "timestamp" => s"cast(id as timestamp)"
+    case "date" => s"cast(cast(id as timestamp) as date)"
+    case other => throw new IllegalArgumentException(
+      s"Unsupported column type $other. Valid column types are 'timestamp' and 'date'")
+  }
+
+  private def run(
+      benchmark: Benchmark,
+      func: String,
+      cardinality: Long,
+      field: String,
+      from: String): Unit = {
+    val expr = func match {
+      case "extract" => s"EXTRACT($field FROM ${castExpr(from)})"
+      case "date_part" => s"DATE_PART('$field', ${castExpr(from)})"
+      case other => throw new IllegalArgumentException(
+        s"Unsupported function '$other'. Valid functions are 'extract' and 'date_part'.")
+    }
+    benchmark.addCase(s"$field of $from", numIters = 3) { _ =>
+      doBenchmark(cardinality, expr)
     }
   }
 
   override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
     val N = 10000000L
-    runBenchmark("Extract") {
-      run(N, "cast to timestamp", "cast(id as timestamp)")
-      run(N, "MILLENNIUM")
-      run(N, "CENTURY")
-      run(N, "DECADE")
-      run(N, "YEAR")
-      run(N, "ISOYEAR")
-      run(N, "QUARTER")
-      run(N, "MONTH")
-      run(N, "WEEK")
-      run(N, "DAY")
-      run(N, "DAYOFWEEK")
-      run(N, "DOW")
-      run(N, "ISODOW")
-      run(N, "DOY")
-      run(N, "HOUR")
-      run(N, "MINUTE")
-      run(N, "SECOND")
-      run(N, "MILLISECONDS")
-      run(N, "MICROSECONDS")
-      run(N, "EPOCH")
+    val fields = Seq(
+      "MILLENNIUM", "CENTURY", "DECADE", "YEAR",
+      "ISOYEAR", "QUARTER", "MONTH", "WEEK",
+      "DAY", "DAYOFWEEK", "DOW", "ISODOW",
+      "DOY", "HOUR", "MINUTE", "SECOND",
+      "MILLISECONDS", "MICROSECONDS", "EPOCH")
+
+    Seq("extract", "date_part").foreach { func =>
+      Seq("timestamp", "date").foreach { dateType =>
+        val benchmark = new Benchmark(s"Invoke $func for $dateType", N, output = output)
+
+        run(benchmark, N, s"cast to $dateType", castExpr(dateType))
+        fields.foreach(run(benchmark, func, N, _, dateType))
+
+        benchmark.run()
+      }
     }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org