You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/07/02 20:28:53 UTC

[spark] branch master updated: [SPARK-32130][SQL][FOLLOWUP] Enable timestamps inference in JsonBenchmark

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 42f01e3  [SPARK-32130][SQL][FOLLOWUP] Enable timestamps inference in JsonBenchmark
42f01e3 is described below

commit 42f01e314b4874236544cc8b94bef766269385ee
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Thu Jul 2 13:26:57 2020 -0700

    [SPARK-32130][SQL][FOLLOWUP] Enable timestamps inference in JsonBenchmark
    
    ### What changes were proposed in this pull request?
    Set the JSON option `inferTimestamp` to `true` for the cases that measure perf of timestamp inference.
    
    ### Why are the changes needed?
    The PR https://github.com/apache/spark/pull/28966 disabled timestamp inference by default. As a consequence, some benchmarks don't measure perf of timestamp inference from JSON fields. This PR explicitly enables such inference.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    By re-generating results of `JsonBenchmark`.
    
    Closes #28981 from MaxGekk/json-inferTimestamps-disable-by-default-followup.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../benchmarks/JsonBenchmark-jdk11-results.txt     | 86 +++++++++++-----------
 sql/core/benchmarks/JsonBenchmark-results.txt      | 86 +++++++++++-----------
 .../execution/datasources/json/JsonBenchmark.scala |  4 +-
 3 files changed, 88 insertions(+), 88 deletions(-)

diff --git a/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt b/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
index ff37084..2d506f0 100644
--- a/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
@@ -7,106 +7,106 @@ OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-106
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                       69219          69342         116          1.4         692.2       1.0X
-UTF-8 is set                                     143950         143986          55          0.7        1439.5       0.5X
+No encoding                                       73307          73400         141          1.4         733.1       1.0X
+UTF-8 is set                                     143834         143925         152          0.7        1438.3       0.5X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                       57828          57913         136          1.7         578.3       1.0X
-UTF-8 is set                                      83649          83711          60          1.2         836.5       0.7X
+No encoding                                       50894          51065         292          2.0         508.9       1.0X
+UTF-8 is set                                      98462          99455        1173          1.0         984.6       0.5X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                       64560          65193        1023          0.2        6456.0       1.0X
-UTF-8 is set                                     102925         103174         216          0.1       10292.5       0.6X
+No encoding                                       64011          64969        1001          0.2        6401.1       1.0X
+UTF-8 is set                                     102757         102984         311          0.1       10275.7       0.6X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                      131002         132316        1160          0.0      262003.1       1.0X
-UTF-8 is set                                     152128         152371         332          0.0      304256.5       0.9X
+No encoding                                      132559         133561        1010          0.0      265117.3       1.0X
+UTF-8 is set                                     151458         152129         611          0.0      302915.4       0.9X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                 19376          19514         160          0.5        1937.6       1.0X
-Select 1 column                                   24089          24156          58          0.4        2408.9       0.8X
+Select 10 columns                                 21148          21202          87          0.5        2114.8       1.0X
+Select 1 column                                   24701          24724          21          0.4        2470.1       0.9X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                      8131           8219         103          1.2         813.1       1.0X
-Short column with UTF-8                           13464          13508          44          0.7        1346.4       0.6X
-Wide column without encoding                     108012         108598         914          0.1       10801.2       0.1X
-Wide column with UTF-8                           150988         151369         412          0.1       15098.8       0.1X
+Short column without encoding                      6945           6998          59          1.4         694.5       1.0X
+Short column with UTF-8                           11510          11569          51          0.9        1151.0       0.6X
+Wide column without encoding                      95004          95795         790          0.1        9500.4       0.1X
+Wide column with UTF-8                           149223         149409         276          0.1       14922.3       0.0X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Text read                                           753            765          18         13.3          75.3       1.0X
-from_json                                         23182          23446         230          0.4        2318.2       0.0X
-json_tuple                                        31129          31304         181          0.3        3112.9       0.0X
-get_json_object                                   22821          23073         225          0.4        2282.1       0.0X
+Text read                                           649            652           3         15.4          64.9       1.0X
+from_json                                         22284          22393          99          0.4        2228.4       0.0X
+json_tuple                                        32310          32824         484          0.3        3231.0       0.0X
+get_json_object                                   22111          22751         568          0.5        2211.1       0.0X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Text read                                          3078           3101          26         16.2          61.6       1.0X
-schema inferring                                  30225          30434         333          1.7         604.5       0.1X
-parsing                                           32237          32308          63          1.6         644.7       0.1X
+Text read                                          2894           2903           8         17.3          57.9       1.0X
+schema inferring                                  26724          26785          62          1.9         534.5       0.1X
+parsing                                           37502          37632         131          1.3         750.0       0.1X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Text read                                         10835          10900          86          4.6         216.7       1.0X
-Schema inferring                                  37720          37805         110          1.3         754.4       0.3X
-Parsing without charset                           35464          35538         100          1.4         709.3       0.3X
-Parsing with UTF-8                                67311          67738         381          0.7        1346.2       0.2X
+Text read                                         10994          11010          16          4.5         219.9       1.0X
+Schema inferring                                  45654          45677          37          1.1         913.1       0.2X
+Parsing without charset                           34476          34559          73          1.5         689.5       0.3X
+Parsing with UTF-8                                56987          57002          13          0.9        1139.7       0.2X
 
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     2208           2222          14          4.5         220.8       1.0X
-to_json(timestamp)                                14299          14570         285          0.7        1429.9       0.2X
-write timestamps to files                         12955          12969          13          0.8        1295.5       0.2X
-Create a dataset of dates                          2297           2323          30          4.4         229.7       1.0X
-to_json(date)                                      8509           8561          74          1.2         850.9       0.3X
-write dates to files                               6786           6827          45          1.5         678.6       0.3X
+Create a dataset of timestamps                     2150           2188          35          4.7         215.0       1.0X
+to_json(timestamp)                                17874          18080         294          0.6        1787.4       0.1X
+write timestamps to files                         12518          12538          34          0.8        1251.8       0.2X
+Create a dataset of dates                          2298           2310          18          4.4         229.8       0.9X
+to_json(date)                                     11673          11703          27          0.9        1167.3       0.2X
+write dates to files                               7121           7135          12          1.4         712.1       0.3X
 
 OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     2598           2613          18          3.8         259.8       1.0X
-read timestamps from files                        42007          42028          19          0.2        4200.7       0.1X
-infer timestamps from files                       18102          18120          28          0.6        1810.2       0.1X
-read date text from files                          2355           2360           5          4.2         235.5       1.1X
-read date from files                              17420          17458          33          0.6        1742.0       0.1X
-timestamp strings                                  3099           3101           3          3.2         309.9       0.8X
-parse timestamps from Dataset[String]             48188          48215          25          0.2        4818.8       0.1X
-infer timestamps from Dataset[String]             22929          22988         102          0.4        2292.9       0.1X
-date strings                                       4090           4103          11          2.4         409.0       0.6X
-parse dates from Dataset[String]                  24952          25068         139          0.4        2495.2       0.1X
-from_json(timestamp)                              66038          66352         413          0.2        6603.8       0.0X
-from_json(date)                                   43755          43782          27          0.2        4375.5       0.1X
+read timestamp text from files                     2616           2641          34          3.8         261.6       1.0X
+read timestamps from files                        37481          37517          58          0.3        3748.1       0.1X
+infer timestamps from files                       84774          84964         201          0.1        8477.4       0.0X
+read date text from files                          2362           2365           3          4.2         236.2       1.1X
+read date from files                              16583          16612          29          0.6        1658.3       0.2X
+timestamp strings                                  3927           3963          40          2.5         392.7       0.7X
+parse timestamps from Dataset[String]             52827          53004         243          0.2        5282.7       0.0X
+infer timestamps from Dataset[String]            101108         101644         769          0.1       10110.8       0.0X
+date strings                                       4886           4906          26          2.0         488.6       0.5X
+parse dates from Dataset[String]                  27623          27694          62          0.4        2762.3       0.1X
+from_json(timestamp)                              71764          71887         124          0.1        7176.4       0.0X
+from_json(date)                                   46200          46314          99          0.2        4620.0       0.1X
 
 
diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt b/sql/core/benchmarks/JsonBenchmark-results.txt
index 0e4ce90..c22118f 100644
--- a/sql/core/benchmarks/JsonBenchmark-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-results.txt
@@ -7,106 +7,106 @@ OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aw
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                       64950          65182         306          1.5         649.5       1.0X
-UTF-8 is set                                     129566         129796         229          0.8        1295.7       0.5X
+No encoding                                       63839          64000         263          1.6         638.4       1.0X
+UTF-8 is set                                     124633         124945         429          0.8        1246.3       0.5X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                       50896          51277         372          2.0         509.0       1.0X
-UTF-8 is set                                      89712          89763          49          1.1         897.1       0.6X
+No encoding                                       51720          51901         157          1.9         517.2       1.0X
+UTF-8 is set                                      91161          91190          25          1.1         911.6       0.6X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                       59415          59785         372          0.2        5941.5       1.0X
-UTF-8 is set                                     103059         103165         156          0.1       10305.9       0.6X
+No encoding                                       58486          59038         714          0.2        5848.6       1.0X
+UTF-8 is set                                     103045         103350         358          0.1       10304.5       0.6X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-No encoding                                      132951         133122         288          0.0      265901.9       1.0X
-UTF-8 is set                                     149318         149441         107          0.0      298635.3       0.9X
+No encoding                                      134909         135024         105          0.0      269818.6       1.0X
+UTF-8 is set                                     154418         154593         155          0.0      308836.7       0.9X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                 18491          18552          85          0.5        1849.1       1.0X
-Select 1 column                                   25908          25946          65          0.4        2590.8       0.7X
+Select 10 columns                                 19538          19620          70          0.5        1953.8       1.0X
+Select 1 column                                   26142          26159          15          0.4        2614.2       0.7X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                      9264           9307          49          1.1         926.4       1.0X
-Short column with UTF-8                           14707          14727          17          0.7        1470.7       0.6X
-Wide column without encoding                     141138         141347         276          0.1       14113.8       0.1X
-Wide column with UTF-8                           179601         180035         664          0.1       17960.1       0.1X
+Short column without encoding                      8103           8162          53          1.2         810.3       1.0X
+Short column with UTF-8                           13104          13150          58          0.8        1310.4       0.6X
+Wide column without encoding                     135280         135593         375          0.1       13528.0       0.1X
+Wide column with UTF-8                           175189         175483         278          0.1       17518.9       0.0X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Text read                                          1173           1184           9          8.5         117.3       1.0X
-from_json                                         23432          23738         338          0.4        2343.2       0.1X
-json_tuple                                        32573          32851         358          0.3        3257.3       0.0X
-get_json_object                                   22442          22489          47          0.4        2244.2       0.1X
+Text read                                          1225           1234           8          8.2         122.5       1.0X
+from_json                                         22482          22552          95          0.4        2248.2       0.1X
+json_tuple                                        30203          30338         146          0.3        3020.3       0.0X
+get_json_object                                   22219          22245          26          0.5        2221.9       0.1X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Text read                                          5656           5680          31          8.8         113.1       1.0X
-schema inferring                                  33283          33337          64          1.5         665.7       0.2X
-parsing                                           41771          41929         178          1.2         835.4       0.1X
+Text read                                          5897           5904          10          8.5         117.9       1.0X
+schema inferring                                  30282          30340          50          1.7         605.6       0.2X
+parsing                                           33304          33577         289          1.5         666.1       0.2X
 
 Preparing data for benchmarking ...
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Text read                                          9626           9668          39          5.2         192.5       1.0X
-Schema inferring                                  39489          39579          91          1.3         789.8       0.2X
-Parsing without charset                           38096          38232         125          1.3         761.9       0.3X
-Parsing with UTF-8                                64565          64725         165          0.8        1291.3       0.1X
+Text read                                          9710           9757          80          5.1         194.2       1.0X
+Schema inferring                                  35929          35939           9          1.4         718.6       0.3X
+Parsing without charset                           39175          39227          87          1.3         783.5       0.2X
+Parsing with UTF-8                                59188          59294         109          0.8        1183.8       0.2X
 
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     1898           1912          13          5.3         189.8       1.0X
-to_json(timestamp)                                20011          20092         119          0.5        2001.1       0.1X
-write timestamps to files                         13388          13427          35          0.7        1338.8       0.1X
-Create a dataset of dates                          2351           2368          18          4.3         235.1       0.8X
-to_json(date)                                     11884          11913          40          0.8        1188.4       0.2X
-write dates to files                               7317           7326           9          1.4         731.7       0.3X
+Create a dataset of timestamps                     1967           1977           9          5.1         196.7       1.0X
+to_json(timestamp)                                17086          17304         371          0.6        1708.6       0.1X
+write timestamps to files                         12691          12716          28          0.8        1269.1       0.2X
+Create a dataset of dates                          2192           2217          39          4.6         219.2       0.9X
+to_json(date)                                     10541          10656         137          0.9        1054.1       0.2X
+write dates to files                               7259           7311          46          1.4         725.9       0.3X
 
 OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws
 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
 Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     2316           2324          13          4.3         231.6       1.0X
-read timestamps from files                        43712          43900         165          0.2        4371.2       0.1X
-infer timestamps from files                       19302          19328          38          0.5        1930.2       0.1X
-read date text from files                          2090           2099          11          4.8         209.0       1.1X
-read date from files                              18914          18940          44          0.5        1891.4       0.1X
-timestamp strings                                  3785           3793          11          2.6         378.5       0.6X
-parse timestamps from Dataset[String]             51177          51353         160          0.2        5117.7       0.0X
-infer timestamps from Dataset[String]             27907          28119         186          0.4        2790.7       0.1X
-date strings                                       4446           4452           6          2.2         444.6       0.5X
-parse dates from Dataset[String]                  28124          28172          55          0.4        2812.4       0.1X
-from_json(timestamp)                              71432          71827         354          0.1        7143.2       0.0X
-from_json(date)                                   46497          46651         163          0.2        4649.7       0.0X
+read timestamp text from files                     2318           2326          13          4.3         231.8       1.0X
+read timestamps from files                        43345          43627         258          0.2        4334.5       0.1X
+infer timestamps from files                       89570          89621          59          0.1        8957.0       0.0X
+read date text from files                          2099           2107           9          4.8         209.9       1.1X
+read date from files                              18000          18065          98          0.6        1800.0       0.1X
+timestamp strings                                  3937           3956          32          2.5         393.7       0.6X
+parse timestamps from Dataset[String]             56001          56429         539          0.2        5600.1       0.0X
+infer timestamps from Dataset[String]            109410         109963         559          0.1       10941.0       0.0X
+date strings                                       4530           4540           9          2.2         453.0       0.5X
+parse dates from Dataset[String]                  29723          29767          72          0.3        2972.3       0.1X
+from_json(timestamp)                              74106          74619         728          0.1        7410.6       0.0X
+from_json(date)                                   46599          46632          32          0.2        4659.9       0.0X
 
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
index 5693088..0dbd6b5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
@@ -430,7 +430,7 @@ object JsonBenchmark extends SqlBasedBenchmark {
       }
 
       readBench.addCase("infer timestamps from files", numIters) { _ =>
-        spark.read.json(timestampDir).noop()
+        spark.read.option("inferTimestamp", true).json(timestampDir).noop()
       }
 
       val dateSchema = new StructType().add("date", DateType)
@@ -460,7 +460,7 @@ object JsonBenchmark extends SqlBasedBenchmark {
       }
 
       readBench.addCase("infer timestamps from Dataset[String]", numIters) { _ =>
-        spark.read.json(timestampStr).noop()
+        spark.read.option("inferTimestamp", true).json(timestampStr).noop()
       }
 
       def dateStr: Dataset[String] = {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org