You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2018/09/29 16:48:11 UTC

spark git commit: [SPARK-25508][SQL][TEST] Refactor OrcReadBenchmark to use main method

Repository: spark
Updated Branches:
  refs/heads/master 623c2ec4e -> f246813af


[SPARK-25508][SQL][TEST] Refactor OrcReadBenchmark to use main method

## What changes were proposed in this pull request?

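Refactor OrcReadBenchmark to extend `BenchmarkBase` and run through its main method, so that results are written to a results file instead of being kept as comments in the source.
To generate the benchmark results: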
```
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "hive/test:runMain org.apache.spark.sql.hive.orc.OrcReadBenchmark"
```
## How was this patch tested?

manual tests

Closes #22580 from yucai/SPARK-25508.

Lead-authored-by: yucai <yy...@ebay.com>
Co-authored-by: Yucai Yu <yu...@foxmail.com>
Co-authored-by: Dongjoon Hyun <do...@apache.org>
Signed-off-by: Dongjoon Hyun <do...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f246813a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f246813a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f246813a

Branch: refs/heads/master
Commit: f246813afba16fee4d703f09e6302011b11806f3
Parents: 623c2ec
Author: yucai <yy...@ebay.com>
Authored: Sat Sep 29 09:48:03 2018 -0700
Committer: Dongjoon Hyun <do...@apache.org>
Committed: Sat Sep 29 09:48:03 2018 -0700

----------------------------------------------------------------------
 .../benchmarks/OrcReadBenchmark-results.txt     | 173 ++++++++++++++++
 .../spark/sql/hive/orc/OrcReadBenchmark.scala   | 196 ++++---------------
 2 files changed, 212 insertions(+), 157 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f246813a/sql/hive/benchmarks/OrcReadBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/sql/hive/benchmarks/OrcReadBenchmark-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-results.txt
new file mode 100644
index 0000000..c77f966
--- /dev/null
+++ b/sql/hive/benchmarks/OrcReadBenchmark-results.txt
@@ -0,0 +1,173 @@
+================================================================================================
+SQL Single Numeric Column Scan
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+SQL Single TINYINT Column Scan:          Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 1630 / 1639          9.7         103.6       1.0X
+Native ORC Vectorized                          253 /  288         62.2          16.1       6.4X
+Native ORC Vectorized with copy                227 /  244         69.2          14.5       7.2X
+Hive built-in ORC                             1980 / 1991          7.9         125.9       0.8X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+SQL Single SMALLINT Column Scan:         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 1587 / 1589          9.9         100.9       1.0X
+Native ORC Vectorized                          227 /  242         69.2          14.5       7.0X
+Native ORC Vectorized with copy                228 /  238         69.0          14.5       7.0X
+Hive built-in ORC                             2323 / 2332          6.8         147.7       0.7X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+SQL Single INT Column Scan:              Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 1726 / 1771          9.1         109.7       1.0X
+Native ORC Vectorized                          309 /  333         50.9          19.7       5.6X
+Native ORC Vectorized with copy                313 /  321         50.2          19.9       5.5X
+Hive built-in ORC                             2668 / 2672          5.9         169.6       0.6X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+SQL Single BIGINT Column Scan:           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 1722 / 1747          9.1         109.5       1.0X
+Native ORC Vectorized                          395 /  403         39.8          25.1       4.4X
+Native ORC Vectorized with copy                399 /  405         39.4          25.4       4.3X
+Hive built-in ORC                             2767 / 2777          5.7         175.9       0.6X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+SQL Single FLOAT Column Scan:            Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 1797 / 1824          8.8         114.2       1.0X
+Native ORC Vectorized                          434 /  441         36.2          27.6       4.1X
+Native ORC Vectorized with copy                437 /  447         36.0          27.8       4.1X
+Hive built-in ORC                             2701 / 2710          5.8         171.7       0.7X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+SQL Single DOUBLE Column Scan:           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 1931 / 2028          8.1         122.8       1.0X
+Native ORC Vectorized                          542 /  557         29.0          34.5       3.6X
+Native ORC Vectorized with copy                550 /  564         28.6          35.0       3.5X
+Hive built-in ORC                             2816 / 3206          5.6         179.1       0.7X
+
+
+================================================================================================
+Int and String Scan
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Int and String Scan:                     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 4012 / 4068          2.6         382.6       1.0X
+Native ORC Vectorized                         2337 / 2339          4.5         222.9       1.7X
+Native ORC Vectorized with copy               2520 / 2540          4.2         240.3       1.6X
+Hive built-in ORC                             5503 / 5575          1.9         524.8       0.7X
+
+
+================================================================================================
+Partitioned Table Scan
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Partitioned Table:                       Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Data column - Native ORC MR                   2020 / 2025          7.8         128.4       1.0X
+Data column - Native ORC Vectorized            398 /  409         39.5          25.3       5.1X
+Data column - Native ORC Vectorized with copy       406 /  411         38.8          25.8       5.0X
+Data column - Hive built-in ORC               2967 / 2969          5.3         188.6       0.7X
+Partition column - Native ORC MR              1494 / 1505         10.5          95.0       1.4X
+Partition column - Native ORC Vectorized        73 /   82        216.3           4.6      27.8X
+Partition column - Native ORC Vectorized with copy        71 /   80        221.4           4.5      28.4X
+Partition column - Hive built-in ORC          1932 / 1937          8.1         122.8       1.0X
+Both columns - Native ORC MR                  2057 / 2071          7.6         130.8       1.0X
+Both columns - Native ORC Vectorized           445 /  448         35.4          28.3       4.5X
+Both column - Native ORC Vectorized with copy       534 /  539         29.4          34.0       3.8X
+Both columns - Hive built-in ORC              2994 / 2994          5.3         190.3       0.7X
+
+
+================================================================================================
+Repeated String Scan
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Repeated String:                         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 1771 / 1785          5.9         168.9       1.0X
+Native ORC Vectorized                          372 /  375         28.2          35.5       4.8X
+Native ORC Vectorized with copy                543 /  576         19.3          51.8       3.3X
+Hive built-in ORC                             2671 / 2671          3.9         254.7       0.7X
+
+
+================================================================================================
+String with Nulls Scan
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+String with Nulls Scan (0.0%):           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 3276 / 3302          3.2         312.5       1.0X
+Native ORC Vectorized                         1057 / 1080          9.9         100.8       3.1X
+Native ORC Vectorized with copy               1420 / 1431          7.4         135.4       2.3X
+Hive built-in ORC                             5377 / 5407          2.0         512.8       0.6X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+String with Nulls Scan (0.5%):           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 3147 / 3147          3.3         300.1       1.0X
+Native ORC Vectorized                         1305 / 1319          8.0         124.4       2.4X
+Native ORC Vectorized with copy               1685 / 1686          6.2         160.7       1.9X
+Hive built-in ORC                             4077 / 4085          2.6         388.8       0.8X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+String with Nulls Scan (0.95%):          Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 1739 / 1744          6.0         165.8       1.0X
+Native ORC Vectorized                          500 /  501         21.0          47.7       3.5X
+Native ORC Vectorized with copy                618 /  631         17.0          58.9       2.8X
+Hive built-in ORC                             2411 / 2427          4.3         229.9       0.7X
+
+
+================================================================================================
+Single Column Scan From Wide Columns
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Single Column Scan from 100 columns:     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 1348 / 1366          0.8        1285.3       1.0X
+Native ORC Vectorized                          119 /  134          8.8         113.5      11.3X
+Native ORC Vectorized with copy                119 /  148          8.8         113.9      11.3X
+Hive built-in ORC                              487 /  507          2.2         464.8       2.8X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Single Column Scan from 200 columns:     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 2667 / 2837          0.4        2543.6       1.0X
+Native ORC Vectorized                          203 /  222          5.2         193.4      13.2X
+Native ORC Vectorized with copy                217 /  255          4.8         207.0      12.3X
+Hive built-in ORC                              737 /  741          1.4         702.4       3.6X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Single Column Scan from 300 columns:     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Native ORC MR                                 3954 / 3956          0.3        3770.4       1.0X
+Native ORC Vectorized                          348 /  360          3.0         331.7      11.4X
+Native ORC Vectorized with copy                349 /  359          3.0         333.2      11.3X
+Hive built-in ORC                             1057 / 1067          1.0        1008.0       3.7X
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/f246813a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala
index 49de007..0bb5e8c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala
@@ -22,20 +22,26 @@ import java.io.File
 import scala.util.Random
 
 import org.apache.spark.SparkConf
-import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
 import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
-
 /**
  * Benchmark to measure ORC read performance.
+ * {{{
+ *   To run this benchmark:
+ *   1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
+ *   2. build/sbt "hive/test:runMain <this class>"
+ *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "hive/test:runMain <this class>"
+ *      Results will be written to "benchmarks/OrcReadBenchmark-results.txt".
+ * }}}
  *
  * This is in `sql/hive` module in order to compare `sql/core` and `sql/hive` ORC data sources.
  */
 // scalastyle:off line.size.limit
-object OrcReadBenchmark extends SQLHelper {
+object OrcReadBenchmark extends BenchmarkBase with SQLHelper {
   val conf = new SparkConf()
   conf.set("orc.compression", "snappy")
 
@@ -69,7 +75,7 @@ object OrcReadBenchmark extends SQLHelper {
   }
 
   def numericScanBenchmark(values: Int, dataType: DataType): Unit = {
-    val benchmark = new Benchmark(s"SQL Single ${dataType.sql} Column Scan", values)
+    val benchmark = new Benchmark(s"SQL Single ${dataType.sql} Column Scan", values, output = output)
 
     withTempPath { dir =>
       withTempTable("t1", "nativeOrcTable", "hiveOrcTable") {
@@ -98,59 +104,13 @@ object OrcReadBenchmark extends SQLHelper {
           spark.sql("SELECT sum(id) FROM hiveOrcTable").collect()
         }
 
-        /*
-        Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-        Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-        SQL Single TINYINT Column Scan:          Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 1135 / 1171         13.9          72.2       1.0X
-        Native ORC Vectorized                          152 /  163        103.4           9.7       7.5X
-        Native ORC Vectorized with copy                149 /  162        105.4           9.5       7.6X
-        Hive built-in ORC                             1380 / 1384         11.4          87.7       0.8X
-
-        SQL Single SMALLINT Column Scan:         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 1182 / 1244         13.3          75.2       1.0X
-        Native ORC Vectorized                          145 /  156        108.7           9.2       8.2X
-        Native ORC Vectorized with copy                148 /  158        106.4           9.4       8.0X
-        Hive built-in ORC                             1591 / 1636          9.9         101.2       0.7X
-
-        SQL Single INT Column Scan:              Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 1271 / 1271         12.4          80.8       1.0X
-        Native ORC Vectorized                          206 /  212         76.3          13.1       6.2X
-        Native ORC Vectorized with copy                200 /  213         78.8          12.7       6.4X
-        Hive built-in ORC                             1776 / 1787          8.9         112.9       0.7X
-
-        SQL Single BIGINT Column Scan:           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 1344 / 1355         11.7          85.4       1.0X
-        Native ORC Vectorized                          258 /  268         61.0          16.4       5.2X
-        Native ORC Vectorized with copy                252 /  257         62.4          16.0       5.3X
-        Hive built-in ORC                             1818 / 1823          8.7         115.6       0.7X
-
-        SQL Single FLOAT Column Scan:            Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 1333 / 1352         11.8          84.8       1.0X
-        Native ORC Vectorized                          310 /  324         50.7          19.7       4.3X
-        Native ORC Vectorized with copy                312 /  320         50.4          19.9       4.3X
-        Hive built-in ORC                             1904 / 1918          8.3         121.0       0.7X
-
-        SQL Single DOUBLE Column Scan:           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 1408 / 1585         11.2          89.5       1.0X
-        Native ORC Vectorized                          359 /  368         43.8          22.8       3.9X
-        Native ORC Vectorized with copy                364 /  371         43.2          23.2       3.9X
-        Hive built-in ORC                             1881 / 1954          8.4         119.6       0.7X
-        */
         benchmark.run()
       }
     }
   }
 
   def intStringScanBenchmark(values: Int): Unit = {
-    val benchmark = new Benchmark("Int and String Scan", values)
+    val benchmark = new Benchmark("Int and String Scan", values, output = output)
 
     withTempPath { dir =>
       withTempTable("t1", "nativeOrcTable", "hiveOrcTable") {
@@ -181,24 +141,13 @@ object OrcReadBenchmark extends SQLHelper {
           spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").collect()
         }
 
-        /*
-        Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-        Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-        Int and String Scan:                     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 2566 / 2592          4.1         244.7       1.0X
-        Native ORC Vectorized                         1098 / 1113          9.6         104.7       2.3X
-        Native ORC Vectorized with copy               1527 / 1593          6.9         145.6       1.7X
-        Hive built-in ORC                             3561 / 3705          2.9         339.6       0.7X
-        */
         benchmark.run()
       }
     }
   }
 
   def partitionTableScanBenchmark(values: Int): Unit = {
-    val benchmark = new Benchmark("Partitioned Table", values)
+    val benchmark = new Benchmark("Partitioned Table", values, output = output)
 
     withTempPath { dir =>
       withTempTable("t1", "nativeOrcTable", "hiveOrcTable") {
@@ -267,32 +216,13 @@ object OrcReadBenchmark extends SQLHelper {
           spark.sql("SELECT sum(p), sum(id) FROM hiveOrcTable").collect()
         }
 
-        /*
-        Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-        Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-        Partitioned Table:                       Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Data only - Native ORC MR                      1447 / 1457         10.9          92.0       1.0X
-        Data only - Native ORC Vectorized               256 /  266         61.4          16.3       5.6X
-        Data only - Native ORC Vectorized with copy     263 /  273         59.8          16.7       5.5X
-        Data only - Hive built-in ORC                  1960 / 1988          8.0         124.6       0.7X
-        Partition only - Native ORC MR                 1039 / 1043         15.1          66.0       1.4X
-        Partition only - Native ORC Vectorized           48 /   53        326.6           3.1      30.1X
-        Partition only - Native ORC Vectorized with copy 48 /   53        328.4           3.0      30.2X
-        Partition only - Hive built-in ORC             1234 / 1242         12.7          78.4       1.2X
-        Both columns - Native ORC MR                   1465 / 1475         10.7          93.1       1.0X
-        Both columns - Native ORC Vectorized            292 /  301         53.9          18.6       5.0X
-        Both column - Native ORC Vectorized with copy   348 /  354         45.1          22.2       4.2X
-        Both columns - Hive built-in ORC               2051 / 2060          7.7         130.4       0.7X
-        */
         benchmark.run()
       }
     }
   }
 
   def repeatedStringScanBenchmark(values: Int): Unit = {
-    val benchmark = new Benchmark("Repeated String", values)
+    val benchmark = new Benchmark("Repeated String", values, output = output)
 
     withTempPath { dir =>
       withTempTable("t1", "nativeOrcTable", "hiveOrcTable") {
@@ -320,17 +250,6 @@ object OrcReadBenchmark extends SQLHelper {
           spark.sql("SELECT sum(length(c1)) FROM hiveOrcTable").collect()
         }
 
-        /*
-        Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-        Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-        Repeated String:                         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 1271 / 1278          8.3         121.2       1.0X
-        Native ORC Vectorized                          200 /  212         52.4          19.1       6.4X
-        Native ORC Vectorized with copy                342 /  347         30.7          32.6       3.7X
-        Hive built-in ORC                             1874 / 2105          5.6         178.7       0.7X
-        */
         benchmark.run()
       }
     }
@@ -347,7 +266,8 @@ object OrcReadBenchmark extends SQLHelper {
             s"SELECT IF(RAND(1) < $fractionOfNulls, NULL, CAST(id as STRING)) AS c1, " +
             s"IF(RAND(2) < $fractionOfNulls, NULL, CAST(id as STRING)) AS c2 FROM t1"))
 
-        val benchmark = new Benchmark(s"String with Nulls Scan ($fractionOfNulls%)", values)
+        val benchmark =
+          new Benchmark(s"String with Nulls Scan ($fractionOfNulls%)", values, output = output)
 
         benchmark.addCase("Native ORC MR") { _ =>
           withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") {
@@ -373,38 +293,13 @@ object OrcReadBenchmark extends SQLHelper {
             "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect()
         }
 
-        /*
-        Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-        Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-        String with Nulls Scan (0.0%):           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 2394 / 2886          4.4         228.3       1.0X
-        Native ORC Vectorized                          699 /  729         15.0          66.7       3.4X
-        Native ORC Vectorized with copy                959 / 1025         10.9          91.5       2.5X
-        Hive built-in ORC                             3899 / 3901          2.7         371.9       0.6X
-
-        String with Nulls Scan (0.5%):           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 2234 / 2255          4.7         213.1       1.0X
-        Native ORC Vectorized                          854 /  869         12.3          81.4       2.6X
-        Native ORC Vectorized with copy               1099 / 1128          9.5         104.8       2.0X
-        Hive built-in ORC                             2767 / 2793          3.8         263.9       0.8X
-
-        String with Nulls Scan (0.95%):          Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 1166 / 1202          9.0         111.2       1.0X
-        Native ORC Vectorized                          338 /  345         31.1          32.2       3.5X
-        Native ORC Vectorized with copy                418 /  428         25.1          39.9       2.8X
-        Hive built-in ORC                             1730 / 1761          6.1         164.9       0.7X
-        */
         benchmark.run()
       }
     }
   }
 
   def columnsBenchmark(values: Int, width: Int): Unit = {
-    val benchmark = new Benchmark(s"Single Column Scan from $width columns", values)
+    val benchmark = new Benchmark(s"Single Column Scan from $width columns", values, output = output)
 
     withTempPath { dir =>
       withTempTable("t1", "nativeOrcTable", "hiveOrcTable") {
@@ -436,49 +331,36 @@ object OrcReadBenchmark extends SQLHelper {
           spark.sql(s"SELECT sum(c$middle) FROM hiveOrcTable").collect()
         }
 
-        /*
-        Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-        Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-        Single Column Scan from 100 columns:     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 1050 / 1053          1.0        1001.1       1.0X
-        Native ORC Vectorized                           95 /  101         11.0          90.9      11.0X
-        Native ORC Vectorized with copy                 95 /  102         11.0          90.9      11.0X
-        Hive built-in ORC                              348 /  358          3.0         331.8       3.0X
-
-        Single Column Scan from 200 columns:     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 2099 / 2108          0.5        2002.1       1.0X
-        Native ORC Vectorized                          179 /  187          5.8         171.1      11.7X
-        Native ORC Vectorized with copy                176 /  188          6.0         167.6      11.9X
-        Hive built-in ORC                              562 /  581          1.9         535.9       3.7X
-
-        Single Column Scan from 300 columns:     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-        ------------------------------------------------------------------------------------------------
-        Native ORC MR                                 3221 / 3246          0.3        3071.4       1.0X
-        Native ORC Vectorized                          312 /  322          3.4         298.0      10.3X
-        Native ORC Vectorized with copy                306 /  320          3.4         291.6      10.5X
-        Hive built-in ORC                              815 /  824          1.3         777.3       4.0X
-        */
         benchmark.run()
       }
     }
   }
 
-  def main(args: Array[String]): Unit = {
-    Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType).foreach { dataType =>
-      numericScanBenchmark(1024 * 1024 * 15, dataType)
+  override def benchmark(): Unit = {
+    runBenchmark("SQL Single Numeric Column Scan") {
+      Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType).foreach { dataType =>
+        numericScanBenchmark(1024 * 1024 * 15, dataType)
+      }
+    }
+    runBenchmark("Int and String Scan") {
+      intStringScanBenchmark(1024 * 1024 * 10)
+    }
+    runBenchmark("Partitioned Table Scan") {
+      partitionTableScanBenchmark(1024 * 1024 * 15)
+    }
+    runBenchmark("Repeated String Scan") {
+      repeatedStringScanBenchmark(1024 * 1024 * 10)
+    }
+    runBenchmark("String with Nulls Scan") {
+      for (fractionOfNulls <- List(0.0, 0.50, 0.95)) {
+        stringWithNullsScanBenchmark(1024 * 1024 * 10, fractionOfNulls)
+      }
     }
-    intStringScanBenchmark(1024 * 1024 * 10)
-    partitionTableScanBenchmark(1024 * 1024 * 15)
-    repeatedStringScanBenchmark(1024 * 1024 * 10)
-    for (fractionOfNulls <- List(0.0, 0.50, 0.95)) {
-      stringWithNullsScanBenchmark(1024 * 1024 * 10, fractionOfNulls)
+    runBenchmark("Single Column Scan From Wide Columns") {
+      columnsBenchmark(1024 * 1024 * 1, 100)
+      columnsBenchmark(1024 * 1024 * 1, 200)
+      columnsBenchmark(1024 * 1024 * 1, 300)
     }
-    columnsBenchmark(1024 * 1024 * 1, 100)
-    columnsBenchmark(1024 * 1024 * 1, 200)
-    columnsBenchmark(1024 * 1024 * 1, 300)
   }
 }
 // scalastyle:on line.size.limit


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org