You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/07/06 03:14:05 UTC

spark git commit: [SPARK-24692][TESTS] Improvement FilterPushdownBenchmark

Repository: spark
Updated Branches:
  refs/heads/master 01fcba2c6 -> bf67f70c4


[SPARK-24692][TESTS] Improvement FilterPushdownBenchmark

## What changes were proposed in this pull request?
Following the approach of [`WideSchemaBenchmark`](https://github.com/apache/spark/blob/v2.3.1/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala), update `FilterPushdownBenchmark` as follows:
1. Write the result to `benchmarks/FilterPushdownBenchmark-results.txt` for easy maintenance.
2. Add more benchmark cases: `StringStartsWith`, `Decimal`, `InSet -> InFilters` and `tinyint`.

## How was this patch tested?

manual tests

Author: Yuming Wang <yu...@ebay.com>

Closes #21677 from wangyum/SPARK-24692.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf67f70c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf67f70c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf67f70c

Branch: refs/heads/master
Commit: bf67f70c48881ee99751f7d51fbcbda1e593d90a
Parents: 01fcba2
Author: Yuming Wang <yu...@ebay.com>
Authored: Fri Jul 6 11:13:57 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Fri Jul 6 11:13:57 2018 +0800

----------------------------------------------------------------------
 .../FilterPushdownBenchmark-results.txt         | 580 +++++++++++++++++++
 .../benchmark/FilterPushdownBenchmark.scala     | 405 ++++++-------
 2 files changed, 748 insertions(+), 237 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/bf67f70c/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
new file mode 100644
index 0000000..29fe434
--- /dev/null
+++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
@@ -0,0 +1,580 @@
+================================================================================================
+Pushdown for many distinct value case
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 string row (value IS NULL):     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8970 / 9122          1.8         570.3       1.0X
+Parquet Vectorized (Pushdown)                  471 /  491         33.4          30.0      19.0X
+Native ORC Vectorized                         7661 / 7853          2.1         487.0       1.2X
+Native ORC Vectorized (Pushdown)              1134 / 1161         13.9          72.1       7.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 string row ('7864320' < value < '7864320'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            9246 / 9297          1.7         587.8       1.0X
+Parquet Vectorized (Pushdown)                  480 /  488         32.8          30.5      19.3X
+Native ORC Vectorized                         7838 / 7850          2.0         498.3       1.2X
+Native ORC Vectorized (Pushdown)              1054 / 1118         14.9          67.0       8.8X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 string row (value = '7864320'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8989 / 9100          1.7         571.5       1.0X
+Parquet Vectorized (Pushdown)                  448 /  467         35.1          28.5      20.1X
+Native ORC Vectorized                         7680 / 7768          2.0         488.3       1.2X
+Native ORC Vectorized (Pushdown)              1067 / 1118         14.7          67.8       8.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 string row (value <=> '7864320'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            9115 / 9266          1.7         579.5       1.0X
+Parquet Vectorized (Pushdown)                  466 /  492         33.7          29.7      19.5X
+Native ORC Vectorized                         7800 / 7914          2.0         495.9       1.2X
+Native ORC Vectorized (Pushdown)              1075 / 1102         14.6          68.4       8.5X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 string row ('7864320' <= value <= '7864320'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            9099 / 9237          1.7         578.5       1.0X
+Parquet Vectorized (Pushdown)                  462 /  475         34.1          29.3      19.7X
+Native ORC Vectorized                         7847 / 7925          2.0         498.9       1.2X
+Native ORC Vectorized (Pushdown)              1078 / 1114         14.6          68.5       8.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all string rows (value IS NOT NULL): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          19303 / 19547          0.8        1227.3       1.0X
+Parquet Vectorized (Pushdown)               19924 / 20089          0.8        1266.7       1.0X
+Native ORC Vectorized                       18725 / 19079          0.8        1190.5       1.0X
+Native ORC Vectorized (Pushdown)            19310 / 19492          0.8        1227.7       1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 int row (value IS NULL):        Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8117 / 8323          1.9         516.1       1.0X
+Parquet Vectorized (Pushdown)                  484 /  494         32.5          30.8      16.8X
+Native ORC Vectorized                         6811 / 7036          2.3         433.0       1.2X
+Native ORC Vectorized (Pushdown)              1061 / 1082         14.8          67.5       7.6X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 int row (7864320 < value < 7864320): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8105 / 8140          1.9         515.3       1.0X
+Parquet Vectorized (Pushdown)                  478 /  505         32.9          30.4      17.0X
+Native ORC Vectorized                         6914 / 7211          2.3         439.6       1.2X
+Native ORC Vectorized (Pushdown)              1044 / 1064         15.1          66.4       7.8X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 int row (value = 7864320):      Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7983 / 8116          2.0         507.6       1.0X
+Parquet Vectorized (Pushdown)                  464 /  487         33.9          29.5      17.2X
+Native ORC Vectorized                         6703 / 6774          2.3         426.1       1.2X
+Native ORC Vectorized (Pushdown)              1017 / 1058         15.5          64.6       7.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 int row (value <=> 7864320):    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7942 / 7983          2.0         504.9       1.0X
+Parquet Vectorized (Pushdown)                  468 /  479         33.6          29.7      17.0X
+Native ORC Vectorized                         6677 / 6779          2.4         424.5       1.2X
+Native ORC Vectorized (Pushdown)              1021 / 1068         15.4          64.9       7.8X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 int row (7864320 <= value <= 7864320): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7909 / 7958          2.0         502.8       1.0X
+Parquet Vectorized (Pushdown)                  485 /  494         32.4          30.8      16.3X
+Native ORC Vectorized                         6751 / 6846          2.3         429.2       1.2X
+Native ORC Vectorized (Pushdown)              1043 / 1077         15.1          66.3       7.6X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 int row (7864319 < value < 7864321): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8010 / 8033          2.0         509.2       1.0X
+Parquet Vectorized (Pushdown)                  472 /  489         33.3          30.0      17.0X
+Native ORC Vectorized                         6655 / 6808          2.4         423.1       1.2X
+Native ORC Vectorized (Pushdown)              1015 / 1067         15.5          64.5       7.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% int rows (value < 1572864):   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8983 / 9035          1.8         571.1       1.0X
+Parquet Vectorized (Pushdown)                 2204 / 2231          7.1         140.1       4.1X
+Native ORC Vectorized                         7864 / 8011          2.0         500.0       1.1X
+Native ORC Vectorized (Pushdown)              2674 / 2789          5.9         170.0       3.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% int rows (value < 7864320):   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          12723 / 12903          1.2         808.9       1.0X
+Parquet Vectorized (Pushdown)                 9112 / 9282          1.7         579.3       1.4X
+Native ORC Vectorized                       12090 / 12230          1.3         768.7       1.1X
+Native ORC Vectorized (Pushdown)              9242 / 9372          1.7         587.6       1.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% int rows (value < 14155776):  Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          16453 / 16678          1.0        1046.1       1.0X
+Parquet Vectorized (Pushdown)               15997 / 16262          1.0        1017.0       1.0X
+Native ORC Vectorized                       16652 / 17070          0.9        1058.7       1.0X
+Native ORC Vectorized (Pushdown)            15843 / 16112          1.0        1007.2       1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all int rows (value IS NOT NULL): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          17098 / 17254          0.9        1087.1       1.0X
+Parquet Vectorized (Pushdown)               17302 / 17529          0.9        1100.1       1.0X
+Native ORC Vectorized                       16790 / 17098          0.9        1067.5       1.0X
+Native ORC Vectorized (Pushdown)            17329 / 17914          0.9        1101.7       1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all int rows (value > -1):        Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          17088 / 17392          0.9        1086.4       1.0X
+Parquet Vectorized (Pushdown)               17609 / 17863          0.9        1119.5       1.0X
+Native ORC Vectorized                       18334 / 69831          0.9        1165.7       0.9X
+Native ORC Vectorized (Pushdown)            17465 / 17629          0.9        1110.4       1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all int rows (value != -1):       Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          16903 / 17233          0.9        1074.6       1.0X
+Parquet Vectorized (Pushdown)               16945 / 17032          0.9        1077.3       1.0X
+Native ORC Vectorized                       16377 / 16762          1.0        1041.2       1.0X
+Native ORC Vectorized (Pushdown)            16950 / 17212          0.9        1077.7       1.0X
+
+
+================================================================================================
+Pushdown for few distinct value case (use dictionary encoding)
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 distinct string row (value IS NULL): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7245 / 7322          2.2         460.7       1.0X
+Parquet Vectorized (Pushdown)                  378 /  389         41.6          24.0      19.2X
+Native ORC Vectorized                         6720 / 6778          2.3         427.2       1.1X
+Native ORC Vectorized (Pushdown)              1009 / 1032         15.6          64.2       7.2X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 distinct string row ('100' < value < '100'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7627 / 7795          2.1         484.9       1.0X
+Parquet Vectorized (Pushdown)                  384 /  406         41.0          24.4      19.9X
+Native ORC Vectorized                         6724 / 7824          2.3         427.5       1.1X
+Native ORC Vectorized (Pushdown)               968 /  986         16.3          61.5       7.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 distinct string row (value = '100'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7157 / 7534          2.2         455.0       1.0X
+Parquet Vectorized (Pushdown)                  542 /  565         29.0          34.5      13.2X
+Native ORC Vectorized                         6716 / 7214          2.3         427.0       1.1X
+Native ORC Vectorized (Pushdown)              1212 / 1288         13.0          77.0       5.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 distinct string row (value <=> '100'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7368 / 7552          2.1         468.4       1.0X
+Parquet Vectorized (Pushdown)                  544 /  556         28.9          34.6      13.5X
+Native ORC Vectorized                         6740 / 6867          2.3         428.5       1.1X
+Native ORC Vectorized (Pushdown)              1230 / 1426         12.8          78.2       6.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 distinct string row ('100' <= value <= '100'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7427 / 7734          2.1         472.2       1.0X
+Parquet Vectorized (Pushdown)                  556 /  568         28.3          35.4      13.3X
+Native ORC Vectorized                         6847 / 7059          2.3         435.3       1.1X
+Native ORC Vectorized (Pushdown)              1226 / 1230         12.8          77.9       6.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all distinct string rows (value IS NOT NULL): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          16998 / 17311          0.9        1080.7       1.0X
+Parquet Vectorized (Pushdown)               16977 / 17250          0.9        1079.4       1.0X
+Native ORC Vectorized                       18447 / 19852          0.9        1172.8       0.9X
+Native ORC Vectorized (Pushdown)            16614 / 17102          0.9        1056.3       1.0X
+
+
+================================================================================================
+Pushdown benchmark for StringStartsWith
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+StringStartsWith filter: (value like '10%'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                           9705 / 10814          1.6         617.0       1.0X
+Parquet Vectorized (Pushdown)                 3086 / 3574          5.1         196.2       3.1X
+Native ORC Vectorized                       10094 / 10695          1.6         641.8       1.0X
+Native ORC Vectorized (Pushdown)              9611 / 9999          1.6         611.0       1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+StringStartsWith filter: (value like '1000%'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8016 / 8183          2.0         509.7       1.0X
+Parquet Vectorized (Pushdown)                  444 /  457         35.4          28.2      18.0X
+Native ORC Vectorized                         6970 / 7169          2.3         443.2       1.2X
+Native ORC Vectorized (Pushdown)              7447 / 7503          2.1         473.5       1.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+StringStartsWith filter: (value like '786432%'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7908 / 8046          2.0         502.8       1.0X
+Parquet Vectorized (Pushdown)                  408 /  429         38.6          25.9      19.4X
+Native ORC Vectorized                         7021 / 7100          2.2         446.4       1.1X
+Native ORC Vectorized (Pushdown)              7310 / 7490          2.2         464.8       1.1X
+
+
+================================================================================================
+Pushdown benchmark for decimal
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 decimal(9, 2) row (value = 7864320): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            3785 / 3867          4.2         240.6       1.0X
+Parquet Vectorized (Pushdown)                 3820 / 3928          4.1         242.9       1.0X
+Native ORC Vectorized                         3981 / 4049          4.0         253.1       1.0X
+Native ORC Vectorized (Pushdown)               702 /  735         22.4          44.6       5.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% decimal(9, 2) rows (value < 1572864): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            4694 / 4813          3.4         298.4       1.0X
+Parquet Vectorized (Pushdown)                 4839 / 4907          3.3         307.6       1.0X
+Native ORC Vectorized                         4943 / 5032          3.2         314.2       0.9X
+Native ORC Vectorized (Pushdown)              2043 / 2085          7.7         129.9       2.3X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% decimal(9, 2) rows (value < 7864320): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8321 / 8472          1.9         529.0       1.0X
+Parquet Vectorized (Pushdown)                 8125 / 8471          1.9         516.6       1.0X
+Native ORC Vectorized                         8524 / 8616          1.8         541.9       1.0X
+Native ORC Vectorized (Pushdown)              7961 / 8383          2.0         506.1       1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% decimal(9, 2) rows (value < 14155776): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                           9587 / 10112          1.6         609.5       1.0X
+Parquet Vectorized (Pushdown)                9726 / 10370          1.6         618.3       1.0X
+Native ORC Vectorized                       10119 / 11147          1.6         643.4       0.9X
+Native ORC Vectorized (Pushdown)              9366 / 9497          1.7         595.5       1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 decimal(18, 2) row (value = 7864320): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            4060 / 4093          3.9         258.1       1.0X
+Parquet Vectorized (Pushdown)                 4037 / 4125          3.9         256.6       1.0X
+Native ORC Vectorized                         4756 / 4811          3.3         302.4       0.9X
+Native ORC Vectorized (Pushdown)               824 /  889         19.1          52.4       4.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% decimal(18, 2) rows (value < 1572864): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            5157 / 5271          3.0         327.9       1.0X
+Parquet Vectorized (Pushdown)                 5051 / 5141          3.1         321.1       1.0X
+Native ORC Vectorized                         5723 / 6146          2.7         363.9       0.9X
+Native ORC Vectorized (Pushdown)              2198 / 2317          7.2         139.8       2.3X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% decimal(18, 2) rows (value < 7864320): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8608 / 8647          1.8         547.3       1.0X
+Parquet Vectorized (Pushdown)                 8471 / 8584          1.9         538.6       1.0X
+Native ORC Vectorized                        9249 / 10048          1.7         588.0       0.9X
+Native ORC Vectorized (Pushdown)              7645 / 8091          2.1         486.1       1.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% decimal(18, 2) rows (value < 14155776): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          11658 / 11888          1.3         741.2       1.0X
+Parquet Vectorized (Pushdown)               11812 / 12098          1.3         751.0       1.0X
+Native ORC Vectorized                       12943 / 13312          1.2         822.9       0.9X
+Native ORC Vectorized (Pushdown)            13139 / 13465          1.2         835.4       0.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 decimal(38, 2) row (value = 7864320): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            5491 / 5716          2.9         349.1       1.0X
+Parquet Vectorized (Pushdown)                 5515 / 5615          2.9         350.6       1.0X
+Native ORC Vectorized                         4582 / 4654          3.4         291.3       1.2X
+Native ORC Vectorized (Pushdown)               815 /  861         19.3          51.8       6.7X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% decimal(38, 2) rows (value < 1572864): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            6432 / 6527          2.4         409.0       1.0X
+Parquet Vectorized (Pushdown)                 6513 / 6607          2.4         414.1       1.0X
+Native ORC Vectorized                         5618 / 6085          2.8         357.2       1.1X
+Native ORC Vectorized (Pushdown)              2403 / 2443          6.5         152.8       2.7X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% decimal(38, 2) rows (value < 7864320): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          11041 / 11467          1.4         701.9       1.0X
+Parquet Vectorized (Pushdown)               10909 / 11484          1.4         693.5       1.0X
+Native ORC Vectorized                        9860 / 10436          1.6         626.9       1.1X
+Native ORC Vectorized (Pushdown)              7908 / 8069          2.0         502.8       1.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% decimal(38, 2) rows (value < 14155776): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          14816 / 16877          1.1         942.0       1.0X
+Parquet Vectorized (Pushdown)               15383 / 15740          1.0         978.0       1.0X
+Native ORC Vectorized                       14408 / 14771          1.1         916.0       1.0X
+Native ORC Vectorized (Pushdown)            13968 / 14805          1.1         888.1       1.1X
+
+
+================================================================================================
+Pushdown benchmark for InSet -> InFilters
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 5, distribution: 10): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7477 / 7587          2.1         475.4       1.0X
+Parquet Vectorized (Pushdown)                 7862 / 8346          2.0         499.9       1.0X
+Native ORC Vectorized                         6447 / 7021          2.4         409.9       1.2X
+Native ORC Vectorized (Pushdown)               983 / 1003         16.0          62.5       7.6X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 5, distribution: 50): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7107 / 7290          2.2         451.9       1.0X
+Parquet Vectorized (Pushdown)                 7196 / 7258          2.2         457.5       1.0X
+Native ORC Vectorized                         6102 / 6222          2.6         388.0       1.2X
+Native ORC Vectorized (Pushdown)               926 /  958         17.0          58.9       7.7X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 5, distribution: 90): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7374 / 7692          2.1         468.8       1.0X
+Parquet Vectorized (Pushdown)                 7771 / 7848          2.0         494.1       0.9X
+Native ORC Vectorized                         6184 / 6356          2.5         393.2       1.2X
+Native ORC Vectorized (Pushdown)               920 /  963         17.1          58.5       8.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 10, distribution: 10): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7073 / 7326          2.2         449.7       1.0X
+Parquet Vectorized (Pushdown)                 7304 / 7647          2.2         464.4       1.0X
+Native ORC Vectorized                         6222 / 6579          2.5         395.6       1.1X
+Native ORC Vectorized (Pushdown)               958 /  994         16.4          60.9       7.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 10, distribution: 50): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7121 / 7501          2.2         452.7       1.0X
+Parquet Vectorized (Pushdown)                 7751 / 8334          2.0         492.8       0.9X
+Native ORC Vectorized                         6225 / 6680          2.5         395.8       1.1X
+Native ORC Vectorized (Pushdown)               998 / 1020         15.8          63.5       7.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 10, distribution: 90): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7157 / 7399          2.2         455.1       1.0X
+Parquet Vectorized (Pushdown)                 7806 / 7911          2.0         496.3       0.9X
+Native ORC Vectorized                         6548 / 6720          2.4         416.3       1.1X
+Native ORC Vectorized (Pushdown)              1016 / 1050         15.5          64.6       7.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 50, distribution: 10): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7662 / 7805          2.1         487.1       1.0X
+Parquet Vectorized (Pushdown)                 7590 / 7861          2.1         482.5       1.0X
+Native ORC Vectorized                         6840 / 8073          2.3         434.9       1.1X
+Native ORC Vectorized (Pushdown)              1041 / 1075         15.1          66.2       7.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 50, distribution: 50): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            8230 / 9266          1.9         523.2       1.0X
+Parquet Vectorized (Pushdown)                 7735 / 7960          2.0         491.8       1.1X
+Native ORC Vectorized                         6945 / 7109          2.3         441.6       1.2X
+Native ORC Vectorized (Pushdown)              1123 / 1144         14.0          71.4       7.3X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 50, distribution: 90): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7656 / 8058          2.1         486.7       1.0X
+Parquet Vectorized (Pushdown)                 7860 / 8247          2.0         499.7       1.0X
+Native ORC Vectorized                         6684 / 7003          2.4         424.9       1.1X
+Native ORC Vectorized (Pushdown)              1085 / 1172         14.5          69.0       7.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 100, distribution: 10): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7594 / 8128          2.1         482.8       1.0X
+Parquet Vectorized (Pushdown)                 7845 / 7923          2.0         498.8       1.0X
+Native ORC Vectorized                         5859 / 6421          2.7         372.5       1.3X
+Native ORC Vectorized (Pushdown)              1037 / 1054         15.2          66.0       7.3X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 100, distribution: 50): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            6762 / 6775          2.3         429.9       1.0X
+Parquet Vectorized (Pushdown)                 6911 / 6970          2.3         439.4       1.0X
+Native ORC Vectorized                         5884 / 5960          2.7         374.1       1.1X
+Native ORC Vectorized (Pushdown)              1028 / 1052         15.3          65.4       6.6X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 100, distribution: 90): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            6718 / 6767          2.3         427.1       1.0X
+Parquet Vectorized (Pushdown)                 6812 / 6909          2.3         433.1       1.0X
+Native ORC Vectorized                         5842 / 5883          2.7         371.4       1.1X
+Native ORC Vectorized (Pushdown)              1040 / 1058         15.1          66.1       6.5X
+
+
+================================================================================================
+Pushdown benchmark for tinyint
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 tinyint row (value = CAST(63 AS tinyint)): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            3726 / 3775          4.2         236.9       1.0X
+Parquet Vectorized (Pushdown)                 3741 / 3789          4.2         237.9       1.0X
+Native ORC Vectorized                         2793 / 2909          5.6         177.6       1.3X
+Native ORC Vectorized (Pushdown)               530 /  561         29.7          33.7       7.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            4385 / 4406          3.6         278.8       1.0X
+Parquet Vectorized (Pushdown)                 4398 / 4454          3.6         279.6       1.0X
+Native ORC Vectorized                         3420 / 3501          4.6         217.4       1.3X
+Native ORC Vectorized (Pushdown)              1395 / 1432         11.3          88.7       3.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                            7307 / 7394          2.2         464.6       1.0X
+Parquet Vectorized (Pushdown)                 7411 / 7461          2.1         471.2       1.0X
+Native ORC Vectorized                         6501 / 7814          2.4         413.4       1.1X
+Native ORC Vectorized (Pushdown)              7341 / 8637          2.1         466.7       1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized                          11886 / 13122          1.3         755.7       1.0X
+Parquet Vectorized (Pushdown)               12557 / 14173          1.3         798.4       0.9X
+Native ORC Vectorized                       10758 / 11971          1.5         684.0       1.1X
+Native ORC Vectorized (Pushdown)            10564 / 10713          1.5         671.6       1.1X
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/bf67f70c/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala
index 6d7c7de..fc716de 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala
@@ -17,25 +17,30 @@
 
 package org.apache.spark.sql.execution.benchmark
 
-import java.io.File
+import java.io.{File, FileOutputStream, OutputStream}
 
 import scala.util.{Random, Try}
 
+import org.scalatest.{BeforeAndAfterEachTestData, Suite, TestData}
+
 import org.apache.spark.SparkConf
+import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.{DataFrame, SparkSession}
 import org.apache.spark.sql.functions.monotonically_increasing_id
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType}
 import org.apache.spark.util.{Benchmark, Utils}
 
-
 /**
  * Benchmark to measure read performance with Filter pushdown.
  * To run this:
- *  spark-submit --class <this class> <spark sql test jar>
+ *  build/sbt "sql/test-only *FilterPushdownBenchmark"
+ *
+ * Results will be written to "benchmarks/FilterPushdownBenchmark-results.txt".
  */
-object FilterPushdownBenchmark {
-  val conf = new SparkConf()
-    .setAppName("FilterPushdownBenchmark")
+class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfterEachTest {
+  private val conf = new SparkConf()
+    .setAppName(this.getClass.getSimpleName)
     // Since `spark.master` always exists, overrides this value
     .set("spark.master", "local[1]")
     .setIfMissing("spark.driver.memory", "3g")
@@ -44,8 +49,40 @@ object FilterPushdownBenchmark {
     .setIfMissing("orc.compression", "snappy")
     .setIfMissing("spark.sql.parquet.compression.codec", "snappy")
 
+  private val numRows = 1024 * 1024 * 15
+  private val width = 5
+  private val mid = numRows / 2
+  private val blockSize = 1048576
+
   private val spark = SparkSession.builder().config(conf).getOrCreate()
 
+  private var out: OutputStream = _
+
+  override def beforeAll() {
+    super.beforeAll()
+    out = new FileOutputStream(new File("benchmarks/FilterPushdownBenchmark-results.txt"))
+  }
+
+  override def beforeEach(td: TestData) {
+    super.beforeEach(td)
+    val separator = "=" * 96
+    val testHeader = (separator + '\n' + td.name + '\n' + separator + '\n' + '\n').getBytes
+    out.write(testHeader)
+  }
+
+  override def afterEach(td: TestData) {
+    out.write('\n')
+    super.afterEach(td)
+  }
+
+  override def afterAll() { // closes the results file, then runs the parent suite's cleanup
+    try {
+      Option(out).foreach(_.close()) // out is still null if beforeAll never opened it
+    } finally {
+      super.afterAll()
+    }
+  }
+
   def withTempPath(f: File => Unit): Unit = {
     val path = Utils.createTempDir()
     path.delete()
@@ -81,8 +118,7 @@ object FilterPushdownBenchmark {
       .withColumn("value", valueCol)
       .sort("value")
 
-    saveAsOrcTable(df, dir.getCanonicalPath + "/orc")
-    saveAsParquetTable(df, dir.getCanonicalPath + "/parquet")
+    saveAsTable(df, dir)
   }
 
   private def prepareStringDictTable(
@@ -93,19 +129,22 @@ object FilterPushdownBenchmark {
     }
     val df = spark.range(numRows).selectExpr(selectExpr: _*).sort("value")
 
-    saveAsOrcTable(df, dir.getCanonicalPath + "/orc")
-    saveAsParquetTable(df, dir.getCanonicalPath + "/parquet")
+    saveAsTable(df, dir)
   }
 
-  private def saveAsOrcTable(df: DataFrame, dir: String): Unit = {
-    // To always turn on dictionary encoding, we set 1.0 at the threshold (the default is 0.8)
-    df.write.mode("overwrite").option("orc.dictionary.key.threshold", 1.0).orc(dir)
-    spark.read.orc(dir).createOrReplaceTempView("orcTable")
-  }
+  private def saveAsTable(df: DataFrame, dir: File): Unit = {
+    val orcPath = dir.getCanonicalPath + "/orc"
+    val parquetPath = dir.getCanonicalPath + "/parquet"
 
-  private def saveAsParquetTable(df: DataFrame, dir: String): Unit = {
-    df.write.mode("overwrite").parquet(dir)
-    spark.read.parquet(dir).createOrReplaceTempView("parquetTable")
+    // To always turn on dictionary encoding, we set 1.0 at the threshold (the default is 0.8)
+    df.write.mode("overwrite")
+      .option("orc.dictionary.key.threshold", 1.0)
+      .option("orc.stripe.size", blockSize).orc(orcPath)
+    spark.read.orc(orcPath).createOrReplaceTempView("orcTable")
+
+    df.write.mode("overwrite")
+      .option("parquet.block.size", blockSize).parquet(parquetPath)
+    spark.read.parquet(parquetPath).createOrReplaceTempView("parquetTable")
   }
 
   def filterPushDownBenchmark(
@@ -113,7 +152,7 @@ object FilterPushdownBenchmark {
       title: String,
       whereExpr: String,
       selectExpr: String = "*"): Unit = {
-    val benchmark = new Benchmark(title, values, minNumIters = 5)
+    val benchmark = new Benchmark(title, values, minNumIters = 5, output = Some(out))
 
     Seq(false, true).foreach { pushDownEnabled =>
       val name = s"Parquet Vectorized ${if (pushDownEnabled) s"(Pushdown)" else ""}"
@@ -133,214 +172,6 @@ object FilterPushdownBenchmark {
       }
     }
 
-    /*
-    OpenJDK 64-Bit Server VM 1.8.0_171-b10 on Linux 4.14.33-51.37.amzn1.x86_64
-    Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
-    Select 0 string row (value IS NULL):     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            9201 / 9300          1.7         585.0       1.0X
-    Parquet Vectorized (Pushdown)                   89 /  105        176.3           5.7     103.1X
-    Native ORC Vectorized                         8886 / 8898          1.8         564.9       1.0X
-    Native ORC Vectorized (Pushdown)               110 /  128        143.4           7.0      83.9X
-
-
-    Select 0 string row
-    ('7864320' < value < '7864320'):         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            9336 / 9357          1.7         593.6       1.0X
-    Parquet Vectorized (Pushdown)                  927 /  937         17.0          58.9      10.1X
-    Native ORC Vectorized                         9026 / 9041          1.7         573.9       1.0X
-    Native ORC Vectorized (Pushdown)               257 /  272         61.1          16.4      36.3X
-
-
-    Select 1 string row (value = '7864320'): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            9209 / 9223          1.7         585.5       1.0X
-    Parquet Vectorized (Pushdown)                  908 /  925         17.3          57.7      10.1X
-    Native ORC Vectorized                         8878 / 8904          1.8         564.4       1.0X
-    Native ORC Vectorized (Pushdown)               248 /  261         63.4          15.8      37.1X
-
-
-    Select 1 string row
-    (value <=> '7864320'):                   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            9194 / 9216          1.7         584.5       1.0X
-    Parquet Vectorized (Pushdown)                  899 /  908         17.5          57.2      10.2X
-    Native ORC Vectorized                         8934 / 8962          1.8         568.0       1.0X
-    Native ORC Vectorized (Pushdown)               249 /  254         63.3          15.8      37.0X
-
-
-    Select 1 string row
-    ('7864320' <= value <= '7864320'):       Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            9332 / 9351          1.7         593.3       1.0X
-    Parquet Vectorized (Pushdown)                  915 /  934         17.2          58.2      10.2X
-    Native ORC Vectorized                         9049 / 9057          1.7         575.3       1.0X
-    Native ORC Vectorized (Pushdown)               248 /  258         63.5          15.8      37.7X
-
-
-    Select all string rows
-    (value IS NOT NULL):                     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                          20478 / 20497          0.8        1301.9       1.0X
-    Parquet Vectorized (Pushdown)               20461 / 20550          0.8        1300.9       1.0X
-    Native ORC Vectorized                       27464 / 27482          0.6        1746.1       0.7X
-    Native ORC Vectorized (Pushdown)            27454 / 27488          0.6        1745.5       0.7X
-
-
-    Select 0 int row (value IS NULL):        Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8489 / 8519          1.9         539.7       1.0X
-    Parquet Vectorized (Pushdown)                   64 /   69        246.1           4.1     132.8X
-    Native ORC Vectorized                         8064 / 8099          2.0         512.7       1.1X
-    Native ORC Vectorized (Pushdown)                88 /   94        178.6           5.6      96.4X
-
-
-    Select 0 int row
-    (7864320 < value < 7864320):             Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8494 / 8514          1.9         540.0       1.0X
-    Parquet Vectorized (Pushdown)                  835 /  840         18.8          53.1      10.2X
-    Native ORC Vectorized                         8090 / 8106          1.9         514.4       1.0X
-    Native ORC Vectorized (Pushdown)               249 /  257         63.2          15.8      34.1X
-
-
-    Select 1 int row (value = 7864320):      Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8552 / 8560          1.8         543.7       1.0X
-    Parquet Vectorized (Pushdown)                  837 /  841         18.8          53.2      10.2X
-    Native ORC Vectorized                         8178 / 8188          1.9         519.9       1.0X
-    Native ORC Vectorized (Pushdown)               249 /  258         63.2          15.8      34.4X
-
-
-    Select 1 int row (value <=> 7864320):    Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8562 / 8580          1.8         544.3       1.0X
-    Parquet Vectorized (Pushdown)                  833 /  836         18.9          53.0      10.3X
-    Native ORC Vectorized                         8164 / 8185          1.9         519.0       1.0X
-    Native ORC Vectorized (Pushdown)               245 /  254         64.3          15.6      35.0X
-
-
-    Select 1 int row
-    (7864320 <= value <= 7864320):           Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8540 / 8555          1.8         542.9       1.0X
-    Parquet Vectorized (Pushdown)                  837 /  839         18.8          53.2      10.2X
-    Native ORC Vectorized                         8182 / 8231          1.9         520.2       1.0X
-    Native ORC Vectorized (Pushdown)               250 /  259         62.9          15.9      34.1X
-
-
-    Select 1 int row
-    (7864319 < value < 7864321):             Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8535 / 8555          1.8         542.6       1.0X
-    Parquet Vectorized (Pushdown)                  835 /  841         18.8          53.1      10.2X
-    Native ORC Vectorized                         8159 / 8179          1.9         518.8       1.0X
-    Native ORC Vectorized (Pushdown)               244 /  250         64.5          15.5      35.0X
-
-
-    Select 10% int rows (value < 1572864):   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            9609 / 9634          1.6         610.9       1.0X
-    Parquet Vectorized (Pushdown)                 2663 / 2672          5.9         169.3       3.6X
-    Native ORC Vectorized                         9824 / 9850          1.6         624.6       1.0X
-    Native ORC Vectorized (Pushdown)              2717 / 2722          5.8         172.7       3.5X
-
-
-    Select 50% int rows (value < 7864320):   Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                          13592 / 13613          1.2         864.2       1.0X
-    Parquet Vectorized (Pushdown)                 9720 / 9738          1.6         618.0       1.4X
-    Native ORC Vectorized                       16366 / 16397          1.0        1040.5       0.8X
-    Native ORC Vectorized (Pushdown)            12437 / 12459          1.3         790.7       1.1X
-
-
-    Select 90% int rows (value < 14155776):  Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                          17580 / 17617          0.9        1117.7       1.0X
-    Parquet Vectorized (Pushdown)               16803 / 16827          0.9        1068.3       1.0X
-    Native ORC Vectorized                       24169 / 24187          0.7        1536.6       0.7X
-    Native ORC Vectorized (Pushdown)            22147 / 22341          0.7        1408.1       0.8X
-
-
-    Select all int rows (value IS NOT NULL): Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                          18461 / 18491          0.9        1173.7       1.0X
-    Parquet Vectorized (Pushdown)               18466 / 18530          0.9        1174.1       1.0X
-    Native ORC Vectorized                       24231 / 24270          0.6        1540.6       0.8X
-    Native ORC Vectorized (Pushdown)            24207 / 24304          0.6        1539.0       0.8X
-
-
-    Select all int rows (value > -1):        Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                          18414 / 18453          0.9        1170.7       1.0X
-    Parquet Vectorized (Pushdown)               18435 / 18464          0.9        1172.1       1.0X
-    Native ORC Vectorized                       24430 / 24454          0.6        1553.2       0.8X
-    Native ORC Vectorized (Pushdown)            24410 / 24465          0.6        1552.0       0.8X
-
-
-    Select all int rows (value != -1):       Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                          18446 / 18457          0.9        1172.8       1.0X
-    Parquet Vectorized (Pushdown)               18428 / 18440          0.9        1171.6       1.0X
-    Native ORC Vectorized                       24414 / 24450          0.6        1552.2       0.8X
-    Native ORC Vectorized (Pushdown)            24385 / 24472          0.6        1550.4       0.8X
-
-
-    Select 0 distinct string row
-    (value IS NULL):                         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8322 / 8352          1.9         529.1       1.0X
-    Parquet Vectorized (Pushdown)                   53 /   57        296.3           3.4     156.7X
-    Native ORC Vectorized                         7903 / 7953          2.0         502.4       1.1X
-    Native ORC Vectorized (Pushdown)                80 /   82        197.2           5.1     104.3X
-
-
-    Select 0 distinct string row
-    ('100' < value < '100'):                 Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8712 / 8743          1.8         553.9       1.0X
-    Parquet Vectorized (Pushdown)                  995 / 1030         15.8          63.3       8.8X
-    Native ORC Vectorized                         8345 / 8362          1.9         530.6       1.0X
-    Native ORC Vectorized (Pushdown)                84 /   87        187.6           5.3     103.9X
-
-
-    Select 1 distinct string row
-    (value = '100'):                         Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8574 / 8610          1.8         545.1       1.0X
-    Parquet Vectorized (Pushdown)                 1127 / 1135         14.0          71.6       7.6X
-    Native ORC Vectorized                         8163 / 8181          1.9         519.0       1.1X
-    Native ORC Vectorized (Pushdown)               426 /  433         36.9          27.1      20.1X
-
-
-    Select 1 distinct string row
-    (value <=> '100'):                       Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8549 / 8568          1.8         543.5       1.0X
-    Parquet Vectorized (Pushdown)                 1124 / 1131         14.0          71.4       7.6X
-    Native ORC Vectorized                         8163 / 8210          1.9         519.0       1.0X
-    Native ORC Vectorized (Pushdown)               426 /  436         36.9          27.1      20.1X
-
-
-    Select 1 distinct string row
-    ('100' <= value <= '100'):               Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                            8889 / 8896          1.8         565.2       1.0X
-    Parquet Vectorized (Pushdown)                 1161 / 1168         13.6          73.8       7.7X
-    Native ORC Vectorized                         8519 / 8554          1.8         541.6       1.0X
-    Native ORC Vectorized (Pushdown)               430 /  437         36.6          27.3      20.7X
-
-
-    Select all distinct string rows
-    (value IS NOT NULL):                     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    Parquet Vectorized                          20433 / 20533          0.8        1299.1       1.0X
-    Parquet Vectorized (Pushdown)               20433 / 20456          0.8        1299.1       1.0X
-    Native ORC Vectorized                       25435 / 25513          0.6        1617.1       0.8X
-    Native ORC Vectorized (Pushdown)            25435 / 25507          0.6        1617.1       0.8X
-    */
-
     benchmark.run()
   }
 
@@ -408,14 +239,8 @@ object FilterPushdownBenchmark {
     }
   }
 
-  def main(args: Array[String]): Unit = {
-    val numRows = 1024 * 1024 * 15
-    val width = 5
-
-    // Pushdown for many distinct value case
+  ignore("Pushdown for many distinct value case") {
     withTempPath { dir =>
-      val mid = numRows / 2
-
       withTempTable("orcTable", "patquetTable") {
         Seq(true, false).foreach { useStringForValue =>
           prepareTable(dir, numRows, width, useStringForValue)
@@ -427,16 +252,122 @@ object FilterPushdownBenchmark {
         }
       }
     }
+  }
 
-    // Pushdown for few distinct value case (use dictionary encoding)
+  ignore("Pushdown for few distinct value case (use dictionary encoding)") {
     withTempPath { dir =>
       val numDistinctValues = 200
-      val mid = numDistinctValues / 2
 
       withTempTable("orcTable", "patquetTable") {
         prepareStringDictTable(dir, numRows, numDistinctValues, width)
-        runStringBenchmark(numRows, width, mid, "distinct string")
+        runStringBenchmark(numRows, width, numDistinctValues / 2, "distinct string")
       }
     }
   }
+
+  ignore("Pushdown benchmark for StringStartsWith") {
+    withTempPath { dir =>
+      withTempTable("orcTable", "patquetTable") {
+        prepareTable(dir, numRows, width, true)
+        Seq(
+          "value like '10%'",
+          "value like '1000%'",
+          s"value like '${mid.toString.substring(0, mid.toString.length - 1)}%'"
+        ).foreach { whereExpr =>
+          val title = s"StringStartsWith filter: ($whereExpr)"
+          filterPushDownBenchmark(numRows, title, whereExpr)
+        }
+      }
+    }
+  }
+
+  ignore(s"Pushdown benchmark for ${DecimalType.simpleString}") {
+    withTempPath { dir =>
+      Seq(
+        s"decimal(${Decimal.MAX_INT_DIGITS}, 2)",
+        s"decimal(${Decimal.MAX_LONG_DIGITS}, 2)",
+        s"decimal(${DecimalType.MAX_PRECISION}, 2)"
+      ).foreach { dt =>
+        val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
+        val df = spark.range(numRows).selectExpr(columns: _*)
+          .withColumn("value", monotonically_increasing_id().cast(dt))
+        withTempTable("orcTable", "patquetTable") {
+          saveAsTable(df, dir)
+
+          Seq(s"value = $mid").foreach { whereExpr =>
+            val title = s"Select 1 $dt row ($whereExpr)".replace("value AND value", "value")
+            filterPushDownBenchmark(numRows, title, whereExpr)
+          }
+
+          val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
+          Seq(10, 50, 90).foreach { percent =>
+            filterPushDownBenchmark(
+              numRows,
+              s"Select $percent% $dt rows (value < ${numRows * percent / 100})",
+              s"value < ${numRows * percent / 100}",
+              selectExpr
+            )
+          }
+        }
+      }
+    }
+  }
+
+  ignore("Pushdown benchmark for InSet -> InFilters") {
+    withTempPath { dir =>
+      withTempTable("orcTable", "patquetTable") {
+        prepareTable(dir, numRows, width, false)
+        Seq(5, 10, 50, 100).foreach { count =>
+          Seq(10, 50, 90).foreach { distribution =>
+            val filter =
+              Range(0, count).map(r => scala.util.Random.nextInt(numRows * distribution / 100))
+            val whereExpr = s"value in(${filter.mkString(",")})"
+            val title = s"InSet -> InFilters (values count: $count, distribution: $distribution)"
+            filterPushDownBenchmark(numRows, title, whereExpr)
+          }
+        }
+      }
+    }
+  }
+
+  ignore(s"Pushdown benchmark for ${ByteType.simpleString}") {
+    withTempPath { dir =>
+      val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
+      val df = spark.range(numRows).selectExpr(columns: _*)
+        .withColumn("value", (monotonically_increasing_id() % Byte.MaxValue).cast(ByteType))
+        .orderBy("value")
+      withTempTable("orcTable", "patquetTable") {
+        saveAsTable(df, dir)
+
+        Seq(s"value = CAST(${Byte.MaxValue / 2} AS ${ByteType.simpleString})")
+          .foreach { whereExpr =>
+            val title = s"Select 1 ${ByteType.simpleString} row ($whereExpr)"
+              .replace("value AND value", "value")
+            filterPushDownBenchmark(numRows, title, whereExpr)
+          }
+
+        val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
+        Seq(10, 50, 90).foreach { percent =>
+          filterPushDownBenchmark(
+            numRows,
+            s"Select $percent% ${ByteType.simpleString} rows " +
+              s"(value < CAST(${Byte.MaxValue * percent / 100} AS ${ByteType.simpleString}))",
+            s"value < CAST(${Byte.MaxValue * percent / 100} AS ${ByteType.simpleString})",
+            selectExpr
+          )
+        }
+      }
+    }
+  }
+}
+
+trait BenchmarkBeforeAndAfterEachTest extends BeforeAndAfterEachTestData { this: Suite =>
+
+  override def beforeEach(td: TestData) {
+    super.beforeEach(td)
+  }
+
+  override def afterEach(td: TestData) {
+    super.afterEach(td)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org