You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/07/06 03:14:05 UTC
spark git commit: [SPARK-24692][TESTS] Improvement
FilterPushdownBenchmark
Repository: spark
Updated Branches:
refs/heads/master 01fcba2c6 -> bf67f70c4
[SPARK-24692][TESTS] Improvement FilterPushdownBenchmark
## What changes were proposed in this pull request?
Update `FilterPushdownBenchmark`, following the approach used by [`WideSchemaBenchmark`](https://github.com/apache/spark/blob/v2.3.1/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala):
1. Write the result to `benchmarks/FilterPushdownBenchmark-results.txt` for easy maintenance.
2. Add more benchmark cases: `StringStartsWith`, `Decimal`, `InSet -> InFilters` and `tinyint`.
## How was this patch tested?
manual tests
Author: Yuming Wang <yu...@ebay.com>
Closes #21677 from wangyum/SPARK-24692.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf67f70c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf67f70c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf67f70c
Branch: refs/heads/master
Commit: bf67f70c48881ee99751f7d51fbcbda1e593d90a
Parents: 01fcba2
Author: Yuming Wang <yu...@ebay.com>
Authored: Fri Jul 6 11:13:57 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Fri Jul 6 11:13:57 2018 +0800
----------------------------------------------------------------------
.../FilterPushdownBenchmark-results.txt | 580 +++++++++++++++++++
.../benchmark/FilterPushdownBenchmark.scala | 405 ++++++-------
2 files changed, 748 insertions(+), 237 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/bf67f70c/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
new file mode 100644
index 0000000..29fe434
--- /dev/null
+++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt
@@ -0,0 +1,580 @@
+================================================================================================
+Pushdown for many distinct value case
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8970 / 9122 1.8 570.3 1.0X
+Parquet Vectorized (Pushdown) 471 / 491 33.4 30.0 19.0X
+Native ORC Vectorized 7661 / 7853 2.1 487.0 1.2X
+Native ORC Vectorized (Pushdown) 1134 / 1161 13.9 72.1 7.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 string row ('7864320' < value < '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 9246 / 9297 1.7 587.8 1.0X
+Parquet Vectorized (Pushdown) 480 / 488 32.8 30.5 19.3X
+Native ORC Vectorized 7838 / 7850 2.0 498.3 1.2X
+Native ORC Vectorized (Pushdown) 1054 / 1118 14.9 67.0 8.8X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 string row (value = '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8989 / 9100 1.7 571.5 1.0X
+Parquet Vectorized (Pushdown) 448 / 467 35.1 28.5 20.1X
+Native ORC Vectorized 7680 / 7768 2.0 488.3 1.2X
+Native ORC Vectorized (Pushdown) 1067 / 1118 14.7 67.8 8.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 string row (value <=> '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 9115 / 9266 1.7 579.5 1.0X
+Parquet Vectorized (Pushdown) 466 / 492 33.7 29.7 19.5X
+Native ORC Vectorized 7800 / 7914 2.0 495.9 1.2X
+Native ORC Vectorized (Pushdown) 1075 / 1102 14.6 68.4 8.5X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 string row ('7864320' <= value <= '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 9099 / 9237 1.7 578.5 1.0X
+Parquet Vectorized (Pushdown) 462 / 475 34.1 29.3 19.7X
+Native ORC Vectorized 7847 / 7925 2.0 498.9 1.2X
+Native ORC Vectorized (Pushdown) 1078 / 1114 14.6 68.5 8.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all string rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 19303 / 19547 0.8 1227.3 1.0X
+Parquet Vectorized (Pushdown) 19924 / 20089 0.8 1266.7 1.0X
+Native ORC Vectorized 18725 / 19079 0.8 1190.5 1.0X
+Native ORC Vectorized (Pushdown) 19310 / 19492 0.8 1227.7 1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 int row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8117 / 8323 1.9 516.1 1.0X
+Parquet Vectorized (Pushdown) 484 / 494 32.5 30.8 16.8X
+Native ORC Vectorized 6811 / 7036 2.3 433.0 1.2X
+Native ORC Vectorized (Pushdown) 1061 / 1082 14.8 67.5 7.6X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 int row (7864320 < value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8105 / 8140 1.9 515.3 1.0X
+Parquet Vectorized (Pushdown) 478 / 505 32.9 30.4 17.0X
+Native ORC Vectorized 6914 / 7211 2.3 439.6 1.2X
+Native ORC Vectorized (Pushdown) 1044 / 1064 15.1 66.4 7.8X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 int row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7983 / 8116 2.0 507.6 1.0X
+Parquet Vectorized (Pushdown) 464 / 487 33.9 29.5 17.2X
+Native ORC Vectorized 6703 / 6774 2.3 426.1 1.2X
+Native ORC Vectorized (Pushdown) 1017 / 1058 15.5 64.6 7.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 int row (value <=> 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7942 / 7983 2.0 504.9 1.0X
+Parquet Vectorized (Pushdown) 468 / 479 33.6 29.7 17.0X
+Native ORC Vectorized 6677 / 6779 2.4 424.5 1.2X
+Native ORC Vectorized (Pushdown) 1021 / 1068 15.4 64.9 7.8X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 int row (7864320 <= value <= 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7909 / 7958 2.0 502.8 1.0X
+Parquet Vectorized (Pushdown) 485 / 494 32.4 30.8 16.3X
+Native ORC Vectorized 6751 / 6846 2.3 429.2 1.2X
+Native ORC Vectorized (Pushdown) 1043 / 1077 15.1 66.3 7.6X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 int row (7864319 < value < 7864321): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8010 / 8033 2.0 509.2 1.0X
+Parquet Vectorized (Pushdown) 472 / 489 33.3 30.0 17.0X
+Native ORC Vectorized 6655 / 6808 2.4 423.1 1.2X
+Native ORC Vectorized (Pushdown) 1015 / 1067 15.5 64.5 7.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% int rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8983 / 9035 1.8 571.1 1.0X
+Parquet Vectorized (Pushdown) 2204 / 2231 7.1 140.1 4.1X
+Native ORC Vectorized 7864 / 8011 2.0 500.0 1.1X
+Native ORC Vectorized (Pushdown) 2674 / 2789 5.9 170.0 3.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% int rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 12723 / 12903 1.2 808.9 1.0X
+Parquet Vectorized (Pushdown) 9112 / 9282 1.7 579.3 1.4X
+Native ORC Vectorized 12090 / 12230 1.3 768.7 1.1X
+Native ORC Vectorized (Pushdown) 9242 / 9372 1.7 587.6 1.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% int rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 16453 / 16678 1.0 1046.1 1.0X
+Parquet Vectorized (Pushdown) 15997 / 16262 1.0 1017.0 1.0X
+Native ORC Vectorized 16652 / 17070 0.9 1058.7 1.0X
+Native ORC Vectorized (Pushdown) 15843 / 16112 1.0 1007.2 1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all int rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 17098 / 17254 0.9 1087.1 1.0X
+Parquet Vectorized (Pushdown) 17302 / 17529 0.9 1100.1 1.0X
+Native ORC Vectorized 16790 / 17098 0.9 1067.5 1.0X
+Native ORC Vectorized (Pushdown) 17329 / 17914 0.9 1101.7 1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all int rows (value > -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 17088 / 17392 0.9 1086.4 1.0X
+Parquet Vectorized (Pushdown) 17609 / 17863 0.9 1119.5 1.0X
+Native ORC Vectorized 18334 / 69831 0.9 1165.7 0.9X
+Native ORC Vectorized (Pushdown) 17465 / 17629 0.9 1110.4 1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all int rows (value != -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 16903 / 17233 0.9 1074.6 1.0X
+Parquet Vectorized (Pushdown) 16945 / 17032 0.9 1077.3 1.0X
+Native ORC Vectorized 16377 / 16762 1.0 1041.2 1.0X
+Native ORC Vectorized (Pushdown) 16950 / 17212 0.9 1077.7 1.0X
+
+
+================================================================================================
+Pushdown for few distinct value case (use dictionary encoding)
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 distinct string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7245 / 7322 2.2 460.7 1.0X
+Parquet Vectorized (Pushdown) 378 / 389 41.6 24.0 19.2X
+Native ORC Vectorized 6720 / 6778 2.3 427.2 1.1X
+Native ORC Vectorized (Pushdown) 1009 / 1032 15.6 64.2 7.2X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 0 distinct string row ('100' < value < '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7627 / 7795 2.1 484.9 1.0X
+Parquet Vectorized (Pushdown) 384 / 406 41.0 24.4 19.9X
+Native ORC Vectorized 6724 / 7824 2.3 427.5 1.1X
+Native ORC Vectorized (Pushdown) 968 / 986 16.3 61.5 7.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 distinct string row (value = '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7157 / 7534 2.2 455.0 1.0X
+Parquet Vectorized (Pushdown) 542 / 565 29.0 34.5 13.2X
+Native ORC Vectorized 6716 / 7214 2.3 427.0 1.1X
+Native ORC Vectorized (Pushdown) 1212 / 1288 13.0 77.0 5.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 distinct string row (value <=> '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7368 / 7552 2.1 468.4 1.0X
+Parquet Vectorized (Pushdown) 544 / 556 28.9 34.6 13.5X
+Native ORC Vectorized 6740 / 6867 2.3 428.5 1.1X
+Native ORC Vectorized (Pushdown) 1230 / 1426 12.8 78.2 6.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 distinct string row ('100' <= value <= '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7427 / 7734 2.1 472.2 1.0X
+Parquet Vectorized (Pushdown) 556 / 568 28.3 35.4 13.3X
+Native ORC Vectorized 6847 / 7059 2.3 435.3 1.1X
+Native ORC Vectorized (Pushdown) 1226 / 1230 12.8 77.9 6.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select all distinct string rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 16998 / 17311 0.9 1080.7 1.0X
+Parquet Vectorized (Pushdown) 16977 / 17250 0.9 1079.4 1.0X
+Native ORC Vectorized 18447 / 19852 0.9 1172.8 0.9X
+Native ORC Vectorized (Pushdown) 16614 / 17102 0.9 1056.3 1.0X
+
+
+================================================================================================
+Pushdown benchmark for StringStartsWith
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+StringStartsWith filter: (value like '10%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 9705 / 10814 1.6 617.0 1.0X
+Parquet Vectorized (Pushdown) 3086 / 3574 5.1 196.2 3.1X
+Native ORC Vectorized 10094 / 10695 1.6 641.8 1.0X
+Native ORC Vectorized (Pushdown) 9611 / 9999 1.6 611.0 1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+StringStartsWith filter: (value like '1000%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8016 / 8183 2.0 509.7 1.0X
+Parquet Vectorized (Pushdown) 444 / 457 35.4 28.2 18.0X
+Native ORC Vectorized 6970 / 7169 2.3 443.2 1.2X
+Native ORC Vectorized (Pushdown) 7447 / 7503 2.1 473.5 1.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+StringStartsWith filter: (value like '786432%'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7908 / 8046 2.0 502.8 1.0X
+Parquet Vectorized (Pushdown) 408 / 429 38.6 25.9 19.4X
+Native ORC Vectorized 7021 / 7100 2.2 446.4 1.1X
+Native ORC Vectorized (Pushdown) 7310 / 7490 2.2 464.8 1.1X
+
+
+================================================================================================
+Pushdown benchmark for decimal
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 decimal(9, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 3785 / 3867 4.2 240.6 1.0X
+Parquet Vectorized (Pushdown) 3820 / 3928 4.1 242.9 1.0X
+Native ORC Vectorized 3981 / 4049 4.0 253.1 1.0X
+Native ORC Vectorized (Pushdown) 702 / 735 22.4 44.6 5.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% decimal(9, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 4694 / 4813 3.4 298.4 1.0X
+Parquet Vectorized (Pushdown) 4839 / 4907 3.3 307.6 1.0X
+Native ORC Vectorized 4943 / 5032 3.2 314.2 0.9X
+Native ORC Vectorized (Pushdown) 2043 / 2085 7.7 129.9 2.3X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% decimal(9, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8321 / 8472 1.9 529.0 1.0X
+Parquet Vectorized (Pushdown) 8125 / 8471 1.9 516.6 1.0X
+Native ORC Vectorized 8524 / 8616 1.8 541.9 1.0X
+Native ORC Vectorized (Pushdown) 7961 / 8383 2.0 506.1 1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% decimal(9, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 9587 / 10112 1.6 609.5 1.0X
+Parquet Vectorized (Pushdown) 9726 / 10370 1.6 618.3 1.0X
+Native ORC Vectorized 10119 / 11147 1.6 643.4 0.9X
+Native ORC Vectorized (Pushdown) 9366 / 9497 1.7 595.5 1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 decimal(18, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 4060 / 4093 3.9 258.1 1.0X
+Parquet Vectorized (Pushdown) 4037 / 4125 3.9 256.6 1.0X
+Native ORC Vectorized 4756 / 4811 3.3 302.4 0.9X
+Native ORC Vectorized (Pushdown) 824 / 889 19.1 52.4 4.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% decimal(18, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 5157 / 5271 3.0 327.9 1.0X
+Parquet Vectorized (Pushdown) 5051 / 5141 3.1 321.1 1.0X
+Native ORC Vectorized 5723 / 6146 2.7 363.9 0.9X
+Native ORC Vectorized (Pushdown) 2198 / 2317 7.2 139.8 2.3X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% decimal(18, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8608 / 8647 1.8 547.3 1.0X
+Parquet Vectorized (Pushdown) 8471 / 8584 1.9 538.6 1.0X
+Native ORC Vectorized 9249 / 10048 1.7 588.0 0.9X
+Native ORC Vectorized (Pushdown) 7645 / 8091 2.1 486.1 1.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% decimal(18, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 11658 / 11888 1.3 741.2 1.0X
+Parquet Vectorized (Pushdown) 11812 / 12098 1.3 751.0 1.0X
+Native ORC Vectorized 12943 / 13312 1.2 822.9 0.9X
+Native ORC Vectorized (Pushdown) 13139 / 13465 1.2 835.4 0.9X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 decimal(38, 2) row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 5491 / 5716 2.9 349.1 1.0X
+Parquet Vectorized (Pushdown) 5515 / 5615 2.9 350.6 1.0X
+Native ORC Vectorized 4582 / 4654 3.4 291.3 1.2X
+Native ORC Vectorized (Pushdown) 815 / 861 19.3 51.8 6.7X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% decimal(38, 2) rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 6432 / 6527 2.4 409.0 1.0X
+Parquet Vectorized (Pushdown) 6513 / 6607 2.4 414.1 1.0X
+Native ORC Vectorized 5618 / 6085 2.8 357.2 1.1X
+Native ORC Vectorized (Pushdown) 2403 / 2443 6.5 152.8 2.7X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% decimal(38, 2) rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 11041 / 11467 1.4 701.9 1.0X
+Parquet Vectorized (Pushdown) 10909 / 11484 1.4 693.5 1.0X
+Native ORC Vectorized 9860 / 10436 1.6 626.9 1.1X
+Native ORC Vectorized (Pushdown) 7908 / 8069 2.0 502.8 1.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% decimal(38, 2) rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 14816 / 16877 1.1 942.0 1.0X
+Parquet Vectorized (Pushdown) 15383 / 15740 1.0 978.0 1.0X
+Native ORC Vectorized 14408 / 14771 1.1 916.0 1.0X
+Native ORC Vectorized (Pushdown) 13968 / 14805 1.1 888.1 1.1X
+
+
+================================================================================================
+Pushdown benchmark for InSet -> InFilters
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 5, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7477 / 7587 2.1 475.4 1.0X
+Parquet Vectorized (Pushdown) 7862 / 8346 2.0 499.9 1.0X
+Native ORC Vectorized 6447 / 7021 2.4 409.9 1.2X
+Native ORC Vectorized (Pushdown) 983 / 1003 16.0 62.5 7.6X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 5, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7107 / 7290 2.2 451.9 1.0X
+Parquet Vectorized (Pushdown) 7196 / 7258 2.2 457.5 1.0X
+Native ORC Vectorized 6102 / 6222 2.6 388.0 1.2X
+Native ORC Vectorized (Pushdown) 926 / 958 17.0 58.9 7.7X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 5, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7374 / 7692 2.1 468.8 1.0X
+Parquet Vectorized (Pushdown) 7771 / 7848 2.0 494.1 0.9X
+Native ORC Vectorized 6184 / 6356 2.5 393.2 1.2X
+Native ORC Vectorized (Pushdown) 920 / 963 17.1 58.5 8.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 10, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7073 / 7326 2.2 449.7 1.0X
+Parquet Vectorized (Pushdown) 7304 / 7647 2.2 464.4 1.0X
+Native ORC Vectorized 6222 / 6579 2.5 395.6 1.1X
+Native ORC Vectorized (Pushdown) 958 / 994 16.4 60.9 7.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 10, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7121 / 7501 2.2 452.7 1.0X
+Parquet Vectorized (Pushdown) 7751 / 8334 2.0 492.8 0.9X
+Native ORC Vectorized 6225 / 6680 2.5 395.8 1.1X
+Native ORC Vectorized (Pushdown) 998 / 1020 15.8 63.5 7.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 10, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7157 / 7399 2.2 455.1 1.0X
+Parquet Vectorized (Pushdown) 7806 / 7911 2.0 496.3 0.9X
+Native ORC Vectorized 6548 / 6720 2.4 416.3 1.1X
+Native ORC Vectorized (Pushdown) 1016 / 1050 15.5 64.6 7.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 50, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7662 / 7805 2.1 487.1 1.0X
+Parquet Vectorized (Pushdown) 7590 / 7861 2.1 482.5 1.0X
+Native ORC Vectorized 6840 / 8073 2.3 434.9 1.1X
+Native ORC Vectorized (Pushdown) 1041 / 1075 15.1 66.2 7.4X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 50, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 8230 / 9266 1.9 523.2 1.0X
+Parquet Vectorized (Pushdown) 7735 / 7960 2.0 491.8 1.1X
+Native ORC Vectorized 6945 / 7109 2.3 441.6 1.2X
+Native ORC Vectorized (Pushdown) 1123 / 1144 14.0 71.4 7.3X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 50, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7656 / 8058 2.1 486.7 1.0X
+Parquet Vectorized (Pushdown) 7860 / 8247 2.0 499.7 1.0X
+Native ORC Vectorized 6684 / 7003 2.4 424.9 1.1X
+Native ORC Vectorized (Pushdown) 1085 / 1172 14.5 69.0 7.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 100, distribution: 10): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7594 / 8128 2.1 482.8 1.0X
+Parquet Vectorized (Pushdown) 7845 / 7923 2.0 498.8 1.0X
+Native ORC Vectorized 5859 / 6421 2.7 372.5 1.3X
+Native ORC Vectorized (Pushdown) 1037 / 1054 15.2 66.0 7.3X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 100, distribution: 50): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 6762 / 6775 2.3 429.9 1.0X
+Parquet Vectorized (Pushdown) 6911 / 6970 2.3 439.4 1.0X
+Native ORC Vectorized 5884 / 5960 2.7 374.1 1.1X
+Native ORC Vectorized (Pushdown) 1028 / 1052 15.3 65.4 6.6X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+InSet -> InFilters (values count: 100, distribution: 90): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 6718 / 6767 2.3 427.1 1.0X
+Parquet Vectorized (Pushdown) 6812 / 6909 2.3 433.1 1.0X
+Native ORC Vectorized 5842 / 5883 2.7 371.4 1.1X
+Native ORC Vectorized (Pushdown) 1040 / 1058 15.1 66.1 6.5X
+
+
+================================================================================================
+Pushdown benchmark for tinyint
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 1 tinyint row (value = CAST(63 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 3726 / 3775 4.2 236.9 1.0X
+Parquet Vectorized (Pushdown) 3741 / 3789 4.2 237.9 1.0X
+Native ORC Vectorized 2793 / 2909 5.6 177.6 1.3X
+Native ORC Vectorized (Pushdown) 530 / 561 29.7 33.7 7.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 4385 / 4406 3.6 278.8 1.0X
+Parquet Vectorized (Pushdown) 4398 / 4454 3.6 279.6 1.0X
+Native ORC Vectorized 3420 / 3501 4.6 217.4 1.3X
+Native ORC Vectorized (Pushdown) 1395 / 1432 11.3 88.7 3.1X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 7307 / 7394 2.2 464.6 1.0X
+Parquet Vectorized (Pushdown) 7411 / 7461 2.1 471.2 1.0X
+Native ORC Vectorized 6501 / 7814 2.4 413.4 1.1X
+Native ORC Vectorized (Pushdown) 7341 / 8637 2.1 466.7 1.0X
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz
+
+Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Parquet Vectorized 11886 / 13122 1.3 755.7 1.0X
+Parquet Vectorized (Pushdown) 12557 / 14173 1.3 798.4 0.9X
+Native ORC Vectorized 10758 / 11971 1.5 684.0 1.1X
+Native ORC Vectorized (Pushdown) 10564 / 10713 1.5 671.6 1.1X
+
+
http://git-wip-us.apache.org/repos/asf/spark/blob/bf67f70c/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala
index 6d7c7de..fc716de 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala
@@ -17,25 +17,30 @@
package org.apache.spark.sql.execution.benchmark
-import java.io.File
+import java.io.{File, FileOutputStream, OutputStream}
import scala.util.{Random, Try}
+import org.scalatest.{BeforeAndAfterEachTestData, Suite, TestData}
+
import org.apache.spark.SparkConf
+import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions.monotonically_increasing_id
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType}
import org.apache.spark.util.{Benchmark, Utils}
-
/**
* Benchmark to measure read performance with Filter pushdown.
* To run this:
- * spark-submit --class <this class> <spark sql test jar>
+ * build/sbt "sql/test-only *FilterPushdownBenchmark"
+ *
+ * Results will be written to "benchmarks/FilterPushdownBenchmark-results.txt".
*/
-object FilterPushdownBenchmark {
- val conf = new SparkConf()
- .setAppName("FilterPushdownBenchmark")
+class FilterPushdownBenchmark extends SparkFunSuite with BenchmarkBeforeAndAfterEachTest {
+ private val conf = new SparkConf()
+ .setAppName(this.getClass.getSimpleName)
// Since `spark.master` always exists, overrides this value
.set("spark.master", "local[1]")
.setIfMissing("spark.driver.memory", "3g")
@@ -44,8 +49,40 @@ object FilterPushdownBenchmark {
.setIfMissing("orc.compression", "snappy")
.setIfMissing("spark.sql.parquet.compression.codec", "snappy")
+ private val numRows = 1024 * 1024 * 15
+ private val width = 5
+ private val mid = numRows / 2
+ private val blockSize = 1048576
+
private val spark = SparkSession.builder().config(conf).getOrCreate()
+ private var out: OutputStream = _
+
+ /**
+  * Opens the stream that receives all benchmark output of this suite. The path is relative,
+  * so it is resolved against the working directory of the JVM running the tests.
+  */
+ override def beforeAll(): Unit = {
+   super.beforeAll()
+   val resultsFile = new File("benchmarks/FilterPushdownBenchmark-results.txt")
+   out = new FileOutputStream(resultsFile)
+ }
+
+ /**
+  * Writes a banner to the results stream before each test: the test name framed by two
+  * rules of 96 '=' characters and followed by an empty line.
+  */
+ override def beforeEach(td: TestData): Unit = {
+   super.beforeEach(td)
+   val rule = "=" * 96
+   val banner = s"$rule\n${td.name}\n$rule\n\n"
+   out.write(banner.getBytes)
+ }
+
+ /** Appends a newline to the results stream after each test, then runs the inherited hook. */
+ override def afterEach(td: TestData): Unit = {
+   out.write('\n'.toInt)
+   super.afterEach(td)
+ }
+
+ /**
+  * Closes the results stream once the suite has finished. `super.afterAll()` is invoked
+  * even when closing the stream throws.
+  */
+ override def afterAll(): Unit = {
+   try {
+     // `out` is still null when `beforeAll` failed before the file was opened; skipping it
+     // avoids a NullPointerException during teardown that would hide the original failure.
+     Option(out).foreach(_.close())
+   } finally {
+     super.afterAll()
+   }
+ }
+
def withTempPath(f: File => Unit): Unit = {
val path = Utils.createTempDir()
path.delete()
@@ -81,8 +118,7 @@ object FilterPushdownBenchmark {
.withColumn("value", valueCol)
.sort("value")
- saveAsOrcTable(df, dir.getCanonicalPath + "/orc")
- saveAsParquetTable(df, dir.getCanonicalPath + "/parquet")
+ saveAsTable(df, dir)
}
private def prepareStringDictTable(
@@ -93,19 +129,22 @@ object FilterPushdownBenchmark {
}
val df = spark.range(numRows).selectExpr(selectExpr: _*).sort("value")
- saveAsOrcTable(df, dir.getCanonicalPath + "/orc")
- saveAsParquetTable(df, dir.getCanonicalPath + "/parquet")
+ saveAsTable(df, dir)
}
- private def saveAsOrcTable(df: DataFrame, dir: String): Unit = {
- // To always turn on dictionary encoding, we set 1.0 at the threshold (the default is 0.8)
- df.write.mode("overwrite").option("orc.dictionary.key.threshold", 1.0).orc(dir)
- spark.read.orc(dir).createOrReplaceTempView("orcTable")
- }
+ private def saveAsTable(df: DataFrame, dir: File): Unit = {
+ val orcPath = dir.getCanonicalPath + "/orc"
+ val parquetPath = dir.getCanonicalPath + "/parquet"
- private def saveAsParquetTable(df: DataFrame, dir: String): Unit = {
- df.write.mode("overwrite").parquet(dir)
- spark.read.parquet(dir).createOrReplaceTempView("parquetTable")
+ // To always turn on dictionary encoding, we set 1.0 at the threshold (the default is 0.8)
+ df.write.mode("overwrite")
+ .option("orc.dictionary.key.threshold", 1.0)
+ .option("orc.stripe.size", blockSize).orc(orcPath)
+ spark.read.orc(orcPath).createOrReplaceTempView("orcTable")
+
+ df.write.mode("overwrite")
+ .option("parquet.block.size", blockSize).parquet(parquetPath)
+ spark.read.parquet(parquetPath).createOrReplaceTempView("parquetTable")
}
def filterPushDownBenchmark(
@@ -113,7 +152,7 @@ object FilterPushdownBenchmark {
title: String,
whereExpr: String,
selectExpr: String = "*"): Unit = {
- val benchmark = new Benchmark(title, values, minNumIters = 5)
+ val benchmark = new Benchmark(title, values, minNumIters = 5, output = Some(out))
Seq(false, true).foreach { pushDownEnabled =>
val name = s"Parquet Vectorized ${if (pushDownEnabled) s"(Pushdown)" else ""}"
@@ -133,214 +172,6 @@ object FilterPushdownBenchmark {
}
}
- /*
- OpenJDK 64-Bit Server VM 1.8.0_171-b10 on Linux 4.14.33-51.37.amzn1.x86_64
- Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
- Select 0 string row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 9201 / 9300 1.7 585.0 1.0X
- Parquet Vectorized (Pushdown) 89 / 105 176.3 5.7 103.1X
- Native ORC Vectorized 8886 / 8898 1.8 564.9 1.0X
- Native ORC Vectorized (Pushdown) 110 / 128 143.4 7.0 83.9X
-
-
- Select 0 string row
- ('7864320' < value < '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 9336 / 9357 1.7 593.6 1.0X
- Parquet Vectorized (Pushdown) 927 / 937 17.0 58.9 10.1X
- Native ORC Vectorized 9026 / 9041 1.7 573.9 1.0X
- Native ORC Vectorized (Pushdown) 257 / 272 61.1 16.4 36.3X
-
-
- Select 1 string row (value = '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 9209 / 9223 1.7 585.5 1.0X
- Parquet Vectorized (Pushdown) 908 / 925 17.3 57.7 10.1X
- Native ORC Vectorized 8878 / 8904 1.8 564.4 1.0X
- Native ORC Vectorized (Pushdown) 248 / 261 63.4 15.8 37.1X
-
-
- Select 1 string row
- (value <=> '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 9194 / 9216 1.7 584.5 1.0X
- Parquet Vectorized (Pushdown) 899 / 908 17.5 57.2 10.2X
- Native ORC Vectorized 8934 / 8962 1.8 568.0 1.0X
- Native ORC Vectorized (Pushdown) 249 / 254 63.3 15.8 37.0X
-
-
- Select 1 string row
- ('7864320' <= value <= '7864320'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 9332 / 9351 1.7 593.3 1.0X
- Parquet Vectorized (Pushdown) 915 / 934 17.2 58.2 10.2X
- Native ORC Vectorized 9049 / 9057 1.7 575.3 1.0X
- Native ORC Vectorized (Pushdown) 248 / 258 63.5 15.8 37.7X
-
-
- Select all string rows
- (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 20478 / 20497 0.8 1301.9 1.0X
- Parquet Vectorized (Pushdown) 20461 / 20550 0.8 1300.9 1.0X
- Native ORC Vectorized 27464 / 27482 0.6 1746.1 0.7X
- Native ORC Vectorized (Pushdown) 27454 / 27488 0.6 1745.5 0.7X
-
-
- Select 0 int row (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8489 / 8519 1.9 539.7 1.0X
- Parquet Vectorized (Pushdown) 64 / 69 246.1 4.1 132.8X
- Native ORC Vectorized 8064 / 8099 2.0 512.7 1.1X
- Native ORC Vectorized (Pushdown) 88 / 94 178.6 5.6 96.4X
-
-
- Select 0 int row
- (7864320 < value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8494 / 8514 1.9 540.0 1.0X
- Parquet Vectorized (Pushdown) 835 / 840 18.8 53.1 10.2X
- Native ORC Vectorized 8090 / 8106 1.9 514.4 1.0X
- Native ORC Vectorized (Pushdown) 249 / 257 63.2 15.8 34.1X
-
-
- Select 1 int row (value = 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8552 / 8560 1.8 543.7 1.0X
- Parquet Vectorized (Pushdown) 837 / 841 18.8 53.2 10.2X
- Native ORC Vectorized 8178 / 8188 1.9 519.9 1.0X
- Native ORC Vectorized (Pushdown) 249 / 258 63.2 15.8 34.4X
-
-
- Select 1 int row (value <=> 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8562 / 8580 1.8 544.3 1.0X
- Parquet Vectorized (Pushdown) 833 / 836 18.9 53.0 10.3X
- Native ORC Vectorized 8164 / 8185 1.9 519.0 1.0X
- Native ORC Vectorized (Pushdown) 245 / 254 64.3 15.6 35.0X
-
-
- Select 1 int row
- (7864320 <= value <= 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8540 / 8555 1.8 542.9 1.0X
- Parquet Vectorized (Pushdown) 837 / 839 18.8 53.2 10.2X
- Native ORC Vectorized 8182 / 8231 1.9 520.2 1.0X
- Native ORC Vectorized (Pushdown) 250 / 259 62.9 15.9 34.1X
-
-
- Select 1 int row
- (7864319 < value < 7864321): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8535 / 8555 1.8 542.6 1.0X
- Parquet Vectorized (Pushdown) 835 / 841 18.8 53.1 10.2X
- Native ORC Vectorized 8159 / 8179 1.9 518.8 1.0X
- Native ORC Vectorized (Pushdown) 244 / 250 64.5 15.5 35.0X
-
-
- Select 10% int rows (value < 1572864): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 9609 / 9634 1.6 610.9 1.0X
- Parquet Vectorized (Pushdown) 2663 / 2672 5.9 169.3 3.6X
- Native ORC Vectorized 9824 / 9850 1.6 624.6 1.0X
- Native ORC Vectorized (Pushdown) 2717 / 2722 5.8 172.7 3.5X
-
-
- Select 50% int rows (value < 7864320): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 13592 / 13613 1.2 864.2 1.0X
- Parquet Vectorized (Pushdown) 9720 / 9738 1.6 618.0 1.4X
- Native ORC Vectorized 16366 / 16397 1.0 1040.5 0.8X
- Native ORC Vectorized (Pushdown) 12437 / 12459 1.3 790.7 1.1X
-
-
- Select 90% int rows (value < 14155776): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 17580 / 17617 0.9 1117.7 1.0X
- Parquet Vectorized (Pushdown) 16803 / 16827 0.9 1068.3 1.0X
- Native ORC Vectorized 24169 / 24187 0.7 1536.6 0.7X
- Native ORC Vectorized (Pushdown) 22147 / 22341 0.7 1408.1 0.8X
-
-
- Select all int rows (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 18461 / 18491 0.9 1173.7 1.0X
- Parquet Vectorized (Pushdown) 18466 / 18530 0.9 1174.1 1.0X
- Native ORC Vectorized 24231 / 24270 0.6 1540.6 0.8X
- Native ORC Vectorized (Pushdown) 24207 / 24304 0.6 1539.0 0.8X
-
-
- Select all int rows (value > -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 18414 / 18453 0.9 1170.7 1.0X
- Parquet Vectorized (Pushdown) 18435 / 18464 0.9 1172.1 1.0X
- Native ORC Vectorized 24430 / 24454 0.6 1553.2 0.8X
- Native ORC Vectorized (Pushdown) 24410 / 24465 0.6 1552.0 0.8X
-
-
- Select all int rows (value != -1): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 18446 / 18457 0.9 1172.8 1.0X
- Parquet Vectorized (Pushdown) 18428 / 18440 0.9 1171.6 1.0X
- Native ORC Vectorized 24414 / 24450 0.6 1552.2 0.8X
- Native ORC Vectorized (Pushdown) 24385 / 24472 0.6 1550.4 0.8X
-
-
- Select 0 distinct string row
- (value IS NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8322 / 8352 1.9 529.1 1.0X
- Parquet Vectorized (Pushdown) 53 / 57 296.3 3.4 156.7X
- Native ORC Vectorized 7903 / 7953 2.0 502.4 1.1X
- Native ORC Vectorized (Pushdown) 80 / 82 197.2 5.1 104.3X
-
-
- Select 0 distinct string row
- ('100' < value < '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8712 / 8743 1.8 553.9 1.0X
- Parquet Vectorized (Pushdown) 995 / 1030 15.8 63.3 8.8X
- Native ORC Vectorized 8345 / 8362 1.9 530.6 1.0X
- Native ORC Vectorized (Pushdown) 84 / 87 187.6 5.3 103.9X
-
-
- Select 1 distinct string row
- (value = '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8574 / 8610 1.8 545.1 1.0X
- Parquet Vectorized (Pushdown) 1127 / 1135 14.0 71.6 7.6X
- Native ORC Vectorized 8163 / 8181 1.9 519.0 1.1X
- Native ORC Vectorized (Pushdown) 426 / 433 36.9 27.1 20.1X
-
-
- Select 1 distinct string row
- (value <=> '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8549 / 8568 1.8 543.5 1.0X
- Parquet Vectorized (Pushdown) 1124 / 1131 14.0 71.4 7.6X
- Native ORC Vectorized 8163 / 8210 1.9 519.0 1.0X
- Native ORC Vectorized (Pushdown) 426 / 436 36.9 27.1 20.1X
-
-
- Select 1 distinct string row
- ('100' <= value <= '100'): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 8889 / 8896 1.8 565.2 1.0X
- Parquet Vectorized (Pushdown) 1161 / 1168 13.6 73.8 7.7X
- Native ORC Vectorized 8519 / 8554 1.8 541.6 1.0X
- Native ORC Vectorized (Pushdown) 430 / 437 36.6 27.3 20.7X
-
-
- Select all distinct string rows
- (value IS NOT NULL): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Parquet Vectorized 20433 / 20533 0.8 1299.1 1.0X
- Parquet Vectorized (Pushdown) 20433 / 20456 0.8 1299.1 1.0X
- Native ORC Vectorized 25435 / 25513 0.6 1617.1 0.8X
- Native ORC Vectorized (Pushdown) 25435 / 25507 0.6 1617.1 0.8X
- */
-
benchmark.run()
}
@@ -408,14 +239,8 @@ object FilterPushdownBenchmark {
}
}
- def main(args: Array[String]): Unit = {
- val numRows = 1024 * 1024 * 15
- val width = 5
-
- // Pushdown for many distinct value case
+ ignore("Pushdown for many distinct value case") {
withTempPath { dir =>
- val mid = numRows / 2
-
withTempTable("orcTable", "patquetTable") {
Seq(true, false).foreach { useStringForValue =>
prepareTable(dir, numRows, width, useStringForValue)
@@ -427,16 +252,122 @@ object FilterPushdownBenchmark {
}
}
}
+ }
- // Pushdown for few distinct value case (use dictionary encoding)
+ ignore("Pushdown for few distinct value case (use dictionary encoding)") {
withTempPath { dir =>
val numDistinctValues = 200
- val mid = numDistinctValues / 2
withTempTable("orcTable", "patquetTable") {
prepareStringDictTable(dir, numRows, numDistinctValues, width)
- runStringBenchmark(numRows, width, mid, "distinct string")
+ runStringBenchmark(numRows, width, numDistinctValues / 2, "distinct string")
}
}
}
+
+ // Benchmarks `LIKE 'prefix%'` predicates on the string-valued table, with and without pushdown.
+ ignore("Pushdown benchmark for StringStartsWith") {
+   withTempPath { dir =>
+     // The views registered by `saveAsTable` are "orcTable" and "parquetTable"; the second
+     // name was previously misspelled here, so it did not refer to the view that exists.
+     withTempTable("orcTable", "parquetTable") {
+       prepareTable(dir, numRows, width, true)
+       // Every digit of `mid` except the last one.
+       val midPrefix = mid.toString.dropRight(1)
+       Seq(
+         "value like '10%'",
+         "value like '1000%'",
+         s"value like '${midPrefix}%'"
+       ).foreach { whereExpr =>
+         val title = s"StringStartsWith filter: ($whereExpr)"
+         filterPushDownBenchmark(numRows, title, whereExpr)
+       }
+     }
+   }
+ }
+
+ // Benchmarks equality and range predicates on a decimal `value` column for three precisions.
+ ignore(s"Pushdown benchmark for ${DecimalType.simpleString}") {
+   withTempPath { dir =>
+     Seq(
+       s"decimal(${Decimal.MAX_INT_DIGITS}, 2)",
+       s"decimal(${Decimal.MAX_LONG_DIGITS}, 2)",
+       s"decimal(${DecimalType.MAX_PRECISION}, 2)"
+     ).foreach { dt =>
+       val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
+       // NOTE(review): with the narrowest type the ids near numRows (15728640) presumably do
+       // not fit into the integral digits left by a scale of 2 -- confirm how the cast treats
+       // them, since that changes how many rows the "Select N%" filters below really match.
+       val df = spark.range(numRows).selectExpr(columns: _*)
+         .withColumn("value", monotonically_increasing_id().cast(dt))
+       // The views registered by `saveAsTable` are "orcTable" and "parquetTable"; the second
+       // name was previously misspelled here, so it did not refer to the view that exists.
+       withTempTable("orcTable", "parquetTable") {
+         saveAsTable(df, dir)
+
+         val equalsMid = s"value = $mid"
+         filterPushDownBenchmark(numRows, s"Select 1 $dt row ($equalsMid)", equalsMid)
+
+         val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
+         Seq(10, 50, 90).foreach { percent =>
+           val upperBound = numRows * percent / 100
+           filterPushDownBenchmark(
+             numRows,
+             s"Select $percent% $dt rows (value < $upperBound)",
+             s"value < $upperBound",
+             selectExpr
+           )
+         }
+       }
+     }
+   }
+ }
+
+ // Benchmarks `value in (...)` predicates for several list sizes and value ranges.
+ ignore("Pushdown benchmark for InSet -> InFilters") {
+   withTempPath { dir =>
+     // The views registered by `saveAsTable` are "orcTable" and "parquetTable"; the second
+     // name was previously misspelled here, so it did not refer to the view that exists.
+     withTempTable("orcTable", "parquetTable") {
+       prepareTable(dir, numRows, width, false)
+       Seq(5, 10, 50, 100).foreach { count =>
+         Seq(10, 50, 90).foreach { distribution =>
+           // Candidates are drawn from the lowest `distribution` percent of the id range.
+           val upperBound = numRows * distribution / 100
+           // The generator is not seeded, so the IN-list differs from run to run.
+           val filter = Seq.fill(count)(Random.nextInt(upperBound))
+           val whereExpr = s"value in(${filter.mkString(",")})"
+           val title = s"InSet -> InFilters (values count: $count, distribution: $distribution)"
+           filterPushDownBenchmark(numRows, title, whereExpr)
+         }
+       }
+     }
+   }
+ }
+
+ // Benchmarks equality and range predicates on a tinyint `value` column.
+ ignore(s"Pushdown benchmark for ${ByteType.simpleString}") {
+   withTempPath { dir =>
+     val columns = (1 to width).map(i => s"CAST(id AS string) c$i")
+     // `value` is the generated id modulo Byte.MaxValue, so it always fits into a byte.
+     val df = spark.range(numRows).selectExpr(columns: _*)
+       .withColumn("value", (monotonically_increasing_id() % Byte.MaxValue).cast(ByteType))
+       .orderBy("value")
+     // The views registered by `saveAsTable` are "orcTable" and "parquetTable"; the second
+     // name was previously misspelled here, so it did not refer to the view that exists.
+     withTempTable("orcTable", "parquetTable") {
+       saveAsTable(df, dir)
+
+       val byteTypeName = ByteType.simpleString
+       // NOTE(review): because of the modulo above this predicate presumably matches many
+       // rows, not one; the title is kept as is so results stay comparable.
+       val equalsMid = s"value = CAST(${Byte.MaxValue / 2} AS $byteTypeName)"
+       filterPushDownBenchmark(numRows, s"Select 1 $byteTypeName row ($equalsMid)", equalsMid)
+
+       val selectExpr = (1 to width).map(i => s"MAX(c$i)").mkString("", ",", ", MAX(value)")
+       Seq(10, 50, 90).foreach { percent =>
+         val upperBound = s"CAST(${Byte.MaxValue * percent / 100} AS $byteTypeName)"
+         filterPushDownBenchmark(
+           numRows,
+           s"Select $percent% $byteTypeName rows (value < $upperBound)",
+           s"value < $upperBound",
+           selectExpr
+         )
+       }
+     }
+   }
+ }
+}
+
+/**
+ * Mix-in for benchmark suites that redeclares the per-test hooks of
+ * `BeforeAndAfterEachTestData`. Both hooks only delegate to the inherited implementation;
+ * suites mixing this in override them to add their own set-up and tear-down.
+ */
+trait BenchmarkBeforeAndAfterEachTest extends BeforeAndAfterEachTestData { self: Suite =>
+
+ override def beforeEach(td: TestData): Unit = super.beforeEach(td)
+
+ override def afterEach(td: TestData): Unit = super.afterEach(td)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org