You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/08 07:37:29 UTC
[06/51] [partial] hive git commit: HIVE-20315: Vectorization: Fix
more NULL / Wrong Results issues and avoid unnecessary casts/conversions
(Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query8.q.out b/ql/src/test/results/clientpositive/perf/spark/query8.q.out
index 6b14eb9..c231df7 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query8.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query8.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select s_store_name
,sum(ss_net_profit)
from store_sales
@@ -105,7 +105,7 @@ select s_store_name
order by s_store_name
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select s_store_name
,sum(ss_net_profit)
from store_sales
@@ -212,6 +212,10 @@ select s_store_name
order by s_store_name
limit 100
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-3 is a root stage
Stage-2 depends on stages: Stage-3
@@ -235,34 +239,76 @@ STAGE PLANS:
alias: customer
filterExpr: ((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 10:string, val Y), SelectColumnIsNotNull(col 4:int))
predicate: ((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_current_addr_sk (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4]
Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 40000000 Data size: 34400807926 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 6
Map Operator Tree:
TableScan
alias: customer_address
filterExpr: ((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57
648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '
14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703',
'83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col 14, values 89436, 30868, 65085, 22977, 83927, 77557, 58429, 40697, 80614, 10502, 32779, 91137, 61265, 98294, 17921, 18427, 21203, 59362, 87291, 84093, 21505, 17184, 10866, 67898, 25797, 28055, 18377, 80332, 74535, 21757, 29742, 90885, 29898, 17819, 40811, 25990, 47513, 89531, 91068, 10391, 18846, 99223, 82637, 41368, 83658, 86199, 81625, 26696, 89338, 88425, 32200, 81427, 19053, 77471, 36610, 99823, 43276, 41249, 48584, 83550, 82276, 18842, 78890, 14090, 38123, 40936, 34425, 19850, 43286, 80072, 79188, 54191, 11395, 50497, 84861, 90733, 21068, 57666, 37119, 25004, 57835, 70067, 62878, 95806, 19303, 18840, 19124, 29785, 16737, 16022, 49613, 89977, 68310, 60069, 98360, 48649, 39050, 41793, 25002, 27413, 39736, 47208, 16515, 94808, 57648, 15009, 80015, 42961, 63982, 21744, 71853, 81087, 67468, 34175, 64008, 20261, 11201, 51799, 48043, 45645, 61163, 48375, 36447, 57042, 21218, 41100, 89
951, 22745, 35851, 83326, 61125, 78298, 80752, 49858, 52940, 96976, 63792, 11376, 53582, 18717, 90226, 50530, 94203, 99447, 27670, 96577, 57856, 56372, 16165, 23427, 54561, 28806, 44439, 22926, 30123, 61451, 92397, 56979, 92309, 70873, 13355, 21801, 46346, 37562, 56458, 28286, 47306, 99555, 69399, 26234, 47546, 49661, 88601, 35943, 39936, 25632, 24611, 44166, 56648, 30379, 59785, 11110, 14329, 93815, 52226, 71381, 13842, 25612, 63294, 14664, 21077, 82626, 18799, 60915, 81020, 56447, 76619, 11433, 13414, 42548, 92713, 70467, 30884, 47484, 16072, 38936, 13036, 88376, 45539, 35901, 19506, 65690, 73957, 71850, 49231, 14276, 20005, 18384, 76615, 11635, 38177, 55607, 41369, 95447, 58581, 58149, 91946, 33790, 76232, 75692, 95464, 22246, 51061, 56692, 53121, 77209, 15482, 10688, 14868, 45907, 73520, 72666, 25734, 17959, 24677, 66446, 94627, 53535, 15560, 41967, 69297, 11929, 59403, 33283, 52232, 57350, 43933, 40921, 36635, 10827, 71286, 19736, 80619, 25251, 95042, 15526, 36496, 55854, 49124
, 81980, 35375, 49157, 63512, 28944, 14946, 36503, 54010, 18767, 23969, 43905, 66979, 33113, 21286, 58471, 59080, 13395, 79144, 70373, 67031, 38360, 26705, 50906, 52406, 26066, 73146, 15884, 31897, 30045, 61068, 45550, 92454, 13376, 14354, 19770, 22928, 97790, 50723, 46081, 30202, 14410, 20223, 88500, 67298, 13261, 14172, 81410, 93578, 83583, 46047, 94167, 82564, 21156, 15799, 86709, 37931, 74703, 83103, 23054, 70470, 72008, 49247, 91911, 69998, 20961, 70070, 63197, 54853, 88191, 91830, 49521, 19454, 81450, 89091, 62378, 25683, 61869, 51744, 36580, 85778, 36871, 48121, 28810, 83712, 45486, 67393, 26935, 42393, 20132, 55349, 86057, 21309, 80218, 10094, 11357, 48819, 39734, 40758, 30432, 21204, 29467, 30214, 61024, 55307, 74621, 11622, 68908, 33032, 52868, 99194, 99900, 84936, 69036, 99149, 45013, 32895, 59004, 32322, 14933, 32936, 33562, 72550, 27385, 58049, 58200, 16808, 21360, 32961, 18586, 79307, 15492)(children: StringSubstrColStartLen(col 9:string, start 0, length 5) -> 14:strin
g), SelectColumnIsNotNull(col 15:string)(children: StringSubstrColStartLen(col 14:string, start 0, length 2)(children: StringSubstrColStartLen(col 9:string, start 0, length 5) -> 14:string) -> 15:string))
predicate: ((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '5
7648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231',
'14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703'
, '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type: boolean)
Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: substr(ca_zip, 1, 5) (type: string)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [14]
+ selectExpressions: StringSubstrColStartLen(col 9:string, start 0, length 5) -> 14:string
Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 14:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -271,30 +317,71 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 9
Map Operator Tree:
TableScan
alias: customer_address
filterExpr: (ca_address_sk is not null and substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 15:string)(children: StringSubstrColStartLen(col 14:string, start 0, length 2)(children: StringSubstrColStartLen(col 9:string, start 0, length 5) -> 14:string) -> 15:string))
predicate: (ca_address_sk is not null and substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_zip (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 9]
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 10
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -318,22 +405,53 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Reducer 11
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColGreaterLongScalar(col 1:bigint, val 10)
predicate: (_col1 > 10L) (type: boolean)
Statistics: Num rows: 7333333 Data size: 7442452291 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: substr(_col0, 1, 5) (type: string)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2]
+ selectExpressions: StringSubstrColStartLen(col 0:string, start 0, length 5) -> 2:string
Statistics: Num rows: 7333333 Data size: 7442452291 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 2:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -342,19 +460,45 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 7333333 Data size: 7442452291 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 12
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 3666666 Data size: 3721225638 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -363,19 +507,45 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 7
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -384,27 +554,56 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 13666666 Data size: 13870024459 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 8
Execution mode: vectorized
Local Work:
Map Reduce Local Work
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 6833333 Data size: 6935012229 Basic stats: COMPLETE Column stats: NONE
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColEqualLongScalar(col 1:bigint, val 2)
predicate: (_col1 = 2L) (type: boolean)
Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 1014 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 substr(_col0, 1, 2) (type: string)
1 substr(_col2, 1, 2) (type: string)
@@ -419,12 +618,22 @@ STAGE PLANS:
alias: store
filterExpr: (s_store_sk is not null and substr(s_zip, 1, 2) is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 30:string)(children: StringSubstrColStartLen(col 25:string, start 0, length 2) -> 30:string))
predicate: (s_store_sk is not null and substr(s_zip, 1, 2) is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_store_name (type: string), s_zip (type: string)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 5, 25]
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -432,15 +641,32 @@ STAGE PLANS:
keys:
0 substr(_col0, 1, 2) (type: string)
1 substr(_col2, 1, 2) (type: string)
+ Map Join Vectorization:
+ bigTableKeyExpressions: StringSubstrColStartLen(col 25:string, start 0, length 2) -> 30:string
+ className: VectorMapJoinInnerBigOnlyStringOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
outputColumnNames: _col1, _col2
input vertices:
0 Reducer 8
Statistics: Num rows: 1874 Data size: 3581903 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col1 (type: int)
1 _col1 (type: int)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Local Work:
Map Reduce Local Work
@@ -458,42 +684,93 @@ STAGE PLANS:
alias: store_sales
filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int))
predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 7, 22]
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 5
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 10:int, val 1), FilterLongColEqualLongScalar(col 6:int, val 2002), SelectColumnIsNotNull(col 0:int))
predicate: ((d_qoy = 1) and (d_year = 2002) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Local Work:
Map Reduce Local Work
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -528,9 +805,23 @@ STAGE PLANS:
value expressions: _col1 (type: decimal(17,2))
Reducer 3
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2)
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -538,21 +829,41 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: decimal(17,2))
Reducer 4
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2))
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat