You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/07/20 10:16:51 UTC
[25/36] hive git commit: HIVE-16369: Vectorization: Support PTF (Part
1: No Custom Window Framing -- Default Only) (Matt McCline,
reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
new file mode 100644
index 0000000..5ab77c8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out
@@ -0,0 +1,2022 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ `dec` decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ `dec` decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
+max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
+max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_retailprice (type: double)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 7]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: [5]
+ Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: lead and lag function not supported in argument expression of aggregation function sum
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col7 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col7
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: sum_window_1
+ arguments: lag(...)
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: last_value_window_2
+ arguments: _col7
+ name: last_value
+ window function: GenericUDAFLastValueEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: max_window_3
+ arguments: _col7
+ name: max
+ window function: GenericUDAFMaxEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: min_window_4
+ arguments: _col7
+ name: min
+ window function: GenericUDAFMinEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: first_value_window_5
+ arguments: _col7
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ Lead/Lag information: lag(...) (type: double)
+ Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), (round(sum_window_0, 2) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean), ((max_window_3 - min_window_4) = (last_value_window_2 - first_value_window_5)) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 26 Data size: 3068 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 3068 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
+max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
+max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_retailprice p_size _c3 _c4
+Manufacturer#1 1173.15 2 true true
+Manufacturer#1 1173.15 2 true true
+Manufacturer#1 1414.42 28 true true
+Manufacturer#1 1602.59 6 true true
+Manufacturer#1 1632.66 42 true true
+Manufacturer#1 1753.76 34 true true
+Manufacturer#2 1690.68 14 true true
+Manufacturer#2 1698.66 25 true true
+Manufacturer#2 1701.6 18 true true
+Manufacturer#2 1800.7 40 true true
+Manufacturer#2 2031.98 2 true true
+Manufacturer#3 1190.27 14 true true
+Manufacturer#3 1337.29 45 true true
+Manufacturer#3 1410.39 19 true true
+Manufacturer#3 1671.68 17 true true
+Manufacturer#3 1922.98 1 true true
+Manufacturer#4 1206.26 27 true true
+Manufacturer#4 1290.35 12 true true
+Manufacturer#4 1375.42 39 true true
+Manufacturer#4 1620.67 10 true true
+Manufacturer#4 1844.92 7 true true
+Manufacturer#5 1018.1 46 true true
+Manufacturer#5 1464.48 23 true true
+Manufacturer#5 1611.66 6 true true
+Manufacturer#5 1788.73 2 true true
+Manufacturer#5 1789.69 31 true true
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_retailprice, p_size,
+rank() over (distribute by p_mfgr sort by p_retailprice) as r,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
+from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_retailprice, p_size,
+rank() over (distribute by p_mfgr sort by p_retailprice) as r,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
+from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_retailprice (type: double)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 7]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: [5]
+ Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col7 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col7
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: sum_window_1
+ arguments: _col7
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 26 Data size: 9828 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: double), (sum_window_1 - 5.0) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 3380 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_retailprice, p_size,
+rank() over (distribute by p_mfgr sort by p_retailprice) as r,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
+rank() over (distribute by p_mfgr sort by p_retailprice) as r,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
+sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_retailprice p_size r s2 s1
+Manufacturer#1 1173.15 2 1 1173.15 1168.15
+Manufacturer#1 1173.15 2 1 2346.3 2341.3
+Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003
+Manufacturer#1 1602.59 6 4 5363.31 5358.31
+Manufacturer#1 1632.66 42 5 6995.97 6990.97
+Manufacturer#1 1753.76 34 6 8749.73 8744.73
+Manufacturer#2 1690.68 14 1 1690.68 1685.68
+Manufacturer#2 1698.66 25 2 3389.34 3384.34
+Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005
+Manufacturer#2 1800.7 40 4 6891.64 6886.64
+Manufacturer#2 2031.98 2 5 8923.62 8918.62
+Manufacturer#3 1190.27 14 1 1190.27 1185.27
+Manufacturer#3 1337.29 45 2 2527.56 2522.56
+Manufacturer#3 1410.39 19 3 3937.95 3932.95
+Manufacturer#3 1671.68 17 4 5609.63 5604.63
+Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001
+Manufacturer#4 1206.26 27 1 1206.26 1201.26
+Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997
+Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997
+Manufacturer#4 1620.67 10 4 5492.7 5487.7
+Manufacturer#4 1844.92 7 5 7337.62 7332.62
+Manufacturer#5 1018.1 46 1 1018.1 1013.1
+Manufacturer#5 1464.48 23 2 2482.58 2477.58
+Manufacturer#5 1611.66 6 3 4094.24 4089.24
+Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999
+Manufacturer#5 1789.69 31 5 7672.66 7667.66
+PREHOOK: query: explain vectorization detail
+select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Reduce Output Operator
+ key expressions: t (type: tinyint), bo (type: boolean), s (type: string), si (type: smallint), f (type: float)
+ sort order: ++++-
+ Map-reduce partition columns: t (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [0, 6, 7, 1, 4]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [0]
+ valueColumns: []
+ Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ includeColumns: [0, 1, 4, 6, 7]
+ dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: lead not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: string)
+ outputColumnNames: _col0, _col1, _col4, _col6, _col7
+ Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: tinyint, _col1: smallint, _col4: float, _col6: boolean, _col7: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col6 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 DESC NULLS LAST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: lead_window_0
+ arguments: _col4, 3
+ name: lead
+ window function: GenericUDAFLeadEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col1 (type: smallint), _col4 (type: float), (UDFToFloat(_col1) - lead_window_0) (type: float)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 8771 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 11600 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 11600 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+s si f _c3
+alice allen 400 76.31 337.23
+alice davidson 384 71.97 357.79
+alice king 455 2.48 395.93
+alice king 458 62.77 384.16998
+alice xylophone 485 26.21 464.05
+bob falkner 260 59.07 242.4
+bob ichabod 454 73.83 381.7
+bob polk 264 20.95 257.17
+bob underhill 454 17.6 424.94
+bob underhill 465 72.3 453.17
+bob van buren 433 6.83 398.4
+calvin ichabod 431 29.06 334.22
+david garcia 485 11.83 421.51
+ethan steinbeck 298 34.6 288.14
+fred ellison 376 96.78 330.76
+holly steinbeck 384 63.49 293.7
+holly underhill 318 9.86 269.91
+irene ellison 458 45.24 365.29
+irene underhill 307 90.3 244.19
+jessica johnson 494 48.09 490.18
+jessica king 459 92.71 452.2
+jessica white 284 62.81 209.08
+luke garcia 311 3.82 267.27
+luke young 451 6.8 429.0
+mike king 275 74.92 211.81
+oscar garcia 362 43.73 340.66
+priscilla laertes 316 22.0 296.06
+priscilla quirinius 423 63.19 362.72
+priscilla zipper 485 21.34 400.61
+quinn ellison 266 19.94 209.95
+quinn polk 507 60.28 447.66
+sarah robinson 320 84.39 309.74
+tom polk 346 56.05 320.33
+ulysses ellison 381 59.34 358.66
+ulysses quirinius 303 10.26 259.6
+ulysses robinson 313 25.67 269.31
+ulysses steinbeck 333 22.34 270.61
+victor allen 337 43.4 311.5
+victor hernandez 447 43.69 375.22
+victor xylophone 438 62.39 424.33
+wendy quirinius 279 25.5 250.25
+wendy robinson 275 71.78 262.88
+wendy xylophone 314 13.67 295.73
+xavier garcia 493 28.75 474.56
+zach thompson 386 12.12 377.63
+zach young 286 18.27 263.65
+alice falkner 280 18.44 227.7
+bob ellison 339 8.37 300.95
+bob johnson 374 22.35 326.49
+calvin white 280 52.3 198.32
+david carson 270 38.05 255.77
+david falkner 469 47.51 388.35
+david hernandez 408 81.68 339.27
+ethan underhill 339 14.23 256.26
+gabriella brown 498 80.65 413.25
+holly nixon 505 68.73 440.71
+holly polk 268 82.74 182.04001
+holly thompson 387 84.75 298.22
+irene young 458 64.29 401.8
+jessica miller 299 85.96 243.41
+katie ichabod 469 88.78 385.61
+luke ichabod 289 56.2 286.74
+luke king 337 55.59 274.88
+mike allen 465 83.39 383.03
+mike polk 500 2.26 427.74
+mike white 454 62.12 430.78
+mike xylophone 448 81.97 447.17
+nick nixon 335 72.26 240.78
+nick robinson 350 23.22 294.59
+oscar davidson 432 0.83 420.93
+oscar johnson 315 94.22 233.05
+oscar johnson 469 55.41 468.44
+oscar miller 324 11.07 265.19
+rachel davidson 507 81.95 468.78
+rachel thompson 344 0.56 246.12
+sarah miller 386 58.81 304.36
+sarah xylophone 275 38.22 177.48999
+sarah zipper 376 97.88 294.61
+tom hernandez 467 81.64 459.9
+tom hernandez 477 97.51 415.19
+tom steinbeck 414 81.39 361.87
+ulysses carson 343 7.1 314.22
+victor robinson 415 61.81 349.5
+victor thompson 344 52.13 NULL
+xavier ovid 280 28.78 NULL
+yuri xylophone 430 65.5 NULL
+alice underhill 389 26.68 368.06
+alice underhill 446 6.49 444.21
+bob ovid 331 67.12 236.43
+bob van buren 406 20.94 383.32
+david falkner 406 1.79 374.34
+david miller 450 94.57 380.13
+ethan allen 380 22.68 375.6
+ethan king 395 31.66 361.51
+ethan nixon 475 69.87 431.39
+ethan polk 283 4.4 243.82
+fred allen 331 33.49 281.68
+fred king 511 43.61 457.22
+fred polk 261 39.18 248.73
+fred young 303 49.32 221.51001
+PREHOOK: query: explain vectorization detail
+select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Reduce Output Operator
+ key expressions: si (type: smallint), i (type: int), s (type: string)
+ sort order: +++
+ Map-reduce partition columns: si (type: smallint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [1, 2, 7]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [1]
+ valueColumns: []
+ Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ includeColumns: [1, 2, 7]
+ dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: lead not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string)
+ outputColumnNames: _col1, _col2, _col7
+ Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: smallint, _col2: int, _col7: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST, _col7 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: lead_window_0
+ arguments: _col2, 3, 0
+ name: lead
+ window function: GenericUDAFLeadEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col2 (type: int), (_col2 - lead_window_0) (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9421 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 10800 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+s i _c2
+wendy garcia 65540 -18
+ethan thompson 65543 -20
+zach nixon 65549 -31
+alice robinson 65558 -28
+wendy nixon 65563 -33
+victor robinson 65580 -19
+ethan falkner 65586 -18
+victor davidson 65596 -17
+xavier quirinius 65599 -14
+fred quirinius 65604 -11
+nick zipper 65613 -3
+xavier van buren 65613 -7
+victor johnson 65615 -12
+alice ovid 65616 -24
+xavier ovid 65620 -23
+ulysses white 65627 -24
+sarah white 65640 -13
+calvin young 65643 -25
+victor thompson 65651 -42
+calvin johnson 65653 -53
+irene polk 65668 -45
+zach underhill 65693 -38
+quinn hernandez 65706 -27
+rachel ovid 65713 -24
+gabriella falkner 65731 -7
+zach white 65733 -8
+fred hernandez 65737 -7
+rachel ellison 65738 -6
+oscar steinbeck 65741 -6
+alice ellison 65744 -8
+tom allen 65744 -19
+quinn quirinius 65747 -31
+victor hernandez 65752 -26
+holly xylophone 65763 -26
+david davidson 65778 65778
+ulysses young 65778 65778
+sarah brown 65789 65789
+xavier brown 65541 -16
+zach hernandez 65542 -18
+katie ichabod 65547 -19
+oscar young 65557 -15
+holly white 65560 -14
+priscilla laertes 65566 -9
+ethan king 65572 -6
+zach hernandez 65574 -10
+oscar thompson 65575 -13
+victor xylophone 65578 -16
+gabriella ellison 65584 -26
+nick quirinius 65588 -22
+holly robinson 65594 -18
+alice xylophone 65610 -16
+yuri brown 65610 -21
+sarah hernandez 65612 -26
+katie garcia 65626 -28
+jessica laertes 65631 -23
+ethan underhill 65638 -17
+irene young 65654 -37
+priscilla thompson 65654 -40
+luke quirinius 65655 -44
+david brown 65691 -20
+luke falkner 65694 -18
+priscilla miller 65699 -20
+rachel robinson 65711 -9
+ethan polk 65712 -10
+wendy brown 65719 -13
+mike underhill 65720 -18
+zach underhill 65722 -26
+nick zipper 65732 -20
+fred brown 65738 -18
+ulysses young 65748 -23
+nick davidson 65752 -19
+fred zipper 65756 -15
+yuri nixon 65771 -10
+zach hernandez 65771 -19
+zach zipper 65771 65771
+alice underhill 65781 65781
+oscar laertes 65790 65790
+sarah zipper 65546 -19
+bob falkner 65551 -17
+luke ovid 65551 -17
+katie allen 65565 -4
+nick falkner 65568 -5
+zach steinbeck 65568 -11
+oscar van buren 65569 -13
+gabriella young 65573 -11
+jessica ichabod 65579 -24
+david garcia 65582 -24
+nick xylophone 65584 -27
+calvin johnson 65603 -14
+xavier zipper 65606 -50
+alice nixon 65611 -58
+jessica laertes 65617 -62
+fred king 65656 -61
+priscilla underhill 65669 -48
+priscilla zipper 65679 -45
+nick king 65717 -11
+sarah polk 65717 -17
+irene quirinius 65724 -28
+tom laertes 65728 -25
+yuri johnson 65734 -27
+PREHOOK: query: explain vectorization detail
+select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Reduce Output Operator
+ key expressions: b (type: bigint), si (type: smallint), s (type: string), d (type: double)
+ sort order: ++++
+ Map-reduce partition columns: b (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [3, 1, 7, 5]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [3]
+ valueColumns: []
+ Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ includeColumns: [1, 3, 5, 7]
+ dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey2 (type: string)
+ outputColumnNames: _col1, _col3, _col5, _col7
+ Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: smallint, _col3: bigint, _col5: double, _col7: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col5 ASC NULLS FIRST
+ partition by: _col3
+ raw input shape:
+ window functions:
+ window function definition
+ alias: lag_window_0
+ arguments: _col5, 3
+ name: lag
+ window function: GenericUDAFLagEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), _col1 (type: smallint), _col5 (type: double), (UDFToDouble(_col1) - lag_window_0) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 8479 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 12000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 12000 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+s si d _c3
+jessica ellison 262 30.41 NULL
+david young 266 45.12 NULL
+jessica steinbeck 274 2.15 NULL
+david zipper 275 43.45 244.59
+zach nixon 283 15.95 237.88
+holly allen 285 24.37 282.85
+irene garcia 292 33.54 248.55
+ulysses xylophone 292 44.66 276.05
+irene van buren 309 35.81 284.63
+sarah miller 312 6.65 278.46
+victor garcia 312 39.14 267.34000000000003
+ethan ichabod 319 29.4 283.19
+wendy falkner 322 10.02 315.35
+oscar miller 324 25.95 284.86
+david ovid 332 28.34 302.6
+alice zipper 333 3.38 322.98
+yuri nixon 333 8.28 307.05
+ulysses nixon 335 18.48 306.66
+david ovid 336 9.36 332.62
+calvin falkner 337 17.63 328.72
+katie quirinius 349 11.3 330.52
+quinn miller 351 22.46 341.64
+victor xylophone 357 38.58 339.37
+ethan garcia 368 9.2 356.7
+nick steinbeck 395 37.54 372.54
+ulysses ichabod 415 47.61 376.42
+rachel thompson 416 37.99 406.8
+calvin young 418 47.22 380.46
+katie xylophone 425 32.59 377.39
+nick quirinius 429 19.63 391.01
+ethan ellison 453 47.92 405.78
+irene nixon 454 48.03 421.40999999999997
+bob steinbeck 462 47.04 442.37
+luke robinson 462 47.48 414.08
+gabriella steinbeck 467 9.35 418.97
+tom hernandez 467 29.36 419.96
+irene polk 485 14.26 437.52
+mike xylophone 494 36.92 484.65
+calvin allen 499 39.99 469.64
+quinn steinbeck 503 16.62 488.74
+calvin thompson 263 30.87 NULL
+rachel quirinius 263 29.46 NULL
+ulysses garcia 263 31.85 NULL
+mike steinbeck 266 48.57 235.13
+rachel young 275 14.75 245.54
+tom king 278 31.11 246.15
+oscar robinson 283 30.35 234.43
+zach allen 284 1.88 269.25
+bob king 308 27.61 276.89
+ulysses allen 310 22.77 279.65
+fred nixon 317 0.48 315.12
+gabriella robinson 321 0.33 293.39
+bob johnson 325 9.61 302.23
+rachel davidson 335 2.34 334.52
+fred brown 337 5.8 336.67
+wendy ellison 350 20.25 340.39
+zach falkner 391 13.67 388.66
+katie xylophone 410 39.09 404.2
+holly king 413 3.56 392.75
+sarah van buren 417 7.81 403.33
+calvin van buren 430 36.01 390.90999999999997
+katie white 434 33.56 430.44
+oscar quirinius 454 7.03 446.19
+zach young 505 18.19 468.99
+gabriella robinson 506 12.8 472.44
+sarah xylophone 507 16.09 499.97
+rachel thompson 267 46.87 NULL
+gabriella van buren 271 41.04 NULL
+mike steinbeck 284 11.44 NULL
+ethan ovid 293 2.08 246.13
+luke falkner 293 40.67 251.96
+irene nixon 321 24.35 309.56
+mike van buren 327 2.58 324.92
+ulysses robinson 329 26.64 288.33
+quinn laertes 332 10.71 307.65
+tom polk 346 34.03 343.42
+jessica johnson 352 45.71 325.36
+xavier davidson 354 33.9 343.29
+wendy nixon 364 29.42 329.97
+jessica quirinius 375 47.33 329.29
+xavier brown 376 26.17 342.1
+gabriella davidson 383 18.87 353.58
+jessica brown 388 34.09 340.67
+gabriella garcia 391 32.44 364.83
+ethan miller 396 49.07 377.13
+bob garcia 416 7.82 381.90999999999997
+priscilla hernandez 416 29.94 383.56
+holly nixon 419 17.81 369.93
+nick underhill 429 39.54 421.18
+xavier falkner 434 0.88 404.06
+luke robinson 461 44.02 443.19
+bob underhill 465 22.58 425.46
+ulysses king 483 37.98 482.12
+jessica miller 486 26.14 441.98
+bob ovid 493 9.7 470.42
+alice falkner 500 37.85 462.02
+quinn xylophone 267 49.8 NULL
+gabriella thompson 268 17.15 NULL
+calvin xylophone 275 49.32 NULL
+gabriella zipper 279 30.41 229.2
+PREHOOK: query: explain vectorization detail
+select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Reduce Output Operator
+ key expressions: f (type: float), b (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: f (type: float)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [4, 3]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [4]
+ valueColumns: [7]
+ Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ value expressions: s (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ includeColumns: [3, 4, 7]
+ dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col5 (type: string)
+ outputColumnNames: _col3, _col4, _col7
+ Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col3: bigint, _col4: float, _col7: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col3 ASC NULLS FIRST
+ partition by: _col4
+ raw input shape:
+ window functions:
+ window function definition
+ alias: lag_window_0
+ arguments: _col7, 3, 'fred'
+ name: lag
+ window function: GenericUDAFLagEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col7 (type: string), lag_window_0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 9085 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+s lag_window_0
+yuri thompson fred
+bob ichabod fred
+luke king fred
+luke steinbeck fred
+fred zipper fred
+quinn miller fred
+calvin van buren fred
+holly steinbeck fred
+david davidson fred
+calvin thompson fred
+calvin quirinius fred
+david ovid fred
+holly thompson fred
+nick zipper fred
+victor steinbeck fred
+victor robinson fred
+zach ovid fred
+ulysses zipper fred
+luke falkner fred
+irene thompson fred
+yuri johnson fred
+ulysses falkner fred
+gabriella robinson fred
+alice robinson fred
+priscilla xylophone fred
+david laertes fred
+mike underhill fred
+victor van buren fred
+holly falkner fred
+priscilla falkner fred
+ethan ovid fred
+luke zipper fred
+mike steinbeck fred
+calvin white fred
+alice quirinius fred
+irene miller fred
+wendy polk fred
+nick young fred
+yuri davidson fred
+ethan ellison fred
+zach hernandez fred
+wendy miller fred
+katie underhill fred
+irene zipper fred
+holly allen fred
+quinn brown fred
+calvin ovid fred
+zach robinson fred
+nick miller fred
+mike allen fred
+yuri van buren fred
+priscilla young fred
+zach miller fred
+victor xylophone fred
+sarah falkner fred
+rachel ichabod fred
+alice robinson fred
+calvin ovid fred
+calvin ovid fred
+luke laertes fred
+david hernandez fred
+alice ovid fred
+luke quirinius fred
+oscar white fred
+zach falkner fred
+rachel thompson fred
+priscilla king fred
+xavier polk fred
+wendy ichabod fred
+rachel ovid fred
+wendy allen fred
+luke brown fred
+mike brown fred
+oscar ichabod fred
+xavier garcia fred
+yuri brown fred
+bob xylophone fred
+luke davidson fred
+ethan quirinius fred
+zach davidson fred
+irene miller fred
+wendy king fred
+bob zipper fred
+sarah thompson fred
+bob carson fred
+bob laertes fred
+xavier allen fred
+sarah robinson fred
+david king fred
+oscar davidson fred
+victor hernandez fred
+wendy polk fred
+david ellison fred
+ulysses johnson fred
+jessica ovid fred
+bob king fred
+ulysses garcia fred
+irene falkner fred
+holly robinson fred
+yuri white fred
+PREHOOK: query: explain vectorization detail
+select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_type (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string), p_type (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumns: [2, 4]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: [7]
+ Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_retailprice (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [2, 4, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col5:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: double
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double)
+ outputColumnNames: _col2, _col4, _col7
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2]
+ Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col2: string, _col4: string, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col2, _col4
+ raw input shape:
+ window functions:
+ window function definition
+ alias: avg_window_0
+ arguments: _col7
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDouble
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorDoubleAvg]
+ functionInputExpressions: [col 2]
+ functionNames: [avg]
+ keyInputColumns: [0, 1]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 0]
+ outputColumns: [3, 0, 1, 2]
+ outputTypes: [double, string, string, double]
+ partitionExpressions: [col 0, col 1]
+ streamingColumns: []
+ Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), avg_window_0 (type: double)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 3]
+ Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr avg_window_0
+Manufacturer#2 1800.7
+Manufacturer#4 1375.42
+Manufacturer#4 1620.67
+Manufacturer#4 1206.26
+Manufacturer#5 1788.73
+Manufacturer#1 1632.66
+Manufacturer#2 1690.68
+Manufacturer#2 1698.66
+Manufacturer#2 1701.6
+Manufacturer#3 1337.29
+Manufacturer#4 1844.92
+Manufacturer#4 1290.35
+Manufacturer#5 1018.1
+Manufacturer#5 1789.69
+Manufacturer#1 1753.76
+Manufacturer#1 1602.59
+Manufacturer#1 1173.15
+Manufacturer#1 1173.15
+Manufacturer#1 1414.42
+Manufacturer#2 2031.98
+Manufacturer#3 1922.98
+Manufacturer#3 1410.39
+Manufacturer#3 1190.27
+Manufacturer#5 1464.48
+Manufacturer#5 1611.66
+Manufacturer#3 1671.68
+PREHOOK: query: explain vectorization detail
+select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_type (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 4]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: [7]
+ Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_retailprice (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [2, 4, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: avg UNBOUNDED end frame is not supported for ROWS window type
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double)
+ outputColumnNames: _col2, _col4, _col7
+ Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col2: string, _col4: string, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: avg_window_0
+ arguments: _col7
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDouble
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 26 Data size: 12428 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), avg_window_0 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr avg_window_0
+Manufacturer#1 1753.76
+Manufacturer#1 1693.21
+Manufacturer#1 1663.0033333333333
+Manufacturer#1 1540.54
+Manufacturer#1 1467.062
+Manufacturer#1 1458.2883333333332
+Manufacturer#2 1800.7
+Manufacturer#2 1745.69
+Manufacturer#2 1841.1200000000001
+Manufacturer#2 1805.505
+Manufacturer#2 1784.7240000000002
+Manufacturer#3 1922.98
+Manufacturer#3 1666.685
+Manufacturer#3 1668.3500000000001
+Manufacturer#3 1548.83
+Manufacturer#3 1506.522
+Manufacturer#4 1844.92
+Manufacturer#4 1610.17
+Manufacturer#4 1613.67
+Manufacturer#4 1511.8175
+Manufacturer#4 1467.5240000000001
+Manufacturer#5 1018.1
+Manufacturer#5 1241.29
+Manufacturer#5 1424.0900000000001
+Manufacturer#5 1515.25
+Manufacturer#5 1534.532
+PREHOOK: query: create table t1 (a1 int, b1 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a1 int, b1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (a1 int, b1 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (a1 int, b1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: explain vectorization detail
+from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Reduce Output Operator
+ key expressions: ts (type: timestamp), i (type: int)
+ sort order: ++
+ Map-reduce partition columns: ts (type: timestamp)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [8, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [8]
+ valueColumns: [7]
+ Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ value expressions: s (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ includeColumns: [2, 7, 8]
+ dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:int, VALUE._col6:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: int), VALUE._col6 (type: string), KEY.reducesinkkey0 (type: timestamp)
+ outputColumnNames: _col2, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 2, 0]
+ Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col2: int, _col7: string, _col8: timestamp
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col8
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col2
+ name: sum
+ window function: GenericUDAFSumLong
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorLongSum]
+ functionInputExpressions: [col 1]
+ functionNames: [sum]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 1]
+ outputColumns: [3, 1, 2, 0]
+ outputTypes: [bigint, int, string, timestamp]
+ partitionExpressions: [col 0]
+ streamingColumns: []
+ Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: sum_window_0 (type: bigint), _col7 (type: string)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3, 2]
+ Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3, 2]
+ Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3, 2]
+ Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 7066 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t2
+
+ Stage: Stage-3
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t2
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+PREHOOK: Output: default@t1
+PREHOOK: Output: default@t2
+POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: t1.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
+POSTHOOK: Lineage: t2.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ]
+_col0 _col1
+PREHOOK: query: select * from t1 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+t1.a1 t1.b1
+65542 rachel thompson
+131088 oscar brown
+262258 wendy steinbeck
+PREHOOK: query: select * from t2 limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2 limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+t2.a1 t2.b1
+65542 rachel thompson
+131088 oscar brown
+262258 wendy steinbeck
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+limit 11
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_retailprice, p_size,
+round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
+from part
+window w1 as (distribute by p_mfgr sort by p_retailprice)
+limit 11
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_retailprice (type: double)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 7]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: [5]
+ Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: lead and lag function not supported in argument expression of aggregation function sum
+
<TRUNCATED>