You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/11/15 05:52:30 UTC
[09/12] hive git commit: HIVE-18067 : Remove extraneous golden files
http://git-wip-us.apache.org/repos/asf/hive/blob/afa9ffee/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out b/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out
deleted file mode 100644
index 7a26191..0000000
--- a/ql/src/test/results/clientpositive/vector_windowing_expressions.q.out
+++ /dev/null
@@ -1,2077 +0,0 @@
-PREHOOK: query: drop table over10k
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table over10k
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table over10k(
- t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
- row format delimited
- fields terminated by '|'
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@over10k
-POSTHOOK: query: create table over10k(
- t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
- row format delimited
- fields terminated by '|'
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@over10k
-PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@over10k
-POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@over10k
-PREHOOK: query: explain vectorization detail
-select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
-max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
-max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: part
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string]
- Reduce Output Operator
- key expressions: p_mfgr (type: string), p_retailprice (type: double)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_size (type: int)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- includeColumns: [2, 5, 7]
- dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double)
- outputColumnNames: _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col2: string, _col5: int, _col7: double
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col7 ASC NULLS FIRST
- partition by: _col2
- raw input shape:
- window functions:
- window function definition
- alias: sum_window_0
- arguments: _col7
- name: sum
- window function: GenericUDAFSumDouble
- window frame: RANGE PRECEDING(MAX)~CURRENT
- window function definition
- alias: sum_window_1
- arguments: lag(...)
- name: sum
- window function: GenericUDAFSumDouble
- window frame: RANGE PRECEDING(MAX)~CURRENT
- window function definition
- alias: last_value_window_2
- arguments: _col7
- name: last_value
- window function: GenericUDAFLastValueEvaluator
- window frame: RANGE PRECEDING(MAX)~CURRENT
- window function definition
- alias: max_window_3
- arguments: _col7
- name: max
- window function: GenericUDAFMaxEvaluator
- window frame: RANGE PRECEDING(MAX)~CURRENT
- window function definition
- alias: min_window_4
- arguments: _col7
- name: min
- window function: GenericUDAFMinEvaluator
- window frame: RANGE PRECEDING(MAX)~CURRENT
- window function definition
- alias: first_value_window_5
- arguments: _col7
- name: first_value
- window function: GenericUDAFFirstValueEvaluator
- window frame: RANGE PRECEDING(MAX)~CURRENT
- Lead/Lag information: lag(...) (type: double)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), (round(sum_window_0, 2) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean), ((max_window_3 - min_window_4) = (last_value_window_2 - first_value_window_5)) (type: boolean)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
-max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
-max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-p_mfgr p_retailprice p_size _c3 _c4
-Manufacturer#1 1173.15 2 true true
-Manufacturer#1 1173.15 2 true true
-Manufacturer#1 1414.42 28 true true
-Manufacturer#1 1602.59 6 true true
-Manufacturer#1 1632.66 42 true true
-Manufacturer#1 1753.76 34 true true
-Manufacturer#2 1690.68 14 true true
-Manufacturer#2 1698.66 25 true true
-Manufacturer#2 1701.6 18 true true
-Manufacturer#2 1800.7 40 true true
-Manufacturer#2 2031.98 2 true true
-Manufacturer#3 1190.27 14 true true
-Manufacturer#3 1337.29 45 true true
-Manufacturer#3 1410.39 19 true true
-Manufacturer#3 1671.68 17 true true
-Manufacturer#3 1922.98 1 true true
-Manufacturer#4 1206.26 27 true true
-Manufacturer#4 1290.35 12 true true
-Manufacturer#4 1375.42 39 true true
-Manufacturer#4 1620.67 10 true true
-Manufacturer#4 1844.92 7 true true
-Manufacturer#5 1018.1 46 true true
-Manufacturer#5 1464.48 23 true true
-Manufacturer#5 1611.66 6 true true
-Manufacturer#5 1788.73 2 true true
-Manufacturer#5 1789.69 31 true true
-PREHOOK: query: explain vectorization detail
-select p_mfgr, p_retailprice, p_size,
-rank() over (distribute by p_mfgr sort by p_retailprice) as r,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
-from part
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select p_mfgr, p_retailprice, p_size,
-rank() over (distribute by p_mfgr sort by p_retailprice) as r,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
-from part
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: part
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string]
- Reduce Output Operator
- key expressions: p_mfgr (type: string), p_retailprice (type: double)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_size (type: int)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- includeColumns: [2, 5, 7]
- dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double)
- outputColumnNames: _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col2: string, _col5: int, _col7: double
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col7 ASC NULLS FIRST
- partition by: _col2
- raw input shape:
- window functions:
- window function definition
- alias: rank_window_0
- arguments: _col7
- name: rank
- window function: GenericUDAFRankEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- window function definition
- alias: sum_window_1
- arguments: _col7
- name: sum
- window function: GenericUDAFSumDouble
- window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), rank_window_0 (type: int), sum_window_1 (type: double), (sum_window_1 - 5.0) (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select p_mfgr, p_retailprice, p_size,
-rank() over (distribute by p_mfgr sort by p_retailprice) as r,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
-from part
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
-rank() over (distribute by p_mfgr sort by p_retailprice) as r,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
-from part
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-p_mfgr p_retailprice p_size r s2 s1
-Manufacturer#1 1173.15 2 1 1173.15 1168.15
-Manufacturer#1 1173.15 2 1 2346.3 2341.3
-Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003
-Manufacturer#1 1602.59 6 4 5363.31 5358.31
-Manufacturer#1 1632.66 42 5 6995.97 6990.97
-Manufacturer#1 1753.76 34 6 8749.73 8744.73
-Manufacturer#2 1690.68 14 1 1690.68 1685.68
-Manufacturer#2 1698.66 25 2 3389.34 3384.34
-Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005
-Manufacturer#2 1800.7 40 4 6891.64 6886.64
-Manufacturer#2 2031.98 2 5 8923.62 8918.62
-Manufacturer#3 1190.27 14 1 1190.27 1185.27
-Manufacturer#3 1337.29 45 2 2527.56 2522.56
-Manufacturer#3 1410.39 19 3 3937.95 3932.95
-Manufacturer#3 1671.68 17 4 5609.63 5604.63
-Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001
-Manufacturer#4 1206.26 27 1 1206.26 1201.26
-Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997
-Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997
-Manufacturer#4 1620.67 10 4 5492.7 5487.7
-Manufacturer#4 1844.92 7 5 7337.62 7332.62
-Manufacturer#5 1018.1 46 1 1018.1 1013.1
-Manufacturer#5 1464.48 23 2 2482.58 2477.58
-Manufacturer#5 1611.66 6 3 4094.24 4089.24
-Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999
-Manufacturer#5 1789.69 31 5 7672.66 7667.66
-PREHOOK: query: explain vectorization detail
-select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: over10k
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary]
- Reduce Output Operator
- key expressions: t (type: tinyint), bo (type: boolean), s (type: string), si (type: smallint), f (type: float)
- sort order: ++++-
- Map-reduce partition columns: t (type: tinyint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- includeColumns: [0, 1, 4, 6, 7]
- dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: string)
- outputColumnNames: _col0, _col1, _col4, _col6, _col7
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col0: tinyint, _col1: smallint, _col4: float, _col6: boolean, _col7: string
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col6 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 DESC NULLS LAST
- partition by: _col0
- raw input shape:
- window functions:
- window function definition
- alias: lead_window_0
- arguments: _col4, 3
- name: lead
- window function: GenericUDAFLeadEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col7 (type: string), _col1 (type: smallint), _col4 (type: float), (UDFToFloat(_col1) - lead_window_0) (type: float)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 100
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 100
- Processor Tree:
- ListSink
-
-PREHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over10k
-#### A masked pattern was here ####
-POSTHOOK: query: select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over10k
-#### A masked pattern was here ####
-s si f _c3
-alice allen 400 76.31 337.23
-alice davidson 384 71.97 357.79
-alice king 455 2.48 395.93
-alice king 458 62.77 384.16998
-alice xylophone 485 26.21 464.05
-bob falkner 260 59.07 242.4
-bob ichabod 454 73.83 381.7
-bob polk 264 20.95 257.17
-bob underhill 454 17.6 424.94
-bob underhill 465 72.3 453.17
-bob van buren 433 6.83 398.4
-calvin ichabod 431 29.06 334.22
-david garcia 485 11.83 421.51
-ethan steinbeck 298 34.6 288.14
-fred ellison 376 96.78 330.76
-holly steinbeck 384 63.49 293.7
-holly underhill 318 9.86 269.91
-irene ellison 458 45.24 365.29
-irene underhill 307 90.3 244.19
-jessica johnson 494 48.09 490.18
-jessica king 459 92.71 452.2
-jessica white 284 62.81 209.08
-luke garcia 311 3.82 267.27
-luke young 451 6.8 429.0
-mike king 275 74.92 211.81
-oscar garcia 362 43.73 340.66
-priscilla laertes 316 22.0 296.06
-priscilla quirinius 423 63.19 362.72
-priscilla zipper 485 21.34 400.61
-quinn ellison 266 19.94 209.95
-quinn polk 507 60.28 447.66
-sarah robinson 320 84.39 309.74
-tom polk 346 56.05 320.33
-ulysses ellison 381 59.34 358.66
-ulysses quirinius 303 10.26 259.6
-ulysses robinson 313 25.67 269.31
-ulysses steinbeck 333 22.34 270.61
-victor allen 337 43.4 311.5
-victor hernandez 447 43.69 375.22
-victor xylophone 438 62.39 424.33
-wendy quirinius 279 25.5 250.25
-wendy robinson 275 71.78 262.88
-wendy xylophone 314 13.67 295.73
-xavier garcia 493 28.75 474.56
-zach thompson 386 12.12 377.63
-zach young 286 18.27 263.65
-alice falkner 280 18.44 227.7
-bob ellison 339 8.37 300.95
-bob johnson 374 22.35 326.49
-calvin white 280 52.3 198.32
-david carson 270 38.05 255.77
-david falkner 469 47.51 388.35
-david hernandez 408 81.68 339.27
-ethan underhill 339 14.23 256.26
-gabriella brown 498 80.65 413.25
-holly nixon 505 68.73 440.71
-holly polk 268 82.74 182.04001
-holly thompson 387 84.75 298.22
-irene young 458 64.29 401.8
-jessica miller 299 85.96 243.41
-katie ichabod 469 88.78 385.61
-luke ichabod 289 56.2 286.74
-luke king 337 55.59 274.88
-mike allen 465 83.39 383.03
-mike polk 500 2.26 427.74
-mike white 454 62.12 430.78
-mike xylophone 448 81.97 447.17
-nick nixon 335 72.26 240.78
-nick robinson 350 23.22 294.59
-oscar davidson 432 0.83 420.93
-oscar johnson 315 94.22 233.05
-oscar johnson 469 55.41 468.44
-oscar miller 324 11.07 265.19
-rachel davidson 507 81.95 468.78
-rachel thompson 344 0.56 246.12
-sarah miller 386 58.81 304.36
-sarah xylophone 275 38.22 177.48999
-sarah zipper 376 97.88 294.61
-tom hernandez 467 81.64 459.9
-tom hernandez 477 97.51 415.19
-tom steinbeck 414 81.39 361.87
-ulysses carson 343 7.1 314.22
-victor robinson 415 61.81 349.5
-victor thompson 344 52.13 NULL
-xavier ovid 280 28.78 NULL
-yuri xylophone 430 65.5 NULL
-alice underhill 389 26.68 368.06
-alice underhill 446 6.49 444.21
-bob ovid 331 67.12 236.43
-bob van buren 406 20.94 383.32
-david falkner 406 1.79 374.34
-david miller 450 94.57 380.13
-ethan allen 380 22.68 375.6
-ethan king 395 31.66 361.51
-ethan nixon 475 69.87 431.39
-ethan polk 283 4.4 243.82
-fred allen 331 33.49 281.68
-fred king 511 43.61 457.22
-fred polk 261 39.18 248.73
-fred young 303 49.32 221.51001
-PREHOOK: query: explain vectorization detail
-select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: over10k
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary]
- Reduce Output Operator
- key expressions: si (type: smallint), i (type: int), s (type: string)
- sort order: +++
- Map-reduce partition columns: si (type: smallint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- includeColumns: [1, 2, 7]
- dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string)
- outputColumnNames: _col1, _col2, _col7
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col1: smallint, _col2: int, _col7: string
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col2 ASC NULLS FIRST, _col7 ASC NULLS FIRST
- partition by: _col1
- raw input shape:
- window functions:
- window function definition
- alias: lead_window_0
- arguments: _col2, 3, 0
- name: lead
- window function: GenericUDAFLeadEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col7 (type: string), _col2 (type: int), (_col2 - lead_window_0) (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 100
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 100
- Processor Tree:
- ListSink
-
-PREHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over10k
-#### A masked pattern was here ####
-POSTHOOK: query: select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over10k
-#### A masked pattern was here ####
-s i _c2
-wendy garcia 65540 -18
-ethan thompson 65543 -20
-zach nixon 65549 -31
-alice robinson 65558 -28
-wendy nixon 65563 -33
-victor robinson 65580 -19
-ethan falkner 65586 -18
-victor davidson 65596 -17
-xavier quirinius 65599 -14
-fred quirinius 65604 -11
-nick zipper 65613 -3
-xavier van buren 65613 -7
-victor johnson 65615 -12
-alice ovid 65616 -24
-xavier ovid 65620 -23
-ulysses white 65627 -24
-sarah white 65640 -13
-calvin young 65643 -25
-victor thompson 65651 -42
-calvin johnson 65653 -53
-irene polk 65668 -45
-zach underhill 65693 -38
-quinn hernandez 65706 -27
-rachel ovid 65713 -24
-gabriella falkner 65731 -7
-zach white 65733 -8
-fred hernandez 65737 -7
-rachel ellison 65738 -6
-oscar steinbeck 65741 -6
-alice ellison 65744 -8
-tom allen 65744 -19
-quinn quirinius 65747 -31
-victor hernandez 65752 -26
-holly xylophone 65763 -26
-david davidson 65778 65778
-ulysses young 65778 65778
-sarah brown 65789 65789
-xavier brown 65541 -16
-zach hernandez 65542 -18
-katie ichabod 65547 -19
-oscar young 65557 -15
-holly white 65560 -14
-priscilla laertes 65566 -9
-ethan king 65572 -6
-zach hernandez 65574 -10
-oscar thompson 65575 -13
-victor xylophone 65578 -16
-gabriella ellison 65584 -26
-nick quirinius 65588 -22
-holly robinson 65594 -18
-alice xylophone 65610 -16
-yuri brown 65610 -21
-sarah hernandez 65612 -26
-katie garcia 65626 -28
-jessica laertes 65631 -23
-ethan underhill 65638 -17
-irene young 65654 -37
-priscilla thompson 65654 -40
-luke quirinius 65655 -44
-david brown 65691 -20
-luke falkner 65694 -18
-priscilla miller 65699 -20
-rachel robinson 65711 -9
-ethan polk 65712 -10
-wendy brown 65719 -13
-mike underhill 65720 -18
-zach underhill 65722 -26
-nick zipper 65732 -20
-fred brown 65738 -18
-ulysses young 65748 -23
-nick davidson 65752 -19
-fred zipper 65756 -15
-yuri nixon 65771 -10
-zach hernandez 65771 -19
-zach zipper 65771 65771
-alice underhill 65781 65781
-oscar laertes 65790 65790
-sarah zipper 65546 -19
-bob falkner 65551 -17
-luke ovid 65551 -17
-katie allen 65565 -4
-nick falkner 65568 -5
-zach steinbeck 65568 -11
-oscar van buren 65569 -13
-gabriella young 65573 -11
-jessica ichabod 65579 -24
-david garcia 65582 -24
-nick xylophone 65584 -27
-calvin johnson 65603 -14
-xavier zipper 65606 -50
-alice nixon 65611 -58
-jessica laertes 65617 -62
-fred king 65656 -61
-priscilla underhill 65669 -48
-priscilla zipper 65679 -45
-nick king 65717 -11
-sarah polk 65717 -17
-irene quirinius 65724 -28
-tom laertes 65728 -25
-yuri johnson 65734 -27
-PREHOOK: query: explain vectorization detail
-select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: over10k
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary]
- Reduce Output Operator
- key expressions: b (type: bigint), si (type: smallint), s (type: string), d (type: double)
- sort order: ++++
- Map-reduce partition columns: b (type: bigint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- includeColumns: [1, 3, 5, 7]
- dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey3 (type: double), KEY.reducesinkkey2 (type: string)
- outputColumnNames: _col1, _col3, _col5, _col7
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col1: smallint, _col3: bigint, _col5: double, _col7: string
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col1 ASC NULLS FIRST, _col7 ASC NULLS FIRST, _col5 ASC NULLS FIRST
- partition by: _col3
- raw input shape:
- window functions:
- window function definition
- alias: lag_window_0
- arguments: _col5, 3
- name: lag
- window function: GenericUDAFLagEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col7 (type: string), _col1 (type: smallint), _col5 (type: double), (UDFToDouble(_col1) - lag_window_0) (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 100
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 100
- Processor Tree:
- ListSink
-
-PREHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over10k
-#### A masked pattern was here ####
-POSTHOOK: query: select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over10k
-#### A masked pattern was here ####
-s si d _c3
-jessica ellison 262 30.41 NULL
-david young 266 45.12 NULL
-jessica steinbeck 274 2.15 NULL
-david zipper 275 43.45 244.59
-zach nixon 283 15.95 237.88
-holly allen 285 24.37 282.85
-irene garcia 292 33.54 248.55
-ulysses xylophone 292 44.66 276.05
-irene van buren 309 35.81 284.63
-sarah miller 312 6.65 278.46
-victor garcia 312 39.14 267.34000000000003
-ethan ichabod 319 29.4 283.19
-wendy falkner 322 10.02 315.35
-oscar miller 324 25.95 284.86
-david ovid 332 28.34 302.6
-alice zipper 333 3.38 322.98
-yuri nixon 333 8.28 307.05
-ulysses nixon 335 18.48 306.66
-david ovid 336 9.36 332.62
-calvin falkner 337 17.63 328.72
-katie quirinius 349 11.3 330.52
-quinn miller 351 22.46 341.64
-victor xylophone 357 38.58 339.37
-ethan garcia 368 9.2 356.7
-nick steinbeck 395 37.54 372.54
-ulysses ichabod 415 47.61 376.42
-rachel thompson 416 37.99 406.8
-calvin young 418 47.22 380.46
-katie xylophone 425 32.59 377.39
-nick quirinius 429 19.63 391.01
-ethan ellison 453 47.92 405.78
-irene nixon 454 48.03 421.40999999999997
-bob steinbeck 462 47.04 442.37
-luke robinson 462 47.48 414.08
-gabriella steinbeck 467 9.35 418.97
-tom hernandez 467 29.36 419.96
-irene polk 485 14.26 437.52
-mike xylophone 494 36.92 484.65
-calvin allen 499 39.99 469.64
-quinn steinbeck 503 16.62 488.74
-calvin thompson 263 30.87 NULL
-rachel quirinius 263 29.46 NULL
-ulysses garcia 263 31.85 NULL
-mike steinbeck 266 48.57 235.13
-rachel young 275 14.75 245.54
-tom king 278 31.11 246.15
-oscar robinson 283 30.35 234.43
-zach allen 284 1.88 269.25
-bob king 308 27.61 276.89
-ulysses allen 310 22.77 279.65
-fred nixon 317 0.48 315.12
-gabriella robinson 321 0.33 293.39
-bob johnson 325 9.61 302.23
-rachel davidson 335 2.34 334.52
-fred brown 337 5.8 336.67
-wendy ellison 350 20.25 340.39
-zach falkner 391 13.67 388.66
-katie xylophone 410 39.09 404.2
-holly king 413 3.56 392.75
-sarah van buren 417 7.81 403.33
-calvin van buren 430 36.01 390.90999999999997
-katie white 434 33.56 430.44
-oscar quirinius 454 7.03 446.19
-zach young 505 18.19 468.99
-gabriella robinson 506 12.8 472.44
-sarah xylophone 507 16.09 499.97
-rachel thompson 267 46.87 NULL
-gabriella van buren 271 41.04 NULL
-mike steinbeck 284 11.44 NULL
-ethan ovid 293 2.08 246.13
-luke falkner 293 40.67 251.96
-irene nixon 321 24.35 309.56
-mike van buren 327 2.58 324.92
-ulysses robinson 329 26.64 288.33
-quinn laertes 332 10.71 307.65
-tom polk 346 34.03 343.42
-jessica johnson 352 45.71 325.36
-xavier davidson 354 33.9 343.29
-wendy nixon 364 29.42 329.97
-jessica quirinius 375 47.33 329.29
-xavier brown 376 26.17 342.1
-gabriella davidson 383 18.87 353.58
-jessica brown 388 34.09 340.67
-gabriella garcia 391 32.44 364.83
-ethan miller 396 49.07 377.13
-bob garcia 416 7.82 381.90999999999997
-priscilla hernandez 416 29.94 383.56
-holly nixon 419 17.81 369.93
-nick underhill 429 39.54 421.18
-xavier falkner 434 0.88 404.06
-luke robinson 461 44.02 443.19
-bob underhill 465 22.58 425.46
-ulysses king 483 37.98 482.12
-jessica miller 486 26.14 441.98
-bob ovid 493 9.7 470.42
-alice falkner 500 37.85 462.02
-quinn xylophone 267 49.8 NULL
-gabriella thompson 268 17.15 NULL
-calvin xylophone 275 49.32 NULL
-gabriella zipper 279 30.41 229.2
-PREHOOK: query: explain vectorization detail
-select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: over10k
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary]
- Reduce Output Operator
- key expressions: f (type: float), b (type: bigint)
- sort order: ++
- Map-reduce partition columns: f (type: float)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- value expressions: s (type: string)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- includeColumns: [3, 4, 7]
- dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col5 (type: string)
- outputColumnNames: _col3, _col4, _col7
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col3: bigint, _col4: float, _col7: string
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col3 ASC NULLS FIRST
- partition by: _col4
- raw input shape:
- window functions:
- window function definition
- alias: lag_window_0
- arguments: _col7, 3, 'fred'
- name: lag
- window function: GenericUDAFLagEvaluator
- window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- isPivotResult: true
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col7 (type: string), lag_window_0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 100
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 100
- Processor Tree:
- ListSink
-
-PREHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over10k
-#### A masked pattern was here ####
-POSTHOOK: query: select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over10k
-#### A masked pattern was here ####
-s lag_window_0
-yuri thompson fred
-bob ichabod fred
-luke king fred
-luke steinbeck fred
-fred zipper fred
-quinn miller fred
-calvin van buren fred
-holly steinbeck fred
-david davidson fred
-calvin thompson fred
-calvin quirinius fred
-david ovid fred
-holly thompson fred
-nick zipper fred
-victor steinbeck fred
-victor robinson fred
-zach ovid fred
-ulysses zipper fred
-luke falkner fred
-irene thompson fred
-yuri johnson fred
-ulysses falkner fred
-gabriella robinson fred
-alice robinson fred
-priscilla xylophone fred
-david laertes fred
-mike underhill fred
-victor van buren fred
-holly falkner fred
-priscilla falkner fred
-ethan ovid fred
-luke zipper fred
-mike steinbeck fred
-calvin white fred
-alice quirinius fred
-irene miller fred
-wendy polk fred
-nick young fred
-yuri davidson fred
-ethan ellison fred
-zach hernandez fred
-wendy miller fred
-katie underhill fred
-irene zipper fred
-holly allen fred
-quinn brown fred
-calvin ovid fred
-zach robinson fred
-nick miller fred
-mike allen fred
-yuri van buren fred
-priscilla young fred
-zach miller fred
-victor xylophone fred
-sarah falkner fred
-rachel ichabod fred
-alice robinson fred
-calvin ovid fred
-calvin ovid fred
-luke laertes fred
-david hernandez fred
-alice ovid fred
-luke quirinius fred
-oscar white fred
-zach falkner fred
-rachel thompson fred
-priscilla king fred
-xavier polk fred
-wendy ichabod fred
-rachel ovid fred
-wendy allen fred
-luke brown fred
-mike brown fred
-oscar ichabod fred
-xavier garcia fred
-yuri brown fred
-bob xylophone fred
-luke davidson fred
-ethan quirinius fred
-zach davidson fred
-irene miller fred
-wendy king fred
-bob zipper fred
-sarah thompson fred
-bob carson fred
-bob laertes fred
-xavier allen fred
-sarah robinson fred
-david king fred
-oscar davidson fred
-victor hernandez fred
-wendy polk fred
-david ellison fred
-ulysses johnson fred
-jessica ovid fred
-bob king fred
-ulysses garcia fred
-irene falkner fred
-holly robinson fred
-yuri white fred
-PREHOOK: query: explain vectorization detail
-select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: part
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string]
- Reduce Output Operator
- key expressions: p_mfgr (type: string), p_type (type: string)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string), p_type (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_retailprice (type: double)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- includeColumns: [2, 4, 7]
- dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double)
- outputColumnNames: _col2, _col4, _col7
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col2: string, _col4: string, _col7: double
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col2 ASC NULLS FIRST
- partition by: _col2, _col4
- raw input shape:
- window functions:
- window function definition
- alias: avg_window_0
- arguments: _col7
- name: avg
- window function: GenericUDAFAverageEvaluatorDouble
- window frame: RANGE PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), avg_window_0 (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-p_mfgr avg_window_0
-Manufacturer#1 1753.76
-Manufacturer#1 1632.66
-Manufacturer#1 1602.59
-Manufacturer#1 1173.15
-Manufacturer#1 1173.15
-Manufacturer#1 1414.42
-Manufacturer#2 1800.7
-Manufacturer#2 1690.68
-Manufacturer#2 2031.98
-Manufacturer#2 1698.66
-Manufacturer#2 1701.6
-Manufacturer#3 1922.98
-Manufacturer#3 1410.39
-Manufacturer#3 1671.68
-Manufacturer#3 1190.27
-Manufacturer#3 1337.29
-Manufacturer#4 1844.92
-Manufacturer#4 1375.42
-Manufacturer#4 1620.67
-Manufacturer#4 1206.26
-Manufacturer#4 1290.35
-Manufacturer#5 1018.1
-Manufacturer#5 1464.48
-Manufacturer#5 1789.69
-Manufacturer#5 1788.73
-Manufacturer#5 1611.66
-PREHOOK: query: explain vectorization detail
-select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: part
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string]
- Reduce Output Operator
- key expressions: p_mfgr (type: string), p_type (type: string)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_retailprice (type: double)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- includeColumns: [2, 4, 7]
- dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col5 (type: double)
- outputColumnNames: _col2, _col4, _col7
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col2: string, _col4: string, _col7: double
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST
- partition by: _col2
- raw input shape:
- window functions:
- window function definition
- alias: avg_window_0
- arguments: _col7
- name: avg
- window function: GenericUDAFAverageEvaluatorDouble
- window frame: ROWS PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), avg_window_0 (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-p_mfgr avg_window_0
-Manufacturer#1 1753.76
-Manufacturer#1 1693.21
-Manufacturer#1 1663.0033333333333
-Manufacturer#1 1540.54
-Manufacturer#1 1467.062
-Manufacturer#1 1458.2883333333332
-Manufacturer#2 1800.7
-Manufacturer#2 1745.69
-Manufacturer#2 1841.1200000000001
-Manufacturer#2 1805.505
-Manufacturer#2 1784.7240000000002
-Manufacturer#3 1922.98
-Manufacturer#3 1666.685
-Manufacturer#3 1668.3500000000001
-Manufacturer#3 1548.83
-Manufacturer#3 1506.522
-Manufacturer#4 1844.92
-Manufacturer#4 1610.17
-Manufacturer#4 1613.67
-Manufacturer#4 1511.8175
-Manufacturer#4 1467.5240000000001
-Manufacturer#5 1018.1
-Manufacturer#5 1241.29
-Manufacturer#5 1424.0900000000001
-Manufacturer#5 1515.25
-Manufacturer#5 1534.532
-PREHOOK: query: create table t1 (a1 int, b1 string)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t1
-POSTHOOK: query: create table t1 (a1 int, b1 string)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t1
-PREHOOK: query: create table t2 (a1 int, b1 string)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@t2
-POSTHOOK: query: create table t2 (a1 int, b1 string)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@t2
-PREHOOK: query: explain vectorization detail
-from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
- Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: over10k
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- projectedColumns: [t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary]
- Reduce Output Operator
- key expressions: ts (type: timestamp), i (type: int)
- sort order: ++
- Map-reduce partition columns: ts (type: timestamp)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- value expressions: s (type: string)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- includeColumns: [2, 7, 8]
- dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2)/DECIMAL_64, bin:binary
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey1 (type: int), VALUE._col6 (type: string), KEY.reducesinkkey0 (type: timestamp)
- outputColumnNames: _col2, _col7, _col8
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col2: int, _col7: string, _col8: timestamp
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col2 ASC NULLS FIRST
- partition by: _col8
- raw input shape:
- window functions:
- window function definition
- alias: sum_window_0
- arguments: _col2
- name: sum
- window function: GenericUDAFSumLong
- window frame: RANGE PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: sum_window_0 (type: bigint), _col7 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.t1
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.t2
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.t1
-
- Stage: Stage-3
- Stats Work
- Basic Stats Work:
-
- Stage: Stage-1
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.t2
-
- Stage: Stage-4
- Stats Work
- Basic Stats Work:
-
-PREHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over10k
-PREHOOK: Output: default@t1
-PREHOOK: Output: default@t2
-POSTHOOK: query: from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select *
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over10k
-POSTHOOK: Output: default@t1
-POSTHOOK: Output: default@t2
-POSTHOOK: Lineage: t1.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
-POSTHOOK: Lineage: t1.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ]
-POSTHOOK: Lineage: t2.a1 SCRIPT [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), (over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), (over10k)over10k.FieldSchema(name:i, type:int, comment:null), (over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), (over10k)over10k.FieldSchema(name:f, type:float, comment:null), (over10k)over10k.FieldSchema(name:d, type:double, comment:null), (over10k)over10k.FieldSchema(name:bo, type:boolean, comment:null), (over10k)over10k.FieldSchema(name:s, type:string, comment:null), (over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), (over10k)over10k.FieldSchema(name:dec, type:decimal(4,2), comment:null), (over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
-POSTHOOK: Lineage: t2.b1 SIMPLE [(over10k)over10k.FieldSchema(name:s, type:string, comment:null), ]
-_col0 _col1
-PREHOOK: query: select * from t1 limit 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: select * from t1 limit 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-t1.a1 t1.b1
-65542 rachel thompson
-131088 oscar brown
-262258 wendy steinbeck
-PREHOOK: query: select * from t2 limit 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t2
-#### A masked pattern was here ####
-POSTHOOK: query: select * from t2 limit 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t2
-#### A masked pattern was here ####
-t2.a1 t2.b1
-65542 rachel thompson
-131088 oscar brown
-262258 wendy steinbeck
-PREHOOK: query: explain vectorization detail
-select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-limit 11
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-limit 11
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: part
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- projectedColumns: [p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string]
- Reduce Output Operator
- key expressions: p_mfgr (type: string), p_retailprice (type: double)
- sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_size (type: int)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- includeColumns: [2, 5, 7]
- dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int), KEY.reducesinkkey1 (type: double)
- outputColumnNames: _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col2: string, _col5: int, _col7: double
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col7 ASC NULLS FIRST
- partition by: _col2
- raw input shape:
- window functions:
- window function definition
- alias: sum_window_0
- arguments: _col7
- name: sum
- window function: GenericUDAFSumDouble
- window frame: RANGE PRECEDING(MAX)~CURRENT
- window function definition
- alias: sum_window_1
- arguments: lag(...)
- name: sum
- window function: GenericUDAFSumDouble
- window frame: RANGE PRECEDING(MAX)~CURRENT
- window function definition
- alias: last_value_window_2
- arguments: _col7
- name: last_value
- window function: GenericUDAFLastValueEvaluator
- window frame: RANGE PRECEDING(MAX)~CURRENT
- Lead/Lag information: lag(...) (type: double)
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: string), _col7 (type: double), _col5 (type: int), ((round(sum_window_0, 2) + 50.0) = round((sum_window_1 + last_value_window_2), 2)) (type: boolean)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 11
- Statistics: Num rows: 11 Data size: 1331 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 11 Data size: 1331 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: 11
- Processor Tree:
- ListSink
-
-PREHOOK: query: select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-limit 11
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2)
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-limit 11
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-p_mfgr p_retailprice p_size _c3
-Manufacturer#1 1173.15 2 true
-Manufacturer#1 1173.15 2 true
-Manufacturer#1 1414.42 28 true
-Manufacturer#1 1602.59 6 true
-Manufacturer#1 1632.66 42 true
-Manufacturer#1 1753.76 34 true
-Manufacturer#2 1690.68 14 true
-Manufacturer#2 1698.66 25 true
-Manufacturer#2 1701.6 18 true
-Manufacturer#2 1800.7 40 true
-Manufacturer#2 2031.98 2 true
-PREHOOK: query: select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
-max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
-round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2),
-max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1
-from part
-window w1 as (distribute by p_mfgr sort by p_retailprice)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-p_mfgr p_retailprice p_size _c3 _c4
-Manufacturer#1 1173.15 2 true true
-Manufacturer#1 1173.15 2 true true
-Manufacturer#1 1414.42 28 true true
-Manufacturer#1 1602.59 6 true true
-Manufacturer#1 1632.66 42 true true
-Manufacturer#1 1753.76 34 true true
-Manufacturer#2 1690.68 14 true true
-Manufacturer#2 1698.66 25 true true
-Manufacturer#2 1701.6 18 true true
-Manufacturer#2 1800.7 40 true true
-Manufacturer#2 2031.98 2 true true
-Manufacturer#3 1190.27 14 true true
-Manufacturer#3 1337.29 45 true true
-Manufacturer#3 1410.39 19 true true
-Manufacturer#3 1671.68 17 true true
-Manufacturer#3 1922.98 1 true true
-Manufacturer#4 1206.26 27 true true
-Manufacturer#4 1290.35 12 true true
-Manufacturer#4 1375.42 39 true true
-Manufacturer#4 1620.67 10 true true
-Manufacturer#4 1844.92 7 true true
-Manufacturer#5 1018.1 46 true true
-Manufacturer#5 1464.48 23 true true
-Manufacturer#5 1611.66 6 true true
-Manufacturer#5 1788.73 2 true true
-Manufacturer#5 1789.69 31 true true
-PREHOOK: query: select p_mfgr, p_retailprice, p_size,
-rank() over (distribute by p_mfgr sort by p_retailprice) as r,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
-from part
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_mfgr, p_retailprice, p_size,
-rank() over (distribute by p_mfgr sort by p_retailprice) as r,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2,
-sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1
-from part
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-p_mfgr p_retailprice p_size r s2 s1
-Manufacturer#1 1173.15 2 1 1173.15 1168.15
-Manufacturer#1 1173.15 2 1 2346.3 2341.3
-Manufacturer#1 1414.42 28 3 3760.7200000000003 3755.7200000000003
-Manufacturer#1 1602.59 6 4 5363.31 5358.31
-Manufacturer#1 1632.66 42 5 6995.97 6990.97
-Manufacturer#1 1753.76 34 6 8749.73 8744.73
-Manufacturer#2 1690.68 14 1 1690.68 1685.68
-Manufacturer#2 1698.66 25 2 3389.34 3384.34
-Manufacturer#2 1701.6 18 3 5090.9400000000005 5085.9400000000005
-Manufacturer#2 1800.7 40 4 6891.64 6886.64
-Manufacturer#2 2031.98 2 5 8923.62 8918.62
-Manufacturer#3 1190.27 14 1 1190.27 1185.27
-Manufacturer#3 1337.29 45 2 2527.56 2522.56
-Manufacturer#3 1410.39 19 3 3937.95 3932.95
-Manufacturer#3 1671.68 17 4 5609.63 5604.63
-Manufacturer#3 1922.98 1 5 7532.610000000001 7527.610000000001
-Manufacturer#4 1206.26 27 1 1206.26 1201.26
-Manufacturer#4 1290.35 12 2 2496.6099999999997 2491.6099999999997
-Manufacturer#4 1375.42 39 3 3872.0299999999997 3867.0299999999997
-Manufacturer#4 1620.67 10 4 5492.7 5487.7
-Manufacturer#4 1844.92 7 5 7337.62 7332.62
-Manufacturer#5 1018.1 46 1 1018.1 1013.1
-Manufacturer#5 1464.48 23 2 2482.58 2477.58
-Manufacturer#5 1611.66 6 3 4094.24 4089.24
-Manufacturer#5 1788.73 2 4 5882.969999999999 5877.969999999999
-Manufacturer#5 1789.69 31 5 7672.66 7667.66
-PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-p_mfgr avg_window_0
-Manufacturer#1 1753.76
-Manufacturer#1 1632.66
-Manufacturer#1 1602.59
-Manufacturer#1 1173.15
-Manufacturer#1 1173.15
-Manufacturer#1 1414.42
-Manufacturer#2 1800.7
-Manufacturer#2 1690.68
-Manufacturer#2 2031.98
-Manufacturer#2 1698.66
-Manufacturer#2 1701.6
-Manufacturer#3 1922.98
-Manufacturer#3 1410.39
-Manufacturer#3 1671.68
-Manufacturer#3 1190.27
-Manufacturer#3 1337.29
-Manufacturer#4 1844.92
-Manufacturer#4 1375.42
-Manufacturer#4 1620.67
-Manufacturer#4 1206.26
-Manufacturer#4 1290.35
-Manufacturer#5 1018.1
-Manufacturer#5 1464.48
-Manufacturer#5 1789.69
-Manufacturer#5 1788.73
-Manufacturer#5 1611.66
-PREHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
-PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
-POSTHOOK: query: select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@part
-#### A masked pattern was here ####
-p_mfgr avg_window_0
-Manufacturer#1 1753.76
-Manufacturer#1 1693.21
-Manufacturer#1 1663.0033333333333
-Manufacturer#1 1540.54
-Manufacturer#1 1467.062
-Manufacturer#1 1458.2883333333332
-Manufacturer#2 1800.7
-Manufacturer#2 1745.69
-Manufacturer#2 1841.1200000000001
-Manufacturer#2 1805.505
-Manufacturer#2 1784.7240000000002
-Manufacturer#3 1922.98
-Manufacturer#3 1666.685
-Manufacturer#3 1668.3500000000001
-Manufacturer#3 1548.83
-Manufacturer#3 1506.522
-Manufacturer#4 18
<TRUNCATED>