You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/07/20 10:16:52 UTC
[26/36] hive git commit: HIVE-16369: Vectorization: Support PTF (Part
1: No Custom Window Framing -- Default Only) (Matt McCline,
reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
new file mode 100644
index 0000000..e3d7897
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
@@ -0,0 +1,9768 @@
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: [5, 7]
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int), p_retailprice (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col1, _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: sum_window_2
+ arguments: _col7
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size r dr s1
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
+Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.65
+Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.07
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.73
+Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
+Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
+Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.36
+Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
+Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
+Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
+Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
+Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
+Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
+Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
+Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
+Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.62
+Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
+Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
+Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
+Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name)as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name)as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double)
+ outputColumnNames: p_name, p_mfgr, p_size, p_retailprice
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 2, 5, 7]
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_retailprice)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinDouble(col 7) -> double
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ vectorOutput: true
+ keyExpressions: col 2, col 1, col 5
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumns: [0]
+ keys: p_mfgr (type: string), p_name (type: string), p_size (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [0]
+ valueColumns: [3]
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col3 (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: int, _col3: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: lag_window_2
+ arguments: _col2, 1, _col2
+ name: lag
+ window function: GenericUDAFLagEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name)as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name)as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size _c3 r dr p_size deltasz
+Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0
+Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28
+Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14
+Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0
+Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26
+Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38
+Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7
+Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0
+Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3
+Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5
+Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18
+Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44
+Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0
+Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29
+Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12
+Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20
+Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5
+Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0
+Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25
+Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44
+Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size, min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size, min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColGreaterLongScalar(col 5, val 0) -> boolean
+ predicate: (p_size > 0) (type: boolean)
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_retailprice)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinDouble(col 7) -> double
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ vectorOutput: true
+ keyExpressions: col 2, col 1, col 5
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumns: [0]
+ keys: p_mfgr (type: string), p_name (type: string), p_size (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [0]
+ valueColumns: [3]
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col3 (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: lag not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: int, _col3: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: lag_window_2
+ arguments: _col2, 1, _col2
+ name: lag
+ window function: GenericUDAFLagEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size, min(p_retailprice),
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+group by p_mfgr, p_name, p_size
+having p_size > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size _c3 r dr p_size deltasz
+Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0
+Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28
+Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14
+Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0
+Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26
+Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38
+Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7
+Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0
+Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3
+Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5
+Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18
+Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44
+Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0
+Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29
+Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12
+Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20
+Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5
+Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0
+Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25
+Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44
+Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd
+from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd
+from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: [5]
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0, 2]
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: count_window_0
+ arguments: _col5
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorCount]
+ functionInputExpressions: [col 2]
+ functionNames: [count]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 1]
+ outputColumns: [3, 1, 0, 2]
+ outputTypes: [bigint, string, string, int]
+ partitionExpressions: [col 0]
+ streamingColumns: []
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 3]
+ Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_name,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name cd
+Manufacturer#1 almond antique burnished rose metallic 2
+Manufacturer#1 almond antique burnished rose metallic 2
+Manufacturer#1 almond antique chartreuse lavender yellow 3
+Manufacturer#1 almond antique salmon chartreuse burlywood 4
+Manufacturer#1 almond aquamarine burnished black steel 5
+Manufacturer#1 almond aquamarine pink moccasin thistle 6
+Manufacturer#2 almond antique violet chocolate turquoise 1
+Manufacturer#2 almond antique violet turquoise frosted 2
+Manufacturer#2 almond aquamarine midnight light salmon 3
+Manufacturer#2 almond aquamarine rose maroon antique 4
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 5
+Manufacturer#3 almond antique chartreuse khaki white 1
+Manufacturer#3 almond antique forest lavender goldenrod 2
+Manufacturer#3 almond antique metallic orange dim 3
+Manufacturer#3 almond antique misty red olive 4
+Manufacturer#3 almond antique olive coral navajo 5
+Manufacturer#4 almond antique gainsboro frosted violet 1
+Manufacturer#4 almond antique violet mint lemon 2
+Manufacturer#4 almond aquamarine floral ivory bisque 3
+Manufacturer#4 almond aquamarine yellow dodger mint 4
+Manufacturer#4 almond azure aquamarine papaya violet 5
+Manufacturer#5 almond antique blue firebrick mint 1
+Manufacturer#5 almond antique medium spring khaki 2
+Manufacturer#5 almond antique sky peru orange 3
+Manufacturer#5 almond aquamarine dodger light gainsboro 4
+Manufacturer#5 almond azure blanched chiffon midnight 5
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
+p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
+p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: [5, 7]
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int), p_retailprice (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col1, _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: count_window_2
+ arguments: _col5
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: sum_window_3
+ arguments: _col7
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: lag_window_4
+ arguments: _col5, 1, _col5
+ name: lag
+ window function: GenericUDAFLagEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), round(sum_window_3, 2) (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 6734 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 6734 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_name,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
+p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
+p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name r dr cd p_retailprice s1 p_size deltasz
+Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 1173.15 2 0
+Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 2346.3 2 0
+Manufacturer#1 almond antique chartreuse lavender yellow 3 2 3 1753.76 4100.06 34 32
+Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 4 1602.59 5702.65 6 -28
+Manufacturer#1 almond aquamarine burnished black steel 5 4 5 1414.42 7117.07 28 22
+Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 6 1632.66 8749.73 42 14
+Manufacturer#2 almond antique violet chocolate turquoise 1 1 1 1690.68 1690.68 14 0
+Manufacturer#2 almond antique violet turquoise frosted 2 2 2 1800.7 3491.38 40 26
+Manufacturer#2 almond aquamarine midnight light salmon 3 3 3 2031.98 5523.36 2 -38
+Manufacturer#2 almond aquamarine rose maroon antique 4 4 4 1698.66 7222.02 25 23
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 5 1701.6 8923.62 18 -7
+Manufacturer#3 almond antique chartreuse khaki white 1 1 1 1671.68 1671.68 17 0
+Manufacturer#3 almond antique forest lavender goldenrod 2 2 2 1190.27 2861.95 14 -3
+Manufacturer#3 almond antique metallic orange dim 3 3 3 1410.39 4272.34 19 5
+Manufacturer#3 almond antique misty red olive 4 4 4 1922.98 6195.32 1 -18
+Manufacturer#3 almond antique olive coral navajo 5 5 5 1337.29 7532.61 45 44
+Manufacturer#4 almond antique gainsboro frosted violet 1 1 1 1620.67 1620.67 10 0
+Manufacturer#4 almond antique violet mint lemon 2 2 2 1375.42 2996.09 39 29
+Manufacturer#4 almond aquamarine floral ivory bisque 3 3 3 1206.26 4202.35 27 -12
+Manufacturer#4 almond aquamarine yellow dodger mint 4 4 4 1844.92 6047.27 7 -20
+Manufacturer#4 almond azure aquamarine papaya violet 5 5 5 1290.35 7337.62 12 5
+Manufacturer#5 almond antique blue firebrick mint 1 1 1 1789.69 1789.69 31 0
+Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.35 6 -25
+Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4
+Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44
+Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23
+PREHOOK: query: explain vectorization detail
+select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz
+from (select p_mfgr, p_name,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
+p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+) sub1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz
+from (select p_mfgr, p_name,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
+p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+) sub1
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: [5, 7]
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_size (type: int), p_retailprice (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col1, _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: count_window_2
+ arguments: _col5
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: sum_window_3
+ arguments: _col7
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: lag_window_4
+ arguments: _col5, 1, _col5
+ name: lag
+ window function: GenericUDAFLagEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), round(sum_window_3, 2) (type: double), (_col5 - lag_window_4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 26 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz
+from (select p_mfgr, p_name,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
+p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+) sub1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz
+from (select p_mfgr, p_name,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+count(p_size) over(distribute by p_mfgr sort by p_name) as cd,
+p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1,
+p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
+from part
+) sub1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+sub1.r sub1.dr sub1.cd sub1.s1 sub1.deltasz
+1 1 1 1620.67 0
+1 1 1 1671.68 0
+1 1 1 1690.68 0
+1 1 1 1789.69 0
+1 1 2 1173.15 0
+1 1 2 2346.3 0
+2 2 2 2861.95 -3
+2 2 2 2996.09 29
+2 2 2 3401.35 -25
+2 2 2 3491.38 26
+3 2 3 4100.06 32
+3 3 3 4202.35 -12
+3 3 3 4272.34 5
+3 3 3 5190.08 -4
+3 3 3 5523.36 -38
+4 3 4 5702.65 -28
+4 4 4 6047.27 -20
+4 4 4 6195.32 -18
+4 4 4 6208.18 44
+4 4 4 7222.02 23
+5 4 5 7117.07 22
+5 5 5 7337.62 5
+5 5 5 7532.61 44
+5 5 5 7672.66 -23
+5 5 5 8923.62 -7
+6 5 6 8749.73 14
+PREHOOK: query: explain vectorization detail
+select abc.p_mfgr, abc.p_name,
+rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r,
+dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr,
+abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
+abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
+from noop(on part
+partition by p_mfgr
+order by p_name
+) abc join part p1 on abc.p_partkey = p1.p_partkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select abc.p_mfgr, abc.p_name,
+rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r,
+dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr,
+abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
+abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
+from noop(on part
+partition by p_mfgr
+order by p_name
+) abc join part p1 on abc.p_partkey = p1.p_partkey
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: [0, 5, 7]
+ Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0, 1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: p1
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
+ predicate: p_partkey is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: p_partkey (type: int)
+ sort order: +
+ Map-reduce partition columns: p_partkey (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [0]
+ valueColumns: []
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: NOOP not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col5, _col7
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: part
+ output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double
+ type: TABLE
+ Partition table definition
+ input alias: abc
+ name: noop
+ order by: _col1 ASC NULLS FIRST
+ output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double
+ partition by: _col2
+ raw input shape:
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 p_partkey (type: int)
+ outputColumnNames: _col1, _col2, _col5, _col7
+ Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col5 (type: int), _col7 (type: double)
+ Reducer 4
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col1, _col2, _col5, _col7
+ Statistics: Num rows: 27 Data size: 20709 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col1
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ window function definition
+ alias: sum_window_2
+ arguments: _col7
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~CURRENT
+ window function definition
+ alias: lag_window_3
+ arguments: _col5, 1, _col5
+ name: lag
+ window function: GenericUDAFLagEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 27 Data size: 20709 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col7 (type: double), round(sum_window_2, 2) (type: double), _col5 (type: int), (_col5 - lag_window_3) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 27 Data size: 6777 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 6777 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select abc.p_mfgr, abc.p_name,
+rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r,
+dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr,
+abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
+abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
+from noop(on part
+partition by p_mfgr
+order by p_name
+) abc join part p1 on abc.p_partkey = p1.p_partkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select abc.p_mfgr, abc.p_name,
+rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r,
+dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr,
+abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1,
+abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz
+from noop(on part
+partition by p_mfgr
+order by p_name
+) abc join part p1 on abc.p_partkey = p1.p_partkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+abc.p_mfgr abc.p_name r dr abc.p_retailprice s1 abc.p_size deltasz
+Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 1173.15 2 0
+Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 2346.3 2 0
+Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 3519.45 2 0
+Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 4692.6 2 0
+Manufacturer#1 almond antique chartreuse lavender yellow 5 2 1753.76 6446.36 34 32
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 3 1602.59 8048.95 6 -28
+Manufacturer#1 almond aquamarine burnished black steel 7 4 1414.42 9463.37 28 22
+Manufacturer#1 almond aquamarine pink moccasin thistle 8 5 1632.66 11096.03 42 14
+Manufacturer#2 almond antique violet chocolate turquoise 1 1 1690.68 1690.68 14 0
+Manufacturer#2 almond antique violet turquoise frosted 2 2 1800.7 3491.38 40 26
+Manufacturer#2 almond aquamarine midnight light salmon 3 3 2031.98 5523.36 2 -38
+Manufacturer#2 almond aquamarine rose maroon antique 4 4 1698.66 7222.02 25 23
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 1701.6 8923.62 18 -7
+Manufacturer#3 almond antique chartreuse khaki white 1 1 1671.68 1671.68 17 0
+Manufacturer#3 almond antique forest lavender goldenrod 2 2 1190.27 2861.95 14 -3
+Manufacturer#3 almond antique metallic orange dim 3 3 1410.39 4272.34 19 5
+Manufacturer#3 almond antique misty red olive 4 4 1922.98 6195.32 1 -18
+Manufacturer#3 almond antique olive coral navajo 5 5 1337.29 7532.61 45 44
+Manufacturer#4 almond antique gainsboro frosted violet 1 1 1620.67 1620.67 10 0
+Manufacturer#4 almond antique violet mint lemon 2 2 1375.42 2996.09 39 29
+Manufacturer#4 almond aquamarine floral ivory bisque 3 3 1206.26 4202.35 27 -12
+Manufacturer#4 almond aquamarine yellow dodger mint 4 4 1844.92 6047.27 7 -20
+Manufacturer#4 almond azure aquamarine papaya violet 5 5 1290.35 7337.62 12 5
+Manufacturer#5 almond antique blue firebrick mint 1 1 1789.69 1789.69 31 0
+Manufacturer#5 almond antique medium spring khaki 2 2 1611.66 3401.35 6 -25
+Manufacturer#5 almond antique sky peru orange 3 3 1788.73 5190.08 2 -4
+Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 1018.1 6208.18 46 44
+Manufacturer#5 almond azure blanched chiffon midnight 5 5 1464.48 7672.66 23 -23
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R
+from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R
+from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int)
+ sort order: ++-
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 1, 5]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: []
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
+ vectorized: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST, _col5 DESC NULLS LAST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1, _col5
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R
+from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R
+from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+p_mfgr p_name p_size r
+Manufacturer#1 almond antique burnished rose metallic 2 1
+Manufacturer#1 almond antique burnished rose metallic 2 1
+Manufacturer#1 almond antique chartreuse lavender yellow 34 3
+Manufacturer#1 almond antique salmon chartreuse burlywood 6 4
+Manufacturer#1 almond aquamarine burnished black steel 28 5
+Manufacturer#1 almond aquamarine pink moccasin thistle 42 6
+Manufacturer#2 almond antique violet chocolate turquoise 14 1
+Manufacturer#2 almond antique violet turquoise frosted 40 2
+Manufacturer#2 almond aquamarine midnight light salmon 2 3
+Manufacturer#2 almond aquamarine rose maroon antique 25 4
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5
+Manufacturer#3 almond antique chartreuse khaki white 17 1
+Manufacturer#3 almond antique forest lavender goldenrod 14 2
+Manufacturer#3 almond antique metallic orange dim 19 3
+Manufacturer#3 almond antique misty red olive 1 4
+Manufacturer#3 almond antique olive coral navajo 45 5
+Manufacturer#4 almond antique gainsboro frosted violet 10 1
+Manufacturer#4 almond antique violet mint lemon 39 2
+Manufacturer#4 almond aquamarine floral ivory bisque 27 3
+Manufacturer#4 almond aquamarine yellow dodger mint 7 4
+Manufacturer#4 almond azure aquamarine papaya violet 12 5
+Manufacturer#5 almond antique blue firebrick mint 31 1
+Manufacturer#5 almond antique medium spring khaki 6 2
+Manufacturer#5 almond antique sky peru orange 2 3
+Manufacturer#5 almond aquamarine dodger light gainsboro 46 4
+Manufacturer#5 almond azure blanched chiffon midnight 23 5
+PREHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select p_mfgr, p_name, p_size,
+rank() over(distribute by p_mfgr sort by p_name) as r,
+dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
+round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1
+from part
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+
<TRUNCATED>