You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/07/20 10:16:34 UTC
[08/36] hive git commit: HIVE-16369: Vectorization: Support PTF (Part
1: No Custom Window Framing -- Default Only) (Matt McCline,
reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/vector_windowing_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_windowing_gby.q.out b/ql/src/test/results/clientpositive/vector_windowing_gby.q.out
new file mode 100644
index 0000000..8ddd2ff
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_windowing_gby.q.out
@@ -0,0 +1,252 @@
+PREHOOK: query: explain vectorization detail
+ select rank() over (order by return_ratio) as return_rank from
+ (select sum(wr.cint)/sum(ws.c_int) as return_ratio
+ from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+ group by ws.c_boolean ) in_web
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+ select rank() over (order by return_ratio) as return_rank from
+ (select sum(wr.cint)/sum(ws.c_int) as return_ratio
+ from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+ group by ws.c_boolean ) in_web
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: ws
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), c_int (type: int), c_boolean (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: boolean)
+ TableScan
+ alias: wr
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cstring1 is not null (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int), cstring1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), sum(_col1)
+ Group By Vectorization:
+ groupByMode: HASH
+ vectorOutput: false
+ native: false
+ vectorProcessingMode: NONE
+ projectedOutputColumns: null
+ keys: _col2 (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2]
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: _col0:boolean, _col1:bigint, _col2:bigint
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ Group By Vectorization:
+ groupByMode: MERGEPARTIAL
+ vectorOutput: false
+ native: false
+ vectorProcessingMode: NONE
+ projectedOutputColumns: null
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint), _col2 (type: bigint)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
+ Reduce Output Operator
+ key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double)
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: _col1:bigint, _col2:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint, double, double, double, bigint
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: bigint, _col2: bigint
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: (UDFToDouble(_col1) / UDFToDouble(_col2))
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select rank() over (order by return_ratio) as return_rank from
+ (select sum(wr.cint)/sum(ws.c_int) as return_ratio
+ from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+ group by ws.c_boolean ) in_web
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by return_ratio) as return_rank from
+ (select sum(wr.cint)/sum(ws.c_int) as return_ratio
+ from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+ group by ws.c_boolean ) in_web
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+return_rank
http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/vector_windowing_gby2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_windowing_gby2.q.out b/ql/src/test/results/clientpositive/vector_windowing_gby2.q.out
new file mode 100644
index 0000000..b063d3a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_windowing_gby2.q.out
@@ -0,0 +1,1015 @@
+PREHOOK: query: explain vectorization detail
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: ws
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4]
+ Select Operator
+ expressions: key (type: string), c_int (type: int)
+ outputColumnNames: key, c_int
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 2]
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(c_int)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 2) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumns: [0]
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 5
+ includeColumns: [0, 2]
+ dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ groupByMode: MERGEPARTIAL
+ vectorOutput: false
+ native: false
+ vectorProcessingMode: NONE
+ projectedOutputColumns: null
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
+ Reduce Output Operator
+ key expressions: 0 (type: int), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: _col1:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint, bigint
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: bigint
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+return_rank
+1
+2
+2
+2
+5
+5
+7
+PREHOOK: query: explain vectorization detail
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: ws
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4]
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), value (type: string), c_int (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [5, 1, 2]
+ selectExpressions: CastStringToLong(col 0) -> 5:int
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col1), sum(_col2)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinString(col 1) -> string, VectorUDAFSumLong(col 2) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ vectorOutput: true
+ keyExpressions: col 5
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumns: [0, 1]
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 5
+ includeColumns: [0, 1, 2]
+ dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), sum(VALUE._col1)
+ Group By Vectorization:
+ groupByMode: MERGEPARTIAL
+ vectorOutput: false
+ native: false
+ vectorProcessingMode: NONE
+ projectedOutputColumns: null
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2]
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: _col0:int, _col1:string, _col2:bigint
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: string, _col2: bigint
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: avg_window_0
+ arguments: _col0
+ name: avg
+ window function: GenericUDAFAverageEvaluatorDouble
+ window frame: RANGE PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: avg_window_0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+return_rank
+NULL
+1.0
+2.0
+3.0
+PREHOOK: query: explain vectorization detail
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cbo_t3
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4]
+ Select Operator
+ expressions: key (type: string), value (type: string), (UDFToFloat(c_int) - c_float) (type: float), (UDFToDouble(c_float) / UDFToDouble(c_int)) (type: double), c_int (type: int), ((UDFToDouble(c_float) / UDFToDouble(c_int)) - UDFToDouble(c_int)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 6, 7, 2, 9]
+ selectExpressions: DoubleColSubtractDoubleColumn(col 5, col 3)(children: CastLongToFloatViaLongToDouble(col 2) -> 5:double) -> 6:double, DoubleColDivideDoubleColumn(col 3, col 5)(children: col 3, CastLongToDouble(col 2) -> 5:double) -> 7:double, DoubleColSubtractDoubleColumn(col 8, col 5)(children: DoubleColDivideDoubleColumn(col 3, col 5)(children: col 3, CastLongToDouble(col 2) -> 5:double) -> 8:double, CastLongToDouble(col 2) -> 5:double) -> 9:double
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), sum(_col3), max(_col4), sum(_col5)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDouble(col 6) -> double, VectorUDAFSumDouble(col 7) -> double, VectorUDAFMaxLong(col 2) -> int, VectorUDAFSumDouble(col 9) -> double
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ vectorOutput: true
+ keyExpressions: col 0, col 1
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumns: [0, 1, 2, 3]
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: double)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 5
+ includeColumns: [0, 1, 2, 3]
+ dataColumns: key:string, value:string, c_int:int, c_float:float, c_boolean:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: double, double, double, double, double
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), sum(VALUE._col3)
+ Group By Vectorization:
+ groupByMode: MERGEPARTIAL
+ vectorOutput: false
+ native: false
+ vectorProcessingMode: NONE
+ projectedOutputColumns: null
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col2 (type: double)
+ sort order: +-
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 6
+ includeColumns: [0, 1, 2, 3, 4, 5]
+ dataColumns: _col0:string, _col1:string, _col2:double, _col3:double, _col4:int, _col5:double
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: int), VALUE._col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string, _col2: double, _col3: double, _col4: int, _col5: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col2 DESC NULLS LAST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col2
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_0 (type: int), _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double)
+ outputColumnNames: rank_window_0, _col1, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4]
+ Reduce Output Operator
+ key expressions: lower(_col1) (type: string), _col3 (type: double)
+ sort order: ++
+ Map-reduce partition columns: lower(_col1) (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ value expressions: rank_window_0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: double)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 5
+ includeColumns: [0, 1, 2, 3, 4]
+ dataColumns: rank_window_0:int, _col1:string, _col3:double, _col4:int, _col5:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: string, string
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: double)
+ outputColumnNames: _col0, _col2, _col4, _col5, _col6
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col2: string, _col4: double, _col5: int, _col6: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col4 ASC NULLS FIRST
+ partition by: lower(_col2)
+ raw input shape:
+ window functions:
+ window function definition
+ alias: dense_rank_window_1
+ arguments: _col4
+ name: dense_rank
+ window function: GenericUDAFDenseRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: dense_rank_window_1 (type: int), _col0 (type: int), _col5 (type: int), _col6 (type: double)
+ outputColumnNames: dense_rank_window_1, _col0, _col5, _col6
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
+ Reduce Output Operator
+ key expressions: _col5 (type: int), _col6 (type: double)
+ sort order: ++
+ Map-reduce partition columns: _col5 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ value expressions: dense_rank_window_1 (type: int), _col0 (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ includeColumns: [0, 1, 2, 3]
+ dataColumns: dense_rank_window_1:int, _col0:int, _col5:int, _col6:double
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col1, _col6, _col7
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: int, _col6: int, _col7: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col7 ASC NULLS FIRST
+ partition by: _col6
+ raw input shape:
+ window functions:
+ window function definition
+ alias: percent_rank_window_2
+ arguments: _col7
+ name: percent_rank
+ window function: GenericUDAFPercentRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: int), percent_rank_window_2 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+_c0 _c1 _c2
+1 1 0.0
+1 1 0.0
+1 1 0.0
+1 1 0.0
+1 1 0.0
+1 1 0.0
+1 1 0.0
+PREHOOK: query: explain vectorization detail
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: ws
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string), c_int (type: int), c_boolean (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int), _col2 (type: boolean)
+ TableScan
+ alias: wr
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cstring1 is not null (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int), cstring1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col3), sum(_col1)
+ Group By Vectorization:
+ groupByMode: HASH
+ vectorOutput: false
+ native: false
+ vectorProcessingMode: NONE
+ projectedOutputColumns: null
+ keys: _col2 (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2]
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: _col0:boolean, _col1:bigint, _col2:bigint
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ Group By Vectorization:
+ groupByMode: MERGEPARTIAL
+ vectorOutput: false
+ native: false
+ vectorProcessingMode: NONE
+ projectedOutputColumns: null
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint), _col2 (type: bigint)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
+ Reduce Output Operator
+ key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double)
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: _col1:bigint, _col2:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint, double, double, double, bigint
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: bigint, _col2: bigint
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: (UDFToDouble(_col1) / UDFToDouble(_col2))
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rank_window_0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+return_rank