You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/07/20 10:16:44 UTC
[18/36] hive git commit: HIVE-16369: Vectorization: Support PTF (Part 1: No Custom Window Framing -- Default Only) (Matt McCline, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out
new file mode 100644
index 0000000..c678130
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out
@@ -0,0 +1,1036 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ `dec` decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ `dec` decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+PREHOOK: query: explain vectorization detail
+select *
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select *
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [2, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [2]
+ valueColumns: []
+ Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0]
+ Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank]
+ functionInputExpressions: [col 1]
+ functionNames: [rank]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: []
+ orderExpressions: [col 1]
+ outputColumns: [2, 1, 0]
+ outputTypes: [int, string, string]
+ partitionExpressions: [col 0]
+ streamingColumns: [2]
+ Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 2]
+ Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain vectorization detail
+select *
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+where r < 4
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select *
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+where r < 4
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_name (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No PTF TopN IS false
+ Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.8
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0]
+ Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col1
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank]
+ functionInputExpressions: [col 1]
+ functionNames: [rank]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: []
+ orderExpressions: [col 1]
+ outputColumns: [2, 1, 0]
+ outputTypes: [int, string, string]
+ partitionExpressions: [col 0]
+ streamingColumns: [2]
+ Statistics: Num rows: 26 Data size: 12662 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColLessLongScalar(col 2, val 4) -> boolean
+ predicate: (rank_window_0 < 4) (type: boolean)
+ Statistics: Num rows: 8 Data size: 3896 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 2]
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+where r < 4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+where r < 4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+a.p_mfgr a.r
+Manufacturer#1 1
+Manufacturer#1 1
+Manufacturer#1 3
+Manufacturer#2 1
+Manufacturer#2 2
+Manufacturer#2 3
+Manufacturer#3 1
+Manufacturer#3 2
+Manufacturer#3 3
+Manufacturer#4 1
+Manufacturer#4 2
+Manufacturer#4 3
+Manufacturer#5 1
+Manufacturer#5 2
+Manufacturer#5 3
+PREHOOK: query: select *
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+where r < 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a
+where r < 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+a.p_mfgr a.r
+Manufacturer#1 1
+Manufacturer#1 1
+Manufacturer#2 1
+Manufacturer#3 1
+Manufacturer#4 1
+Manufacturer#5 1
+PREHOOK: query: explain vectorization detail
+select *
+from (select t, f, rank() over(partition by t order by f) r from over10k) a
+where r < 6 and t < 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select *
+from (select t, f, rank() over(partition by t order by f) r from over10k) a
+where r < 6 and t < 5
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: over10k
+ Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColLessLongScalar(col 0, val 5) -> boolean
+ predicate: (t < 5) (type: boolean)
+ Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: t (type: tinyint), f (type: float)
+ sort order: ++
+ Map-reduce partition columns: t (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No PTF TopN IS false
+ Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.8
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ includeColumns: [0, 4]
+ dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, bo:boolean, s:string, ts:timestamp, dec:decimal(4,2), bin:binary
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:float
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: float)
+ outputColumnNames: _col0, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
+ Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: tinyint, _col4: float
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col4 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col4
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank]
+ functionInputExpressions: [col 1]
+ functionNames: [rank]
+ keyInputColumns: [0, 1]
+ native: true
+ nonKeyInputColumns: []
+ orderExpressions: [col 1]
+ outputColumns: [2, 0, 1]
+ outputTypes: [int, tinyint, float]
+ partitionExpressions: [col 0]
+ streamingColumns: [2]
+ Statistics: Num rows: 42397 Data size: 339176 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColLessLongScalar(col 2, val 6) -> boolean
+ predicate: (rank_window_0 < 6) (type: boolean)
+ Statistics: Num rows: 14132 Data size: 113056 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col4 (type: float), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2]
+ Statistics: Num rows: 14132 Data size: 113056 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 14132 Data size: 113056 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from (select t, f, rank() over(partition by t order by f) r from over10k) a
+where r < 6 and t < 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from (select t, f, rank() over(partition by t order by f) r from over10k) a
+where r < 6 and t < 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+a.t a.f a.r
+-3 0.56 1
+-3 0.83 2
+-3 2.26 3
+-3 2.48 4
+-3 3.82 5
+-2 1.55 1
+-2 1.65 2
+-2 1.79 3
+-2 4.06 4
+-2 4.4 5
+-1 0.79 1
+-1 0.95 2
+-1 1.27 3
+-1 1.49 4
+-1 2.8 5
+0 0.08 1
+0 0.94 2
+0 1.44 3
+0 2.0 4
+0 2.12 5
+1 0.13 1
+1 0.44 2
+1 1.04 3
+1 3.41 4
+1 3.45 5
+2 2.21 1
+2 3.1 2
+2 9.93 3
+2 11.43 4
+2 15.45 5
+3 0.12 1
+3 0.19 2
+3 7.14 3
+3 7.97 4
+3 8.95 5
+4 2.26 1
+4 5.51 2
+4 5.53 3
+4 5.76 4
+4 7.26 5
+PREHOOK: query: select *
+from (select t, f, row_number() over(partition by t order by f) r from over10k) a
+where r < 8 and t < 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from (select t, f, row_number() over(partition by t order by f) r from over10k) a
+where r < 8 and t < 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+a.t a.f a.r
+-3 0.56 1
+-3 0.83 2
+-3 2.26 3
+-3 2.48 4
+-3 3.82 5
+-3 6.8 6
+-3 6.83 7
+-2 1.55 1
+-2 1.65 2
+-2 1.79 3
+-2 4.06 4
+-2 4.4 5
+-2 5.43 6
+-2 5.59 7
+-1 0.79 1
+-1 0.95 2
+-1 1.27 3
+-1 1.49 4
+-1 2.8 5
+-1 4.08 6
+-1 4.31 7
+PREHOOK: query: explain vectorization detail
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: ctinyint (type: tinyint), cdouble (type: double)
+ sort order: ++
+ Map-reduce partition columns: ctinyint (type: tinyint)
+ Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.8
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col5
+ Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: tinyint, _col5: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col5 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col5
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (rank_window_0 < 5) (type: boolean)
+ Statistics: Num rows: 4096 Data size: 1134436 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: drop table if exists sB
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists sB
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sB
+POSTHOOK: query: create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sB
+POSTHOOK: Lineage: sb.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: sb.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: sb.r SCRIPT [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+a.ctinyint a.cdouble a.r
+PREHOOK: query: select * from sB
+where ctinyint is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sb
+#### A masked pattern was here ####
+POSTHOOK: query: select * from sB
+where ctinyint is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sb
+#### A masked pattern was here ####
+sb.ctinyint sb.cdouble sb.r
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+PREHOOK: query: drop table if exists sD
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists sD
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: explain vectorization detail
+create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain vectorization detail
+create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ Reduce Output Operator
+ key expressions: ctinyint (type: tinyint), cdouble (type: double)
+ sort order: ++
+ Map-reduce partition columns: ctinyint (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No PTF TopN IS false
+ Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.8
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 5]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: bigint
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double)
+ outputColumnNames: _col0, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
+ Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: tinyint, _col5: double
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col5 ASC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: rank_window_0
+ arguments: _col5
+ name: rank
+ window function: GenericUDAFRankEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank]
+ functionInputExpressions: [col 1]
+ functionNames: [rank]
+ keyInputColumns: [0, 1]
+ native: true
+ nonKeyInputColumns: []
+ orderExpressions: [col 1]
+ outputColumns: [2, 0, 1]
+ outputTypes: [int, tinyint, double]
+ partitionExpressions: [col 0]
+ streamingColumns: [2]
+ Statistics: Num rows: 12288 Data size: 3403280 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColLessLongScalar(col 2, val 5) -> boolean
+ predicate: (rank_window_0 < 5) (type: boolean)
+ Statistics: Num rows: 4096 Data size: 1134436 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: tinyint), _col5 (type: double), rank_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2]
+ Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 4096 Data size: 53092 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.sD
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-4
+ Create Table Operator:
+ Create Table
+ columns: ctinyint tinyint, cdouble double, r int
+ field delimiter: ,
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.sD
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sD
+POSTHOOK: query: create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as
+select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sD
+POSTHOOK: Lineage: sd.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: sd.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: sd.r SCRIPT [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), (alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+a.ctinyint a.cdouble a.r
+PREHOOK: query: select * from sD
+where ctinyint is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sd
+#### A masked pattern was here ####
+POSTHOOK: query: select * from sD
+where ctinyint is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sd
+#### A masked pattern was here ####
+sd.ctinyint sd.cdouble sd.r
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1
+NULL NULL 1