You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by tc...@apache.org on 2019/03/15 15:51:26 UTC
[hive] branch master updated: Remove unnecessary files from
HIVE-21368: Vectorization: Unnecessary Decimal64 -> HiveDecimal conversion
(Teddy Choi, reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository.
tchoi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 677eef8 Remove unnecessary files from HIVE-21368: Vectorization: Unnecessary Decimal64 -> HiveDecimal conversion (Teddy Choi, reviewed by Gopal V)
677eef8 is described below
commit 677eef81580ec0511d39aef1aef4cce6c34c5c0d
Author: Teddy Choi <pu...@gmail.com>
AuthorDate: Sat Mar 16 00:50:34 2019 +0900
Remove unnecessary files from HIVE-21368: Vectorization: Unnecessary Decimal64 -> HiveDecimal conversion (Teddy Choi, reviewed by Gopal V)
---
.../llap/vector_binary_join_groupby.q.out.orig | 645 ---------------------
.../vector_binary_join_groupby.q.out.orig | 573 ------------------
2 files changed, 1218 deletions(-)
diff --git a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out.orig b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out.orig
deleted file mode 100644
index 5881607..0000000
--- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out.orig
+++ /dev/null
@@ -1,645 +0,0 @@
-PREHOOK: query: DROP TABLE over1k_n7
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE over1k_n7
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: DROP TABLE hundredorc
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE hundredorc
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: CREATE TABLE over1k_n7(t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@over1k_n7
-POSTHOOK: query: CREATE TABLE over1k_n7(t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@over1k_n7
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_n7
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@over1k_n7
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_n7
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@over1k_n7
-PREHOOK: query: CREATE TABLE hundredorc(t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
-STORED AS ORC
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@hundredorc
-POSTHOOK: query: CREATE TABLE hundredorc(t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
-STORED AS ORC
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@hundredorc
-PREHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k_n7 LIMIT 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over1k_n7
-PREHOOK: Output: default@hundredorc
-POSTHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k_n7 LIMIT 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over1k_n7
-POSTHOOK: Output: default@hundredorc
-POSTHOOK: Lineage: hundredorc.b SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:b, type:bigint, comment:null), ]
-POSTHOOK: Lineage: hundredorc.bin SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:bin, type:binary, comment:null), ]
-POSTHOOK: Lineage: hundredorc.bo SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:bo, type:boolean, comment:null), ]
-POSTHOOK: Lineage: hundredorc.d SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:d, type:double, comment:null), ]
-POSTHOOK: Lineage: hundredorc.dec SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
-POSTHOOK: Lineage: hundredorc.f SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:f, type:float, comment:null), ]
-POSTHOOK: Lineage: hundredorc.i SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:i, type:int, comment:null), ]
-POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:s, type:string, comment:null), ]
-POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:si, type:smallint, comment:null), ]
-POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:t, type:tinyint, comment:null), ]
-POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT sum(hash(*)) k
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
-order by k
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT sum(hash(*)) k
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
-order by k
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t1
- filterExpr: bin is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 10:binary)
- predicate: bin is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col10 (type: binary)
- 1 _col10 (type: binary)
- Map Join Vectorization:
- bigTableValueExpressions: ConvertDecimal64ToDecimal(col 9:decimal(4,2)/DECIMAL_64) -> 12:decimal(4,2)
- className: VectorMapJoinInnerStringOperator
- native: true
- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
- hashTableImplementationType: OPTIMIZED
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
- input vertices:
- 1 Map 3
- Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int)
- outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [23]
- selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 23:int
- Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 23:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
- Map 3
- Map Operator Tree:
- TableScan
- alias: t2
- filterExpr: bin is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 10:binary)
- predicate: bin is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col10 (type: binary)
- sort order: +
- Map-reduce partition columns: _col10 (type: binary)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2))
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
- Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0]
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT sum(hash(*)) k
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
-order by k
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(*)) k
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
-order by k
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
--27832781952
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT count(*), bin
-FROM hundredorc
-GROUP BY bin
-order by bin
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT count(*), bin
-FROM hundredorc
-GROUP BY bin
-order by bin
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: hundredorc
- Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- Select Operator
- expressions: bin (type: binary)
- outputColumnNames: bin
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [10]
- Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 10:binary
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- keys: bin (type: binary)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 50 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: binary)
- sort order: +
- Map-reduce partition columns: _col0 (type: binary)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 50 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:binary
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
- keys: KEY._col0 (type: binary)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col1 (type: bigint), _col0 (type: binary)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0]
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col1 (type: binary)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reducer 3
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: binary)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0]
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT count(*), bin
-FROM hundredorc
-GROUP BY bin
-order by bin
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT count(*), bin
-FROM hundredorc
-GROUP BY bin
-order by bin
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-5 american history
-5 biology
-2 chemistry
-2 debate
-4 education
-5 forestry
-4 geology
-5 history
-6 industrial engineering
-3 joggying
-5 kindergarten
-1 linguistics
-9 mathematics
-8 nap time
-1 opthamology
-2 philosophy
-5 quiet hour
-4 religion
-3 study skills
-7 topology
-1 undecided
-2 values clariffication
-3 wind surfing
-3 xylophone band
-2 yard duty
-3 zync studies
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT t1.i, t1.bin, t2.bin
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT t1.i, t1.bin, t2.bin
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t1
- filterExpr: i is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 2:int)
- predicate: i is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: i (type: int), bin (type: binary)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2, 10]
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Map Join Vectorization:
- className: VectorMapJoinInnerLongOperator
- native: true
- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
- hashTableImplementationType: OPTIMIZED
- outputColumnNames: _col0, _col1, _col3
- input vertices:
- 1 Map 2
- Statistics: Num rows: 125 Data size: 14500 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
- outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2, 10, 12]
- Statistics: Num rows: 125 Data size: 14500 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 125 Data size: 14500 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Map 2
- Map Operator Tree:
- TableScan
- alias: t2
- filterExpr: i is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 2:int)
- predicate: i is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: i (type: int), bin (type: binary)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2, 10]
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: binary)
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
diff --git a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out.orig b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out.orig
deleted file mode 100644
index 00200f9..0000000
--- a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out.orig
+++ /dev/null
@@ -1,573 +0,0 @@
-PREHOOK: query: DROP TABLE over1k_n7
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE over1k_n7
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: DROP TABLE hundredorc
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE hundredorc
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: CREATE TABLE over1k_n7(t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@over1k_n7
-POSTHOOK: query: CREATE TABLE over1k_n7(t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
-STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@over1k_n7
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_n7
-PREHOOK: type: LOAD
-#### A masked pattern was here ####
-PREHOOK: Output: default@over1k_n7
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over1k' OVERWRITE INTO TABLE over1k_n7
-POSTHOOK: type: LOAD
-#### A masked pattern was here ####
-POSTHOOK: Output: default@over1k_n7
-PREHOOK: query: CREATE TABLE hundredorc(t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
-STORED AS ORC
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@hundredorc
-POSTHOOK: query: CREATE TABLE hundredorc(t tinyint,
- si smallint,
- i int,
- b bigint,
- f float,
- d double,
- bo boolean,
- s string,
- ts timestamp,
- `dec` decimal(4,2),
- bin binary)
-STORED AS ORC
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@hundredorc
-PREHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k_n7 LIMIT 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@over1k_n7
-PREHOOK: Output: default@hundredorc
-POSTHOOK: query: INSERT INTO TABLE hundredorc SELECT * FROM over1k_n7 LIMIT 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@over1k_n7
-POSTHOOK: Output: default@hundredorc
-POSTHOOK: Lineage: hundredorc.b SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:b, type:bigint, comment:null), ]
-POSTHOOK: Lineage: hundredorc.bin SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:bin, type:binary, comment:null), ]
-POSTHOOK: Lineage: hundredorc.bo SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:bo, type:boolean, comment:null), ]
-POSTHOOK: Lineage: hundredorc.d SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:d, type:double, comment:null), ]
-POSTHOOK: Lineage: hundredorc.dec SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
-POSTHOOK: Lineage: hundredorc.f SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:f, type:float, comment:null), ]
-POSTHOOK: Lineage: hundredorc.i SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:i, type:int, comment:null), ]
-POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:s, type:string, comment:null), ]
-POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:si, type:smallint, comment:null), ]
-POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:t, type:tinyint, comment:null), ]
-POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k_n7)over1k_n7.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT sum(hash(*)) k
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
-order by k
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT sum(hash(*)) k
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
-order by k
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-2 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-5
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:$hdt$_0:t1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:$hdt$_0:t1
- TableScan
- alias: t1
- filterExpr: bin is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: bin is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- HashTable Sink Operator
- keys:
- 0 _col10 (type: binary)
- 1 _col10 (type: binary)
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: t2
- filterExpr: bin is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 10:binary)
- predicate: bin is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col10 (type: binary)
- 1 _col10 (type: binary)
- Map Join Vectorization:
- bigTableKeyExpressions: col 10:binary
- bigTableValueExpressions: col 0:tinyint, col 1:smallint, col 2:int, col 3:bigint, col 4:float, col 5:double, col 6:boolean, col 7:string, col 8:timestamp, ConvertDecimal64ToDecimal(col 9:decimal(4,2)/DECIMAL_64) -> 12:decimal(4,2), col 10:binary
- className: VectorMapJoinOperator
- native: false
- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
- Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int)
- outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [22]
- selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 22:int
- Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 22:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
- Local Work:
- Map Reduce Local Work
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT sum(hash(*)) k
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
-order by k
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(*)) k
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
-order by k
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
--27832781952
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT count(*), bin
-FROM hundredorc
-GROUP BY bin
-order by bin
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT count(*), bin
-FROM hundredorc
-GROUP BY bin
-order by bin
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hundredorc
- Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- Select Operator
- expressions: bin (type: binary)
- outputColumnNames: bin
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [10]
- Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 10:binary
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
- keys: bin (type: binary)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 50 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: binary)
- sort order: +
- Map-reduce partition columns: _col0 (type: binary)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 50 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: binary)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col1 (type: bigint), _col0 (type: binary)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- TableScan Vectorization:
- native: true
- Reduce Output Operator
- key expressions: _col1 (type: binary)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
- inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: binary)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT count(*), bin
-FROM hundredorc
-GROUP BY bin
-order by bin
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT count(*), bin
-FROM hundredorc
-GROUP BY bin
-order by bin
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-5 american history
-5 biology
-2 chemistry
-2 debate
-4 education
-5 forestry
-4 geology
-5 history
-6 industrial engineering
-3 joggying
-5 kindergarten
-1 linguistics
-9 mathematics
-8 nap time
-1 opthamology
-2 philosophy
-5 quiet hour
-4 religion
-3 study skills
-7 topology
-1 undecided
-2 values clariffication
-3 wind surfing
-3 xylophone band
-2 yard duty
-3 zync studies
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT t1.i, t1.bin, t2.bin
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT t1.i, t1.bin, t2.bin
-FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hundredorc
-#### A masked pattern was here ####
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-4 is a root stage
- Stage-3 depends on stages: Stage-4
- Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
- Stage: Stage-4
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:t1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:t1
- TableScan
- alias: t1
- filterExpr: i is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: i is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: i (type: int), bin (type: binary)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: t2
- filterExpr: i is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 2:int)
- predicate: i is not null (type: boolean)
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: i (type: int), bin (type: binary)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2, 10]
- Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Map Join Vectorization:
- bigTableKeyExpressions: col 2:int
- bigTableValueExpressions: col 10:binary
- className: VectorMapJoinOperator
- native: false
- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 125 Data size: 14500 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
- outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 125 Data size: 14500 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 125 Data size: 14500 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-