You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/04/26 02:27:57 UTC
[3/5] hive git commit: HIVE-16510: Vectorization: Add vectorized PTF
tests in preparation for HIVE-16369 (Matt McCline,
reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b271bcb7/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index 6bd8a29..df4b0d8 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -104,7 +104,7 @@ POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_staging)part_staging.FieldSc
POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_staging)part_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ]
POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchema(name:p_size, type:int, comment:null), ]
POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ]
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -114,7 +114,7 @@ from noop(on part_orc
order by p_name
)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -146,16 +146,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int), p_retailprice (type: double)
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -166,60 +169,13 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -246,16 +202,12 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int), _col7 (type: double)
- auto parallelism: true
Reducer 3
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -306,26 +258,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5
- columns.types string:string:int:int:int:double
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -381,14 +318,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
distribute by j.p_mfgr
sort by j.p_name)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
@@ -418,20 +355,26 @@ STAGE PLANS:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Filter Operator
- isSamplingPred: false
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: p_partkey (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 0
value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -442,75 +385,35 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [p1]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0, 1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Map 5
Map Operator Tree:
TableScan
alias: p2
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Filter Operator
- isSamplingPred: false
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: p_partkey (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -521,60 +424,13 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [p2]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
Execution mode: llap
- Needs Tagging: false
Reduce Operator Tree:
Merge Join Operator
condition map:
@@ -583,20 +439,15 @@ STAGE PLANS:
0 p_partkey (type: int)
1 p_partkey (type: int)
outputColumnNames: _col1, _col2, _col5
- Position of Big Table: 0
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int)
- auto parallelism: true
Reducer 3
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -623,16 +474,12 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int)
- auto parallelism: true
Reducer 4
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -670,26 +517,11 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types string:string:int:int
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -741,13 +573,13 @@ Manufacturer#5 almond antique medium spring khaki 6 -25
Manufacturer#5 almond antique sky peru orange 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 -23
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
@@ -774,16 +606,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int)
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -794,60 +629,13 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -878,26 +666,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2
- columns.types string:string:int
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -945,7 +718,7 @@ Manufacturer#5 almond antique medium spring khaki 6
Manufacturer#5 almond antique sky peru orange 2
Manufacturer#5 almond aquamarine dodger light gainsboro 46
Manufacturer#5 almond azure blanched chiffon midnight 23
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -955,7 +728,7 @@ from noop(on part_orc
order by p_name
) abc
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -987,16 +760,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int), p_retailprice (type: double)
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -1007,60 +783,13 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -1087,16 +816,12 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int), _col7 (type: double)
- auto parallelism: true
Reducer 3
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -1147,26 +872,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5
- columns.types string:string:int:int:int:double
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -1222,7 +932,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1232,7 +942,7 @@ from noop(on part_orc
order by p_name
)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1264,16 +974,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int)
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -1284,60 +997,13 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -1364,16 +1030,12 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int)
- auto parallelism: true
Reducer 3
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -1425,26 +1087,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
- columns.types string:string:int:int:int:int:int
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -1500,7 +1147,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1511,7 +1158,7 @@ from noop(on part_orc
)
group by p_mfgr, p_name, p_size
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1544,16 +1191,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int)
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -1564,60 +1214,13 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -1653,15 +1256,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
- null sort order: aaa
sort order: +++
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- auto parallelism: true
Reducer 3
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -1714,26 +1313,11 @@ STAGE PLANS:
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
- columns.types string:string:int:int:int:int:int
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -1790,14 +1374,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select abc.*
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select abc.*
from noop(on part_orc
partition by p_mfgr
@@ -1826,16 +1410,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -1846,75 +1433,35 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Map 4
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Filter Operator
- isSamplingPred: false
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: p_partkey (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -1925,60 +1472,13 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [p1]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -2004,21 +1504,16 @@ STAGE PLANS:
raw input shape:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- isSamplingPred: false
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 0
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- auto parallelism: true
Reducer 3
Execution mode: llap
- Needs Tagging: false
Reduce Operator Tree:
Merge Join Operator
condition map:
@@ -2027,30 +1522,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 p_partkey (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Position of Big Table: 0
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8
- columns.types int:string:string:string:string:int:string:double:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -2102,14 +1581,14 @@ POSTHOOK: Input: default@part_orc
85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
order by p_name
) abc on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
@@ -2138,19 +1617,25 @@ STAGE PLANS:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Filter Operator
- isSamplingPred: false
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: p_partkey (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -2161,72 +1646,29 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [p1]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Map 3
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
- auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -2237,60 +1679,13 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
Execution mode: llap
- Needs Tagging: false
Reduce Operator Tree:
Merge Join Operator
condition map:
@@ -2299,7 +1694,6 @@ STAGE PLANS:
0 p_partkey (type: int)
1 _col0 (type: int)
outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Position of Big Table: 1
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
@@ -2307,29 +1701,13 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8
- columns.types int:string:string:string:string:int:string:double:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Reducer 4
Execution mode: llap
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
@@ -2355,18 +1733,14 @@ STAGE PLANS:
raw input shape:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- isSamplingPred: false
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 1
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- auto parallelism: true
Stage: Stage-0
Fetch Operator
@@ -2418,14 +1792,14 @@ POSTHOOK: Input: default@part_orc
85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name, p_size desc)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
@@ -2454,7 +1828,6 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
PTF Operator
Function definitions:
Input definition
@@ -2473,12 +1846,9 @@ STAGE PLANS:
Map-side function: true
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int)
- null sort order: aaz
sort order: ++-
Map-reduce partition columns: p_mfgr (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- auto parallelism: true
Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
@@ -2487,60 +1857,8 @@ STAGE PLANS:
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
notVectorizedReason: PTF Operator (PTF) not supported
vectorized: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize
<TRUNCATED>