You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/04/26 02:27:56 UTC
[2/5] hive git commit: HIVE-16510: Vectorization: Add vectorized PTF
tests in preparation for HIVE-16369 (Matt McCline,
reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b271bcb7/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index 8b313c0..3e6a73f 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -104,7 +104,7 @@ POSTHOOK: Lineage: part_orc.p_partkey SIMPLE [(part_staging)part_staging.FieldSc
POSTHOOK: Lineage: part_orc.p_retailprice SIMPLE [(part_staging)part_staging.FieldSchema(name:p_retailprice, type:double, comment:null), ]
POSTHOOK: Lineage: part_orc.p_size SIMPLE [(part_staging)part_staging.FieldSchema(name:p_size, type:int, comment:null), ]
POSTHOOK: Lineage: part_orc.p_type SIMPLE [(part_staging)part_staging.FieldSchema(name:p_type, type:string, comment:null), ]
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -114,7 +114,7 @@ from noop(on part_orc
order by p_name
)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -145,16 +145,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int), p_retailprice (type: double)
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -164,59 +167,12 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -243,15 +199,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int), _col7 (type: double)
- auto parallelism: false
Reducer 3
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -302,26 +254,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5
- columns.types string:string:int:int:int:double
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -377,14 +314,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
distribute by j.p_mfgr
sort by j.p_name)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name,
p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz
from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j
@@ -413,20 +350,26 @@ STAGE PLANS:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Filter Operator
- isSamplingPred: false
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: p_partkey (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 0
value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -436,75 +379,35 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [p1]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0, 1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Map 5
Map Operator Tree:
TableScan
alias: p2
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Filter Operator
- isSamplingPred: false
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: p_partkey (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -514,59 +417,12 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [p2]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
- Needs Tagging: true
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -583,15 +439,11 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int)
- auto parallelism: false
Reducer 3
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -618,15 +470,11 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int)
- auto parallelism: false
Reducer 4
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -664,26 +512,11 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types string:string:int:int
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -735,13 +568,13 @@ Manufacturer#5 almond antique medium spring khaki 6 -25
Manufacturer#5 almond antique sky peru orange 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 -23
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
order by p_name)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size
from noop(on part_orc
partition by p_mfgr
@@ -767,16 +600,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int)
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -786,59 +622,12 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -869,26 +658,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2
- columns.types string:string:int
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -936,7 +710,7 @@ Manufacturer#5 almond antique medium spring khaki 6
Manufacturer#5 almond antique sky peru orange 2
Manufacturer#5 almond aquamarine dodger light gainsboro 46
Manufacturer#5 almond azure blanched chiffon midnight 23
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -946,7 +720,7 @@ from noop(on part_orc
order by p_name
) abc
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -977,16 +751,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int), p_retailprice (type: double)
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -996,59 +773,12 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5, 7]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -1075,15 +805,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int), _col7 (type: double)
- auto parallelism: false
Reducer 3
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -1134,26 +860,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5
- columns.types string:string:int:int:int:double
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -1209,7 +920,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.35
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1219,7 +930,7 @@ from noop(on part_orc
order by p_name
)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1250,16 +961,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int)
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -1269,59 +983,12 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -1348,15 +1015,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: _col5 (type: int)
- auto parallelism: false
Reducer 3
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -1408,26 +1071,11 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
- columns.types string:string:int:int:int:int:int
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -1483,7 +1131,7 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1494,7 +1142,7 @@ from noop(on part_orc
)
group by p_mfgr, p_name, p_size
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name) as r,
dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -1526,16 +1174,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_size (type: int)
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -1545,59 +1196,12 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [1, 2, 5]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -1633,14 +1237,10 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
- null sort order: aaa
sort order: +++
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- auto parallelism: false
Reducer 3
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -1693,26 +1293,11 @@ STAGE PLANS:
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
- columns.types string:string:int:int:int:int:int
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -1769,14 +1354,14 @@ Manufacturer#5 almond antique medium spring khaki 6 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 23 -23
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select abc.*
from noop(on part_orc
partition by p_mfgr
order by p_name
) abc join part_orc p1 on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select abc.*
from noop(on part_orc
partition by p_mfgr
@@ -1804,16 +1389,19 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -1823,75 +1411,35 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Map 4
Map Operator Tree:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Filter Operator
- isSamplingPred: false
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: p_partkey (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -1901,59 +1449,12 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [p1]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -1979,20 +1480,15 @@ STAGE PLANS:
raw input shape:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- isSamplingPred: false
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 0
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- auto parallelism: false
Reducer 3
- Needs Tagging: true
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -2009,26 +1505,11 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8
- columns.types int:string:string:string:string:int:string:double:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Stage: Stage-0
Fetch Operator
@@ -2080,14 +1561,14 @@ POSTHOOK: Input: default@part_orc
85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
order by p_name
) abc on abc.p_partkey = p1.p_partkey
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select abc.*
from part_orc p1 join noop(on part_orc
partition by p_mfgr
@@ -2115,19 +1596,25 @@ STAGE PLANS:
TableScan
alias: p1
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Filter Operator
- isSamplingPred: false
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: p_partkey (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: p_partkey (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -2137,72 +1624,29 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [p1]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Map 3
Map Operator Tree:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string)
- null sort order: aa
sort order: ++
Map-reduce partition columns: p_mfgr (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
- auto parallelism: false
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -2212,59 +1656,12 @@ STAGE PLANS:
allNative: true
usesVectorUDFAdaptor: false
vectorized: true
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.part_orc
- name: default.part_orc
- Truncated Path -> Alias:
- /part_orc [part_orc]
+ rowBatchContext:
+ dataColumnCount: 9
+ includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ dataColumns: p_partkey:int, p_name:string, p_mfgr:string, p_brand:string, p_type:string, p_size:int, p_container:string, p_retailprice:double, p_comment:string
+ partitionColumnCount: 0
Reducer 2
- Needs Tagging: true
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -2285,28 +1682,12 @@ STAGE PLANS:
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8
- columns.types int:string:string:string:string:int:string:double:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
Reducer 4
- Needs Tagging: false
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
@@ -2332,18 +1713,14 @@ STAGE PLANS:
raw input shape:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- isSamplingPred: false
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
- null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: 1
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- auto parallelism: false
Stage: Stage-0
Fetch Operator
@@ -2395,14 +1772,14 @@ POSTHOOK: Input: default@part_orc
85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
-PREHOOK: query: explain vectorization extended
+PREHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
partition by p_mfgr
order by p_name, p_size desc)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization extended
+POSTHOOK: query: explain vectorization detail
select p_mfgr, p_name, p_size,
rank() over (partition by p_mfgr order by p_name, p_size desc) as r
from noopwithmap(on part_orc
@@ -2430,7 +1807,6 @@ STAGE PLANS:
TableScan
alias: part_orc
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
PTF Operator
Function definitions:
Input definition
@@ -2449,71 +1825,16 @@ STAGE PLANS:
Map-side function: true
Reduce Output Operator
key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int)
- null sort order: aaz
sort order: ++-
Map-reduce partition columns: p_mfgr (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- auto parallelism: false
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
notVectorizedReason: PTF Operator (PTF) not supported
vectorized: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: part_orc
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2689
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
-
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- column.name.delimiter ,
- columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
- columns.comments
- columns.types int:string:string:string:string:int:string:double:string
-#### A masked pattern was here ####
- name default.part_orc
- numFiles 1
- numRows 26
- rawDataSize 16042
- serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.ql.i
<TRUNCATED>