You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2018/10/21 19:47:08 UTC
[2/7] hive git commit: HIVE-20703: Put dynamic sort partition
optimization under cost based decision (Vineet Garg,
reviewed by Prasanth Jayachandran, Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out
index 7cd5ba4..a912216 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out
@@ -76,52 +76,28 @@ STAGE PLANS:
expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.orcfile_merge1_n0
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string)
- outputColumnNames: key, value, ds, part
- Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
- keys: ds (type: string), part (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ value expressions: _col0 (type: int), _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
- Group By Operator
- aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1_n0
Stage: Stage-2
Dependency Collection
@@ -164,7 +140,7 @@ POSTHOOK: Lineage: orcfile_merge1_n0 PARTITION(ds=1,part=0).key EXPRESSION [(src
POSTHOOK: Lineage: orcfile_merge1_n0 PARTITION(ds=1,part=0).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: orcfile_merge1_n0 PARTITION(ds=1,part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: orcfile_merge1_n0 PARTITION(ds=1,part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-Found 6 items
+Found 1 items
#### A masked pattern was here ####
PREHOOK: query: EXPLAIN
INSERT OVERWRITE TABLE orcfile_merge1b_n0 PARTITION (ds='1', part)
@@ -207,52 +183,28 @@ STAGE PLANS:
expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.orcfile_merge1b_n0
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string)
- outputColumnNames: key, value, ds, part
- Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
- keys: ds (type: string), part (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ value expressions: _col0 (type: int), _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
- Group By Operator
- aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1b_n0
Stage: Stage-8
Conditional Operator
@@ -383,52 +335,28 @@ STAGE PLANS:
expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.orcfile_merge1c_n0
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string)
- outputColumnNames: key, value, ds, part
- Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
- keys: ds (type: string), part (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ value expressions: _col0 (type: int), _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
- Group By Operator
- aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orcfile_merge1c_n0
Stage: Stage-8
Conditional Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out
index 5c5bc8e..ee13faf 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out
@@ -52,52 +52,28 @@ STAGE PLANS:
expressions: userid (type: bigint), string1 (type: string), subtype (type: double), decimal1 (type: decimal(38,0)), ts (type: timestamp), subtype (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.orc_merge5a
- Select Operator
- expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp), _col5 (type: double)
- outputColumnNames: userid, string1, subtype, decimal1, ts, st
- Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll')
- keys: st (type: double)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Map-reduce partition columns: _col0 (type: double)
- Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:decimal(38,0),max:decimal(38,0),countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Output Operator
+ key expressions: _col5 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col5 (type: double)
+ value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(38,0)), _col4 (type: timestamp)
Execution mode: llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
- Group By Operator
- aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
- keys: KEY._col0 (type: double)
- mode: mergepartial
+ Select Operator
+ expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: double), VALUE._col3 (type: decimal(38,0)), VALUE._col4 (type: timestamp), KEY._col5 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:decimal(38,0),max:decimal(38,0),countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orc_merge5a
Stage: Stage-2
Dependency Collection
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index 9d7b988..d981733 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -2594,24 +2594,16 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type: bigint)
- Select Operator
- expressions: _col1 (type: double)
- outputColumnNames: _col1
- Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: floor(_col1) (type: bigint)
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: bigint)
- mode: hash
- outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- sort order: +
- Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/tez_dml.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_dml.q.out b/ql/src/test/results/clientpositive/llap/tez_dml.q.out
index 67d4bf3..a9b3172 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dml.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dml.q.out
@@ -466,52 +466,28 @@ STAGE PLANS:
expressions: value (type: string), cnt (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmp_src_part
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
- outputColumnNames: c, d
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: compute_stats(c, 'hll')
- keys: d (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Execution mode: llap
+ Reduce Output Operator
+ key expressions: _col1 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: bigint)
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
- Group By Operator
- aggregations: compute_stats(VALUE._col0)
- keys: KEY._col0 (type: int)
- mode: mergepartial
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY._col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.tmp_src_part
Stage: Stage-2
Dependency Collection
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
index ca8232e..28977d7 100644
--- a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
@@ -1316,7 +1316,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: web_sales
- Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -1326,7 +1326,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [16]
- Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -1338,7 +1338,7 @@ STAGE PLANS:
keys: ws_order_number (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 170 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -1347,7 +1347,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 170 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2000 Data size: 3511604 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -1379,7 +1379,7 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 170 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 1755802 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col0)
Group By Vectorization:
@@ -1391,14 +1391,14 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
@@ -1420,13 +1420,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat