You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/12/12 00:00:23 UTC
[25/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather
column stats by default (Zoltan Haindrich,
Pengcheng Xiong via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
index 57eead0..14f1efe 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
@@ -111,7 +111,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -192,7 +193,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -319,6 +320,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -355,6 +403,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
@@ -484,7 +537,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -565,7 +619,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -671,7 +725,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -692,6 +746,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -705,7 +806,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -728,6 +829,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
@@ -874,7 +980,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -955,7 +1062,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -1110,7 +1217,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1131,6 +1238,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 61 Data size: 17884 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -1144,7 +1298,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1167,6 +1321,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
index 184e890..161631a 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
@@ -135,7 +135,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -216,7 +217,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -343,6 +344,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -379,6 +427,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
@@ -508,7 +561,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -589,7 +643,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -695,7 +749,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -716,6 +770,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -729,7 +830,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -752,6 +853,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
index b353073..c07f722 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
@@ -135,7 +135,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -214,7 +215,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -339,6 +340,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -375,6 +423,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
@@ -492,7 +545,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -571,7 +625,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -675,7 +729,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -696,6 +750,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -709,7 +810,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -732,6 +833,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
index b907c2d..ce24832 100644
--- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
@@ -101,25 +101,26 @@ STAGE PLANS:
Edges:
Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -127,14 +128,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -144,16 +145,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col4
input vertices:
0 Map 1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -163,15 +164,51 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -191,6 +228,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
@@ -281,25 +322,26 @@ STAGE PLANS:
Edges:
Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 20 Data size: 3900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 19 Data size: 3705 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -307,14 +349,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -324,16 +366,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col4
input vertices:
0 Map 1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -343,15 +385,51 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: PARTIAL
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -371,6 +449,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
@@ -485,25 +567,26 @@ STAGE PLANS:
Edges:
Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -511,14 +594,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 168 Data size: 31740 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 160 Data size: 30228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 168 Data size: 46704 Basic stats: COMPLETE Column stats: PARTIAL
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -528,16 +611,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col4
input vertices:
0 Map 1
- Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 168 Data size: 31080 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -547,15 +630,51 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 176 Data size: 33250 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 168 Data size: 31584 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 168 Data size: 45864 Basic stats: COMPLETE Column stats: PARTIAL
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: PARTIAL
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: PARTIAL
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -575,6 +694,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
@@ -695,25 +818,26 @@ STAGE PLANS:
Edges:
Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: test_table1
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -721,14 +845,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_table2
- Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -738,16 +862,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col3
input vertices:
0 Map 1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -757,15 +881,51 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -785,6 +945,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
@@ -887,25 +1051,26 @@ STAGE PLANS:
Edges:
Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: test_table1
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), concat(value, value) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -913,14 +1078,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_table2
- Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), concat(value, value) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -930,16 +1095,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col3
input vertices:
0 Map 1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 31248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -949,15 +1114,51 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -977,6 +1178,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.v1, b.v2)
@@ -1079,25 +1284,26 @@ STAGE PLANS:
Edges:
Map 2 <- Map 1 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: test_table1
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 1950 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1105,14 +1311,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_table2
- Statistics: Num rows: 84 Data size: 15964 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 80 Data size: 15203 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -1122,16 +1328,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col3
input vertices:
0 Map 1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15540 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1141,15 +1347,51 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 88 Data size: 16723 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 84 Data size: 22932 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -1169,6 +1411,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key+a.key, concat(a.value, b.value)