You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/12/12 00:00:42 UTC
[44/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather
column stats by default (Zoltan Haindrich,
Pengcheng Xiong via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
index bb5a09f..02f5b47 100644
--- a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out
@@ -18,7 +18,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-0, Stage-4
+ Stage-4 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -86,6 +87,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ outputColumnNames: key, c1, c2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -100,6 +116,32 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, c1, c2
+ Column Types: string, int, string
+ Table: default.dest1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby2_noskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/groupby2_noskew.q.out
index 782a1a2..29a71f1 100644
--- a/ql/src/test/results/clientpositive/groupby2_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_noskew.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -54,6 +55,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_g2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+ outputColumnNames: key, c1, c2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -68,6 +79,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, c1, c2
+ Column Types: string, int, string
+ Table: default.dest_g2
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string), c1 (type: int), c2 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out
index 9a070ac..f1ce838 100644
--- a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -55,6 +56,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_g2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+ outputColumnNames: key, c1, c2, c3, c4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -69,6 +80,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, c1, c2, c3, c4
+ Column Types: string, int, string, int, int
+ Table: default.dest_g2
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby3.q.out b/ql/src/test/results/clientpositive/groupby3.q.out
index 2f2b533..7c97174 100644
--- a/ql/src/test/results/clientpositive/groupby3.q.out
+++ b/ql/src/test/results/clientpositive/groupby3.q.out
@@ -36,7 +36,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-0, Stage-4
+ Stage-4 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -73,6 +74,7 @@ STAGE PLANS:
TableScan
Reduce Output Operator
sort order:
+ Map-reduce partition columns: rand() (type: double)
Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: double), _col1 (type: struct<count:bigint,sum:double,input:string>), _col2 (type: struct<count:bigint,sum:double,input:string>), _col3 (type: string), _col4 (type: string), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>)
Reduce Operator Tree:
@@ -93,6 +95,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+ outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9
+ Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -107,6 +124,32 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9
+ Column Types: double, double, double, double, double, double, double, double, double
+ Table: default.dest1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col6 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col7 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col8 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby3_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby3_map.q.out b/ql/src/test/results/clientpositive/groupby3_map.q.out
index 1bbf23d..edad22b 100644
--- a/ql/src/test/results/clientpositive/groupby3_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_map.q.out
@@ -77,6 +77,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+ outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9
+ Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col6 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col7 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdis
tinctvalues:bigint,ndvbitvector:binary>), _col8 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -91,6 +111,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9
+ Column Types: double, double, double, double, double, double, double, double, double
+ Table: default.dest1
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
index 248a317..2034464 100644
--- a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
@@ -81,6 +81,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double)
+ outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11
+ Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col6 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col7 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdis
tinctvalues:bigint,ndvbitvector:binary>), _col8 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col9 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col10 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -95,6 +115,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11
+ Column Types: double, double, double, double, double, double, double, double, double, double, double
+ Table: default.dest1
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
index 8ba3a58..e53e62c 100644
--- a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
@@ -100,6 +100,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+ outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9
+ Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col6 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col7 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdis
tinctvalues:bigint,ndvbitvector:binary>), _col8 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -114,6 +134,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9
+ Column Types: double, double, double, double, double, double, double, double, double
+ Table: default.dest1
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby3_noskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/groupby3_noskew.q.out
index 5c520bb..1aa4cb6 100644
--- a/ql/src/test/results/clientpositive/groupby3_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_noskew.q.out
@@ -70,6 +70,22 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+ outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -84,6 +100,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9
+ Column Types: double, double, double, double, double, double, double, double, double
+ Table: default.dest1
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
index c5d121b..bb964e6 100644
--- a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
@@ -74,6 +74,22 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double)
+ outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 1 Data size: 5008 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 5008 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -88,6 +104,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11
+ Column Types: double, double, double, double, double, double, double, double, double, double, double
+ Table: default.dest1
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby4.q.out b/ql/src/test/results/clientpositive/groupby4.q.out
index e8e7b88..34f2099 100644
--- a/ql/src/test/results/clientpositive/groupby4.q.out
+++ b/ql/src/test/results/clientpositive/groupby4.q.out
@@ -18,7 +18,9 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-0, Stage-5
+ Stage-4 depends on stages: Stage-2
+ Stage-5 depends on stages: Stage-4
STAGE PLANS:
Stage: Stage-1
@@ -72,6 +74,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: c1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -86,6 +98,54 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1
+ Column Types: string
+ Table: default.dest1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: c1 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll')
+ mode: partial1
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby4_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby4_map.q.out b/ql/src/test/results/clientpositive/groupby4_map.q.out
index 647242a..4ed785f 100644
--- a/ql/src/test/results/clientpositive/groupby4_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby4_map.q.out
@@ -53,6 +53,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -67,6 +87,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.dest1
PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1)
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby4_map_skew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out
index 0857f22..e6ae7e8 100644
--- a/ql/src/test/results/clientpositive/groupby4_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out
@@ -53,6 +53,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -67,6 +87,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.dest1
PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1)
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby4_noskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/groupby4_noskew.q.out
index 5827ef0..0fa7046 100644
--- a/ql/src/test/results/clientpositive/groupby4_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby4_noskew.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -49,6 +50,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: c1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -63,6 +74,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1
+ Column Types: string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: c1 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby5.q.out b/ql/src/test/results/clientpositive/groupby5.q.out
index 995d30e..1677ab3 100644
--- a/ql/src/test/results/clientpositive/groupby5.q.out
+++ b/ql/src/test/results/clientpositive/groupby5.q.out
@@ -22,7 +22,9 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-0, Stage-5
+ Stage-4 depends on stages: Stage-2
+ Stage-5 depends on stages: Stage-4
STAGE PLANS:
Stage: Stage-1
@@ -84,6 +86,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -98,6 +110,54 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+ mode: partial1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE dest1
SELECT src.key, sum(substr(src.value,5))
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby5_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby5_map.q.out b/ql/src/test/results/clientpositive/groupby5_map.q.out
index 393c4e2..d04eb44 100644
--- a/ql/src/test/results/clientpositive/groupby5_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby5_map.q.out
@@ -55,6 +55,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -69,6 +89,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.dest1
PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key)
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby5_map_skew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby5_map_skew.q.out b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out
index 67767f5..6fa3fc1 100644
--- a/ql/src/test/results/clientpositive/groupby5_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out
@@ -55,6 +55,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -69,6 +89,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.dest1
PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key)
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby5_noskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby5_noskew.q.out b/ql/src/test/results/clientpositive/groupby5_noskew.q.out
index dfecdb5..9cd1ccc 100644
--- a/ql/src/test/results/clientpositive/groupby5_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby5_noskew.q.out
@@ -21,7 +21,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -59,6 +60,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -73,6 +84,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE dest1
SELECT src.key, sum(substr(src.value,5))
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out
index afc23ec..a8cd738 100644
--- a/ql/src/test/results/clientpositive/groupby6.q.out
+++ b/ql/src/test/results/clientpositive/groupby6.q.out
@@ -18,7 +18,9 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-0, Stage-5
+ Stage-4 depends on stages: Stage-2
+ Stage-5 depends on stages: Stage-4
STAGE PLANS:
Stage: Stage-1
@@ -72,6 +74,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: c1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -86,6 +98,54 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1
+ Column Types: string
+ Table: default.dest1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: c1 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll')
+ mode: partial1
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby6_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby6_map.q.out b/ql/src/test/results/clientpositive/groupby6_map.q.out
index c0c8841..1d89c1a 100644
--- a/ql/src/test/results/clientpositive/groupby6_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby6_map.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -54,6 +55,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: c1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 'hll')
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -68,6 +84,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1
+ Column Types: string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby6_map_skew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out
index 5bb8695..bc7ee51 100644
--- a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out
@@ -18,7 +18,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-0, Stage-4
+ Stage-4 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -77,6 +78,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: c1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 'hll')
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -91,6 +107,32 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1
+ Column Types: string
+ Table: default.dest1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby6_noskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/groupby6_noskew.q.out
index 0cafbd1..bf818a0 100644
--- a/ql/src/test/results/clientpositive/groupby6_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby6_noskew.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -49,6 +50,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: c1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -63,6 +74,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: c1
+ Column Types: string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: c1 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby7_map.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby7_map.q.out b/ql/src/test/results/clientpositive/groupby7_map.q.out
index b7b2c8c..76d1aa4 100644
--- a/ql/src/test/results/clientpositive/groupby7_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby7_map.q.out
@@ -27,10 +27,12 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
Stage-4 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-4
- Stage-5 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+ Stage-5 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-5
+ Stage-7 depends on stages: Stage-5
STAGE PLANS:
Stage: Stage-2
@@ -90,6 +92,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -104,12 +121,46 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
@@ -134,6 +185,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-1
Move Operator
@@ -145,9 +211,27 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
- Stage: Stage-5
- Stats Work
- Basic Stats Work:
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM SRC
INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out
index 6f88027..4c47797 100644
--- a/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out
+++ b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out
@@ -27,9 +27,11 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+ Stage-4 depends on stages: Stage-2
+ Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-2
@@ -69,6 +71,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Group By Operator
aggregations: sum(VALUE._col0)
keys: KEY._col0 (type: string)
@@ -87,6 +104,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -101,6 +133,40 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
Stage: Stage-1
Move Operator
@@ -112,9 +178,27 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
- Stage: Stage-4
- Stats Work
- Basic Stats Work:
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: FROM SRC
INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key