You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/12/12 00:00:37 UTC
[39/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather
column stats by default (Zoltan Haindrich,
Pengcheng Xiong via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby_sort_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_2.q.out b/ql/src/test/results/clientpositive/groupby_sort_2.q.out
index 9d41436..25ed6b6 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_2.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_2.q.out
@@ -45,7 +45,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -90,6 +91,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: val, cnt
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(val, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -104,6 +120,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: val, cnt
+ Column Types: string, int
+ Table: default.outputtbl1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT val, count(1) FROM T1 GROUP BY val
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby_sort_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_3.q.out b/ql/src/test/results/clientpositive/groupby_sort_3.q.out
index 67b5322..be9a131 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_3.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_3.q.out
@@ -81,6 +81,32 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ outputColumnNames: key, val, cnt
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-7
Conditional Operator
@@ -104,6 +130,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, val, cnt
+ Column Types: string, string, int
+ Table: default.outputtbl1
Stage: Stage-3
Map Reduce
@@ -217,6 +247,32 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: key, cnt
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-7
Conditional Operator
@@ -240,6 +296,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, cnt
+ Column Types: string, int
+ Table: default.outputtbl2
Stage: Stage-3
Map Reduce
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby_sort_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_4.q.out b/ql/src/test/results/clientpositive/groupby_sort_4.q.out
index 0b26978..c2ab1bd 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_4.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_4.q.out
@@ -45,7 +45,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -90,6 +91,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: key, cnt
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -104,6 +120,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, cnt
+ Column Types: string, int
+ Table: default.outputtbl1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T1 GROUP BY key
@@ -149,7 +191,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -193,6 +236,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ outputColumnNames: key, val, cnt
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -207,6 +265,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, val, cnt
+ Column Types: string, string, int
+ Table: default.outputtbl2
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2
SELECT key, val, count(1) FROM T1 GROUP BY key, val
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby_sort_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_5.q.out b/ql/src/test/results/clientpositive/groupby_sort_5.q.out
index 41e1782..7a7b8df 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_5.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_5.q.out
@@ -81,6 +81,32 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ outputColumnNames: key, val, cnt
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-7
Conditional Operator
@@ -104,6 +130,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, val, cnt
+ Column Types: string, string, int
+ Table: default.outputtbl1
Stage: Stage-3
Map Reduce
@@ -245,6 +275,32 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ outputColumnNames: key, val, cnt
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-7
Conditional Operator
@@ -268,6 +324,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, val, cnt
+ Column Types: string, string, int
+ Table: default.outputtbl1
Stage: Stage-3
Map Reduce
@@ -381,7 +441,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -426,6 +487,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: key, cnt
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -440,6 +516,32 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, cnt
+ Column Types: string, int
+ Table: default.outputtbl2
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2
SELECT key, count(1) FROM T1 GROUP BY key
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby_sort_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_6.q.out b/ql/src/test/results/clientpositive/groupby_sort_6.q.out
index aa3bc96..871c7b3 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_6.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_6.q.out
@@ -25,7 +25,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -102,6 +103,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: key, cnt
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -135,6 +163,80 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, cnt
+ Column Types: int, int
+ Table: default.outputtbl1
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key
@@ -176,7 +278,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -232,7 +335,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,cnt
@@ -253,6 +356,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: key, cnt
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -263,7 +393,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,cnt
@@ -286,6 +416,80 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, cnt
+ Column Types: int, int
+ Table: default.outputtbl1
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key
@@ -318,7 +522,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -421,7 +626,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,cnt
@@ -442,6 +647,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: key, cnt
+ Statistics: Num rows: 1 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -452,7 +684,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,cnt
@@ -475,6 +707,80 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, cnt
+ Column Types: int, int
+ Table: default.outputtbl1
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 848 Basic stats: PARTIAL Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>,struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/groupby_sort_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_7.q.out b/ql/src/test/results/clientpositive/groupby_sort_7.q.out
index c9746c3..a9e469b 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_7.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_7.q.out
@@ -84,6 +84,32 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.outputtbl1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ outputColumnNames: key, val, cnt
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-7
Conditional Operator
@@ -107,6 +133,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, val, cnt
+ Column Types: string, string, int
+ Table: default.outputtbl1
Stage: Stage-3
Map Reduce