You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/12/12 00:00:45 UTC
[47/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather
column stats by default (Zoltan Haindrich,
Pengcheng Xiong via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
index 1096912..21937f4 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
@@ -272,6 +272,22 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -376,6 +392,35 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [b]
/srcbucket_mapjoin_part/ds=2008-04-09 [b]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-8
Conditional Operator
@@ -418,6 +463,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
Stage: Stage-4
Map Reduce
@@ -793,7 +843,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -814,6 +864,22 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -918,6 +984,35 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
/srcbucket_mapjoin_part_2/ds=2008-04-09 [b]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-8
Conditional Operator
@@ -937,7 +1032,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -960,6 +1055,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
Stage: Stage-4
Map Reduce
@@ -975,7 +1075,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1005,7 +1105,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1026,7 +1126,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1062,7 +1162,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1092,7 +1192,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1113,7 +1213,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
index 15286ed..f6652b8 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
@@ -212,6 +212,22 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -265,6 +281,35 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-7
Conditional Operator
@@ -307,6 +352,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
Stage: Stage-3
Map Reduce
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
index 3c171d6..095d559 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
@@ -275,6 +275,22 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -328,6 +344,35 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-7
Conditional Operator
@@ -370,6 +415,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
Stage: Stage-3
Map Reduce
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
index 2d6bd6f..b59c4bc 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
@@ -84,6 +84,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT x.key, x.value from
@@ -190,6 +194,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT * from
@@ -296,6 +304,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
PREHOOK: query: EXPLAIN
INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
@@ -314,7 +326,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -346,6 +359,22 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -362,6 +391,39 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: EXPLAIN
INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
@@ -417,4 +479,8 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
index 7eb36bf..52ef3db 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
@@ -80,6 +80,10 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: value, key
+ Column Types: string, int
+ Table: default.test_table2
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT x.value, x.key from
@@ -151,7 +155,8 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -183,6 +188,22 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), '1' (type: string)
+ outputColumnNames: value, key, ds
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -199,6 +220,39 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: value, key
+ Column Types: string, int
+ Table: default.test_table2
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT x.key, x.value from
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
index 7efb7ce..eaf85c3 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
@@ -65,39 +65,13 @@ FROM test_table1 a JOIN test_table2 b
ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
- Stage-7 has a backup stage: Stage-1
- Stage-4 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
- Stage-2 depends on stages: Stage-0
- Stage-8 has a backup stage: Stage-1
- Stage-5 depends on stages: Stage-8
- Stage-1
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-6
- Conditional Operator
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- b
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- b
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: int)
- 1 key (type: int)
-
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -106,7 +80,7 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
@@ -121,8 +95,6 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col1 (type: int)
value expressions: _col2 (type: string)
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -134,6 +106,20 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ outputColumnNames: key, key2, value
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll')
+ keys: '1' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -150,99 +136,35 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, key2, value
+ Column Types: int, int, string
+ Table: default.test_table3
- Stage: Stage-8
- Map Reduce Local Work
- Alias -> Map Local Tables:
- a
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- a
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: int)
- 1 key (type: int)
-
- Stage: Stage-5
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col7) (type: string)
- outputColumnNames: _col1, _col2
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- value expressions: _col2 (type: string)
- Local Work:
- Map Reduce Local Work
+ Reduce Output Operator
+ key expressions: '1' (type: string)
+ sort order: +
+ Map-reduce partition columns: '1' (type: string)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
-
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col7) (type: string)
- outputColumnNames: _col1, _col2
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- value expressions: _col2 (type: string)
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ keys: '1' (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), '1' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, a.key, concat(a.value, b.value)
@@ -340,43 +262,13 @@ FROM test_table1 a JOIN test_table2 b
ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
- Stage-7 has a backup stage: Stage-1
- Stage-4 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
- Stage-2 depends on stages: Stage-0
- Stage-8 has a backup stage: Stage-1
- Stage-5 depends on stages: Stage-8
- Stage-1
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-6
- Conditional Operator
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_1:b
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_1:b
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -389,7 +281,7 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
@@ -401,8 +293,6 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col1 (type: string)
value expressions: _col0 (type: int)
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string)
@@ -414,6 +304,20 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -430,105 +334,35 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
- Stage: Stage-8
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:a
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:a
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- value expressions: _col0 (type: int)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
-
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- value expressions: _col0 (type: int)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, a.value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
index 6c10249..661114d 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
@@ -65,43 +65,13 @@ FROM test_table1 a JOIN test_table2 b
ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
- Stage-7 has a backup stage: Stage-1
- Stage-4 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
- Stage-2 depends on stages: Stage-0
- Stage-8 has a backup stage: Stage-1
- Stage-5 depends on stages: Stage-8
- Stage-1
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-6
- Conditional Operator
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_1:b
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_1:b
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -114,7 +84,7 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
@@ -129,8 +99,6 @@ STAGE PLANS:
sort order: -
Map-reduce partition columns: _col0 (type: int)
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -142,6 +110,20 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -158,111 +140,35 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
- Stage: Stage-8
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:a
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:a
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col4
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: -
- Map-reduce partition columns: _col0 (type: int)
- value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
-
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col4
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: -
- Map-reduce partition columns: _col0 (type: int)
- value expressions: _col1 (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
@@ -347,43 +253,13 @@ JOIN
ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
- Stage-7 has a backup stage: Stage-1
- Stage-4 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
- Stage-2 depends on stages: Stage-0
- Stage-8 has a backup stage: Stage-1
- Stage-5 depends on stages: Stage-8
- Stage-1
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-6
- Conditional Operator
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_1:test_table2
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_1:test_table2
- TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -396,7 +272,7 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
@@ -411,8 +287,6 @@ STAGE PLANS:
sort order: -
Map-reduce partition columns: _col0 (type: int)
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -424,6 +298,20 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -440,111 +328,35 @@ STAGE PLANS:
Stage: Stage-2
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
- Stage: Stage-8
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:test_table1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:test_table1
- TableScan
- alias: test_table1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: -
- Map-reduce partition columns: _col0 (type: int)
- value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
-
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: test_table1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: -
- Map-reduce partition columns: _col0 (type: int)
- value expressions: _col1 (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)