You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/12/12 00:00:29 UTC
[31/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather
column stats by default (Zoltan Haindrich,
Pengcheng Xiong via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
index 7f90ac5..7fc96c1 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out
@@ -69,6 +69,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -171,6 +190,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -204,6 +257,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_static_part
+ Is Table Level Stats: false
PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
select key, value from srcpart where ds = '2008-04-08'
@@ -249,7 +307,7 @@ Database: default
Table: list_bucketing_static_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 6
numRows 1000
rawDataSize 9624
@@ -330,6 +388,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -432,6 +509,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-7
Conditional Operator
@@ -473,6 +584,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_static_part
+ Is Table Level Stats: false
Stage: Stage-3
Merge File Operator
@@ -622,7 +738,7 @@ Database: default
Table: list_bucketing_static_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
numRows 1000
rawDataSize 9624
@@ -683,7 +799,7 @@ STAGE PLANS:
ds 2008-04-08
hr 11
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
index 64fdacb..f70ffa5 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out
@@ -65,6 +65,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -167,6 +186,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -200,6 +253,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_dynamic_part
+ Is Table Level Stats: false
PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08'
PREHOOK: type: QUERY
@@ -239,7 +297,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 3
numRows 500
rawDataSize 5312
@@ -282,7 +340,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 3
numRows 500
rawDataSize 5312
@@ -359,7 +417,7 @@ STAGE PLANS:
ds 2008-04-08
hr 11
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -405,7 +463,7 @@ STAGE PLANS:
ds 2008-04-08
hr 12
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
index 32dac42..5b625f0 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
@@ -69,6 +69,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -171,6 +190,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -204,6 +257,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_dynamic_part
+ Is Table Level Stats: false
PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08'
@@ -253,7 +311,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 2
numRows 16
rawDataSize 136
@@ -294,7 +352,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 6
numRows 984
rawDataSize 9488
@@ -376,6 +434,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -478,6 +555,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-7
Conditional Operator
@@ -520,6 +631,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_dynamic_part
+ Is Table Level Stats: false
Stage: Stage-3
Merge File Operator
@@ -677,7 +793,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 16
rawDataSize 136
@@ -718,7 +834,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
numRows 984
rawDataSize 9488
@@ -779,7 +895,7 @@ STAGE PLANS:
ds 2008-04-08
hr a1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -825,7 +941,7 @@ STAGE PLANS:
ds 2008-04-08
hr b1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
index 7c11d3f..4160ad6 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
@@ -69,6 +69,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -171,6 +190,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -204,6 +257,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_dynamic_part
+ Is Table Level Stats: false
PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08'
@@ -253,7 +311,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 2
numRows 16
rawDataSize 136
@@ -294,7 +352,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
numRows 984
rawDataSize 9488
@@ -376,6 +434,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -478,6 +555,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-7
Conditional Operator
@@ -520,6 +631,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_dynamic_part
+ Is Table Level Stats: false
Stage: Stage-3
Merge File Operator
@@ -677,7 +793,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 16
rawDataSize 136
@@ -718,7 +834,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 3
numRows 984
rawDataSize 9488
@@ -779,7 +895,7 @@ STAGE PLANS:
ds 2008-04-08
hr a1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -825,7 +941,7 @@ STAGE PLANS:
ds 2008-04-08
hr b1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
index d45be4e..7e9aad4 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
@@ -69,6 +69,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -171,6 +190,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -204,6 +257,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_dynamic_part
+ Is Table Level Stats: false
PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08'
@@ -253,7 +311,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 2
numRows 16
rawDataSize 136
@@ -294,7 +352,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 6
numRows 984
rawDataSize 9488
@@ -409,7 +467,7 @@ STAGE PLANS:
ds 2008-04-08
hr a1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
index 9df2130..495934f 100644
--- a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
@@ -69,6 +69,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -171,6 +190,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -204,6 +257,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_static_part
+ Is Table Level Stats: false
PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
select key, value from srcpart where ds = '2008-04-08'
@@ -249,7 +307,7 @@ Database: default
Table: list_bucketing_static_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 6
numRows 1000
rawDataSize 9624
@@ -330,6 +388,25 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+ outputColumnNames: key, value, ds, hr
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ keys: ds (type: string), hr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -432,6 +509,40 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcpart/ds=2008-04-08/hr=11 [srcpart]
/srcpart/ds=2008-04-08/hr=12 [srcpart]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>:string:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-7
Conditional Operator
@@ -473,6 +584,11 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
#### A masked pattern was here ####
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.list_bucketing_static_part
+ Is Table Level Stats: false
Stage: Stage-3
Merge File Operator
@@ -622,7 +738,7 @@ Database: default
Table: list_bucketing_static_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
numRows 1000
rawDataSize 9624
@@ -683,7 +799,7 @@ STAGE PLANS:
ds 2008-04-08
hr 11
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
index a1f225c..0dc5493 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
@@ -45,7 +45,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 3
numRows 500
rawDataSize 5312
@@ -95,7 +95,7 @@ STAGE PLANS:
ds 1
hr 4
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -179,7 +179,7 @@ STAGE PLANS:
ds 1
hr 4
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -264,7 +264,7 @@ STAGE PLANS:
ds 1
hr 4
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -347,7 +347,7 @@ STAGE PLANS:
ds 1
hr 4
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
index 0ad2ff5..3aefb55 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
@@ -45,7 +45,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 3
numRows 500
rawDataSize 5312
@@ -95,7 +95,7 @@ STAGE PLANS:
ds 1
hr 4
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -179,7 +179,7 @@ STAGE PLANS:
ds 1
hr 4
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -266,7 +266,7 @@ STAGE PLANS:
ds 1
hr 4
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
index 02fc314..13073f4 100644
--- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
+++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
@@ -41,7 +41,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 500
rawDataSize 5312
@@ -99,7 +99,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 3
numRows 500
rawDataSize 5312
@@ -162,7 +162,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 2
numRows 500
rawDataSize 5312
@@ -205,7 +205,7 @@ STAGE PLANS:
ds 1
hr 1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -316,7 +316,7 @@ STAGE PLANS:
ds 1
hr 2
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -402,7 +402,7 @@ STAGE PLANS:
ds 1
hr 3
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out
index 86a9bf5..77bea79 100644
--- a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out
@@ -86,7 +86,6 @@ STAGE PLANS:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
properties:
- COLUMN_STATS_ACCURATE {}
bucket_count 16
bucket_field_name a
column.name.delimiter ,
@@ -110,7 +109,6 @@ STAGE PLANS:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
properties:
- COLUMN_STATS_ACCURATE {}
bucket_count 16
bucket_field_name a
column.name.delimiter ,
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
index 9b83fad..5fe7544 100644
--- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
@@ -1471,10 +1471,11 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+ Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 3 (SIMPLE_EDGE)
Reducer 6 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 3 (SIMPLE_EDGE)
Vertices:
Map 1
Map Operator Tree:
@@ -1517,7 +1518,7 @@ STAGE PLANS:
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
scratchColumnTypeNames: []
- Map 7
+ Map 8
Map Operator Tree:
TableScan Vectorization:
native: true
@@ -1661,6 +1662,14 @@ STAGE PLANS:
notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported
vectorized: false
Reduce Operator Tree:
+ Reducer 7
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
Stage: Stage-5
@@ -2246,11 +2255,12 @@ STAGE PLANS:
Tez
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+ Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 3 (SIMPLE_EDGE)
Reducer 6 <- Reducer 3 (SIMPLE_EDGE)
Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
Vertices:
Map 1
Map Operator Tree:
@@ -2293,7 +2303,7 @@ STAGE PLANS:
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
scratchColumnTypeNames: []
- Map 8
+ Map 9
Map Operator Tree:
TableScan Vectorization:
native: true
@@ -2438,28 +2448,21 @@ STAGE PLANS:
vectorized: false
Reduce Operator Tree:
Reducer 7
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 0
- partitionColumnCount: 0
- scratchColumnTypeNames: [string, string, string, string]
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Reducer 8
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
- selectExpressions: ConstantVectorExpression(val this) -> 0:string, ConstantVectorExpression(val should) -> 1:string, ConstantVectorExpression(val not) -> 2:string, ConstantVectorExpression(val be there) -> 3:string
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Stage: Stage-5
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
index f7022b5..22a2375 100644
--- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
@@ -427,18 +427,18 @@ STAGE PLANS:
TableScan
alias: over10k_orc_bucketed
filterExpr: ((b = 4294967363) and (t < 100)) (type: boolean)
- Statistics: Num rows: 2098 Data size: 39900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2098 Data size: 41920 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((b = 4294967363) and (t < 100)) (type: boolean)
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: t (type: tinyint), si (type: smallint), i (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int)
sort order: +++
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -447,10 +447,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -497,18 +497,18 @@ STAGE PLANS:
TableScan
alias: over10k_orc_bucketed
filterExpr: ((b = 4294967363) and (t < 100)) (type: boolean)
- Statistics: Num rows: 2098 Data size: 39900 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2098 Data size: 41920 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((b = 4294967363) and (t < 100)) (type: boolean)
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), t (type: tinyint), si (type: smallint), i (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
sort order: +
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
@@ -518,10 +518,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -570,19 +570,19 @@ STAGE PLANS:
TableScan
alias: over10k_orc_bucketed
filterExpr: ((b = 4294967363) and (t < 100)) (type: boolean)
- Statistics: Num rows: 2098 Data size: 1021440 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2098 Data size: 706986 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((b = 4294967363) and (t < 100)) (type: boolean)
- Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 674 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), t (type: tinyint), si (type: smallint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: tinyint), _col2 (type: smallint), _col5 (type: float), _col6 (type: double), _col7 (type: boolean), _col8 (type: string), _col9 (type: timestamp), _col10 (type: decimal(4,2)), _col11 (type: binary)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
@@ -592,10 +592,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 0 (type: int), 4294967363 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: boolean), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: decimal(4,2)), VALUE._col9 (type: binary)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 486 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 834 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out b/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out
index 2b069ec..c87a0a6 100644
--- a/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out
+++ b/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out
@@ -88,7 +88,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 3
numRows 1500
rawDataSize 141000
@@ -237,7 +237,7 @@ Database: default
Table: src_orc_merge_test_part_stat
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 3
numRows 1500
rawDataSize 141000
@@ -284,7 +284,7 @@ Database: default
Table: src_orc_merge_test_part_stat
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 3
numRows 1500
rawDataSize 141000
http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/auto_join1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_join1.q.out b/ql/src/test/results/clientpositive/llap/auto_join1.q.out
index bbe63e2..9146560 100644
--- a/ql/src/test/results/clientpositive/llap/auto_join1.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_join1.q.out
@@ -26,6 +26,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -81,8 +82,36 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
Execution mode: llap
LLAP IO: no inputs
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -100,6 +129,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest_j1
PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value