You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2017/07/15 08:06:57 UTC
[18/24] hive git commit: HIVE-16996: Add HLL as an alternative to FM
sketch to compute stats (Pengcheng Xiong, reviewed by Ashutosh Chauhan,
Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
index 29b3373..1b12570 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out
@@ -59,7 +59,7 @@ STAGE PLANS:
outputColumnNames: key, value, one, two, three
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: one (type: string), two (type: string), three (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -69,7 +69,7 @@ STAGE PLANS:
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/autoColumnStats_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_7.q.out b/ql/src/test/results/clientpositive/autoColumnStats_7.q.out
index 9d24bc5..9e2121e 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_7.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_7.q.out
@@ -132,10 +132,10 @@ STAGE PLANS:
value expressions: key (type: string), c1 (type: int), c2 (type: string)
Reduce Operator Tree:
Group By Operator
- aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16)
+ aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
mode: partial1
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1424 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -149,17 +149,17 @@ STAGE PLANS:
TableScan
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Statistics: Num rows: 1 Data size: 1424 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
mode: final
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
index 681d962..cdf2082 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_8.q.out
@@ -104,7 +104,7 @@ STAGE PLANS:
outputColumnNames: key, value, ds, hr
Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: ds (type: string), hr (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -116,7 +116,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
tag: -1
- value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
auto parallelism: false
Filter Operator
isSamplingPred: false
@@ -161,7 +161,7 @@ STAGE PLANS:
outputColumnNames: key, value, hr
Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
keys: '2008-12-31' (type: string), hr (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -177,7 +177,7 @@ STAGE PLANS:
properties:
column.name.delimiter ,
columns _col0,_col1,_col2,_col3
- columns.types string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>
+ columns.types string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -511,7 +511,7 @@ STAGE PLANS:
Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string)
Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
tag: -1
- value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
@@ -524,7 +524,7 @@ STAGE PLANS:
properties:
column.name.delimiter ,
columns _col0,_col1,_col2,_col3
- columns.types string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>
+ columns.types string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -534,7 +534,7 @@ STAGE PLANS:
properties:
column.name.delimiter ,
columns _col0,_col1,_col2,_col3
- columns.types string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>
+ columns.types string,string,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>,struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
index d26e2c0..06f23b1 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
@@ -86,10 +86,10 @@ STAGE PLANS:
outputColumnNames: key, value
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -142,10 +142,10 @@ STAGE PLANS:
outputColumnNames: key, value
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -181,17 +181,17 @@ STAGE PLANS:
TableScan
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -252,7 +252,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dest_j1
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key int 0 498 0 196 from deserializer
+key int 0 498 0 309 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
PREHOOK: query: desc formatted dest_j1 value
PREHOOK: type: DESCTABLE
@@ -262,5 +262,5 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dest_j1
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value string 0 214 6.834630350194552 7 from deserializer
+value string 0 309 6.834630350194552 7 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
index 17a912e..57f0067 100644
--- a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
@@ -285,24 +285,24 @@ STAGE PLANS:
Stage: Stage-14
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_1:c
+ $hdt$_1:b
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_1:c
+ $hdt$_1:b
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
+ expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
Stage: Stage-9
@@ -322,7 +322,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
@@ -341,24 +341,24 @@ STAGE PLANS:
Stage: Stage-12
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_2:b
+ $hdt$_2:c
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_2:b
+ $hdt$_2:c
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
+ expressions: value (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
Stage: Stage-6
@@ -369,7 +369,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
@@ -411,20 +411,20 @@ STAGE PLANS:
Map Reduce
Map Operator Tree:
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
+ expressions: value (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
@@ -449,19 +449,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ value expressions: _col0 (type: string)
TableScan
- alias: b
+ alias: c
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: key is not null (type: boolean)
+ predicate: value is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
+ expressions: value (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -474,7 +474,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
+ 0 _col1 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
@@ -505,27 +505,27 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
Stage: Stage-10
Map Reduce
Map Operator Tree:
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
+ expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
@@ -552,19 +552,19 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col1 (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col1 (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
+ value expressions: _col1 (type: string)
TableScan
- alias: c
+ alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: value is not null (type: boolean)
+ predicate: key is not null (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: value (type: string)
+ expressions: key (type: string)
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -577,7 +577,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
+ 0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/avro_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_decimal.q.out b/ql/src/test/results/clientpositive/avro_decimal.q.out
index 5a3b72d..e1045eb 100644
--- a/ql/src/test/results/clientpositive/avro_decimal.q.out
+++ b/ql/src/test/results/clientpositive/avro_decimal.q.out
@@ -34,7 +34,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dec
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value decimal(8,4) -12.25 234.79 0 6 from deserializer
+value decimal(8,4) -12.25 234.79 0 10 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}}
PREHOOK: query: DROP TABLE IF EXISTS avro_dec
PREHOOK: type: DROPTABLE
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/avro_decimal_native.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/avro_decimal_native.q.out
index fe77512..b73b5f5 100644
--- a/ql/src/test/results/clientpositive/avro_decimal_native.q.out
+++ b/ql/src/test/results/clientpositive/avro_decimal_native.q.out
@@ -38,7 +38,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@dec
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value decimal(8,4) -12.25 234.79 0 6 from deserializer
+value decimal(8,4) -12.25 234.79 0 10 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"value\":\"true\"}}
PREHOOK: query: DROP TABLE IF EXISTS avro_dec
PREHOOK: type: DROPTABLE
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
index f260f03..23f5fcf 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
@@ -129,13 +129,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: state, locid, $f2
- Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: min(locid)
keys: state (type: string), $f2 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
table:
@@ -151,7 +151,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: bigint)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col2 (type: int)
Reduce Operator Tree:
Group By Operator
@@ -159,10 +159,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: state, $f2, $f2_0
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
index b9cf3ce..29499a1 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_join0.q.out
@@ -68,14 +68,14 @@ STAGE PLANS:
1 key (type: string)
2 key (type: string)
outputColumnNames: key, c_int, key0, c_int0
- Statistics: Num rows: 324 Data size: 57494 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), c_int (type: int), key0 (type: string), c_int0 (type: int)
outputColumnNames: key, c_int, p, q
- Statistics: Num rows: 324 Data size: 57494 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 324 Data size: 57494 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 216 Data size: 38270 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -730,14 +730,14 @@ STAGE PLANS:
2 key (type: string)
3 key (type: string)
outputColumnNames: key, c_int, key0, c_int0, key1, c_int2
- Statistics: Num rows: 1620 Data size: 432273 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1080 Data size: 288093 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), c_int (type: int), key0 (type: string), c_int0 (type: int), key1 (type: string), c_int2 (type: int)
outputColumnNames: key, c_int, p, q, x, b
- Statistics: Num rows: 1620 Data size: 432273 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1080 Data size: 288093 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1620 Data size: 432273 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1080 Data size: 288093 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/char_udf1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/char_udf1.q.out b/ql/src/test/results/clientpositive/char_udf1.q.out
index 07ce108..fefc740 100644
--- a/ql/src/test/results/clientpositive/char_udf1.q.out
+++ b/ql/src/test/results/clientpositive/char_udf1.q.out
@@ -393,15 +393,15 @@ POSTHOOK: Input: default@char_udf_1
#### A masked pattern was here ####
val_238 val_238 true
PREHOOK: query: select
- compute_stats(c2, 16),
- compute_stats(c4, 16)
+ compute_stats(c2, 'fm', 16),
+ compute_stats(c4, 'fm', 16)
from char_udf_1
PREHOOK: type: QUERY
PREHOOK: Input: default@char_udf_1
#### A masked pattern was here ####
POSTHOOK: query: select
- compute_stats(c2, 16),
- compute_stats(c4, 16)
+ compute_stats(c2, 'fm', 16),
+ compute_stats(c4, 'fm', 16)
from char_udf_1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@char_udf_1
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
index 14c5d5b..0f28225 100644
--- a/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
+++ b/ql/src/test/results/clientpositive/colstats_all_nulls.q.out
@@ -43,7 +43,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@all_nulls
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-a bigint 0 0 5 1 from deserializer
+a bigint 0 0 5 0 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
PREHOOK: query: describe formatted all_nulls b
PREHOOK: type: DESCTABLE
@@ -53,7 +53,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@all_nulls
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-b double 0.0 0.0 5 1 from deserializer
+b double 0.0 0.0 5 0 from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
PREHOOK: query: drop table all_nulls
PREHOOK: type: DROPTABLE
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
index 96feeed..9925928 100644
--- a/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
+++ b/ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out
@@ -108,7 +108,7 @@ STAGE PLANS:
value expressions: key (type: int), value (type: string)
Reduce Operator Tree:
Group By Operator
- aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
mode: complete
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
index 07d26e9..5ecb205 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
@@ -52,7 +52,7 @@ STAGE PLANS:
outputColumnNames: employeeid
Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16)
+ aggregations: compute_stats(employeeid, 'hll')
keys: 2000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
@@ -62,7 +62,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: 2000.0 (type: double)
Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
@@ -112,7 +112,7 @@ STAGE PLANS:
outputColumnNames: employeeid
Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16)
+ aggregations: compute_stats(employeeid, 'hll')
keys: 2000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
@@ -124,7 +124,7 @@ STAGE PLANS:
Map-reduce partition columns: 2000.0 (type: double)
Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
tag: -1
- value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
@@ -254,7 +254,7 @@ STAGE PLANS:
outputColumnNames: employeeid
Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16)
+ aggregations: compute_stats(employeeid, 'hll')
keys: 4000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
@@ -264,7 +264,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: 4000.0 (type: double)
Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
@@ -314,7 +314,7 @@ STAGE PLANS:
outputColumnNames: employeeid
Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16)
+ aggregations: compute_stats(employeeid, 'hll')
keys: 4000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
@@ -326,7 +326,7 @@ STAGE PLANS:
Map-reduce partition columns: 4000.0 (type: double)
Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
tag: -1
- value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
@@ -456,7 +456,7 @@ STAGE PLANS:
outputColumnNames: employeeid, employeename
Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
+ aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll')
keys: 2000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -466,7 +466,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: 2000.0 (type: double)
Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
@@ -511,7 +511,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeID int 16 34 1 14 from deserializer
+employeeID int 16 34 1 12 from deserializer
PREHOOK: query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeName
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
@@ -520,7 +520,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeName string 1 9 4.3076923076923075 6 from deserializer
+employeeName string 1 12 4.3076923076923075 6 from deserializer
PREHOOK: query: explain
analyze table Employee_Part compute statistics for columns
PREHOOK: type: QUERY
@@ -543,7 +543,7 @@ STAGE PLANS:
outputColumnNames: employeeid, employeename, employeesalary
Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
+ aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll')
keys: employeesalary (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -553,7 +553,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: double)
Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
@@ -600,7 +600,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeID int 16 34 1 14 from deserializer
+employeeID int 16 34 1 12 from deserializer
PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=4000.0) employeeID
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
@@ -609,7 +609,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeID int 16 34 1 14 from deserializer
+employeeID int 16 34 1 12 from deserializer
PREHOOK: query: explain
analyze table Employee_Part compute statistics for columns
PREHOOK: type: QUERY
@@ -632,23 +632,23 @@ STAGE PLANS:
outputColumnNames: employeeid, employeename
Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
+ aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll')
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -681,7 +681,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeID int 16 34 2 14 from deserializer
+employeeID int 16 34 2 12 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}
PREHOOK: query: create database if not exists dummydb
PREHOOK: type: CREATEDATABASE
@@ -713,7 +713,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeID int 16 34 1 14 from deserializer
+employeeID int 16 34 1 12 from deserializer
COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"employeeid\":\"true\",\"employeename\":\"true\"}}
PREHOOK: query: analyze table default.Employee_Part compute statistics for columns
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 468d2e7..a64c76b 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -88,7 +88,7 @@ STAGE PLANS:
outputColumnNames: employeeid, employeename, country
Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeename, 16), compute_stats(employeeid, 16)
+ aggregations: compute_stats(employeename, 'hll'), compute_stats(employeeid, 'hll')
keys: 4000.0 (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -98,7 +98,7 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: 4000.0 (type: double), _col1 (type: string)
Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
@@ -143,7 +143,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeName string 0 6 5.142857142857143 6 from deserializer
+employeeName string 0 7 5.142857142857143 6 from deserializer
PREHOOK: query: explain
analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID
PREHOOK: type: QUERY
@@ -166,7 +166,7 @@ STAGE PLANS:
outputColumnNames: employeeid, country
Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16)
+ aggregations: compute_stats(employeeid, 'hll')
keys: 2000.0 (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -176,7 +176,7 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: 2000.0 (type: double), _col1 (type: string)
Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
@@ -223,7 +223,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeID int 16 34 1 14 from deserializer
+employeeID int 16 34 1 12 from deserializer
PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='UK') employeeID
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee_part
@@ -232,7 +232,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeID int 16 31 0 9 from deserializer
+employeeID int 16 31 0 7 from deserializer
PREHOOK: query: explain
analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID
PREHOOK: type: QUERY
@@ -255,7 +255,7 @@ STAGE PLANS:
outputColumnNames: employeeid, employeesalary, country
Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16)
+ aggregations: compute_stats(employeeid, 'hll')
keys: employeesalary (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -265,7 +265,7 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
@@ -320,7 +320,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeID int 16 34 1 14 from deserializer
+employeeID int 16 34 1 12 from deserializer
PREHOOK: query: explain
analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns
PREHOOK: type: QUERY
@@ -343,7 +343,7 @@ STAGE PLANS:
outputColumnNames: employeeid, employeename, employeesalary, country
Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
+ aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll')
keys: employeesalary (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -353,7 +353,7 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
@@ -408,7 +408,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee_part
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeName string 0 6 5.142857142857143 6 from deserializer
+employeeName string 0 12 5.142857142857143 6 from deserializer
PREHOOK: query: drop table Employee
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table Employee
@@ -483,7 +483,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeName string 0 6 5.142857142857143 6 from deserializer
+employeeName string 0 12 5.142857142857143 6 from deserializer
PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee partition(employeeSalary='3000.0', country='USA')
PREHOOK: type: LOAD
#### A masked pattern was here ####
@@ -530,7 +530,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-employeeName string 0 6 5.142857142857143 6 from deserializer
+employeeName string 0 12 5.142857142857143 6 from deserializer
PREHOOK: query: alter table Employee add columns (c int ,d string)
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@employee
@@ -575,7 +575,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@employee
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-c int 2000 4000 0 4 from deserializer
+c int 2000 4000 0 3 from deserializer
PREHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') d
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@employee
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/columnstats_quoting.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
index 52e3538..7e080fe 100644
--- a/ql/src/test/results/clientpositive/columnstats_quoting.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
@@ -30,23 +30,23 @@ STAGE PLANS:
outputColumnNames: user id, user name
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: compute_stats(user id, 16), compute_stats(user name, 16)
+ aggregations: compute_stats(user id, 'hll'), compute_stats(user name, 'hll')
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -87,14 +87,14 @@ STAGE PLANS:
outputColumnNames: user id
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: compute_stats(user id, 16)
+ aggregations: compute_stats(user id, 'hll')
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+ Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)