You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/10/06 06:00:54 UTC
svn commit: r1629563 [16/33] - in /hive/branches/spark: ./ accumulo-handler/
beeline/ beeline/src/java/org/apache/hive/beeline/ bin/ bin/ext/ common/
common/src/java/org/apache/hadoop/hive/conf/
common/src/test/org/apache/hadoop/hive/common/type/ contr...
Modified: hive/branches/spark/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out?rev=1629563&r1=1629562&r2=1629563&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out Mon Oct 6 04:00:39 2014
@@ -1,4 +1,24 @@
-PREHOOK: query: create table if not exists loc_staging (
+PREHOOK: query: -- hash aggregation is disabled
+
+-- There are different cases for Group By depending on map/reduce side, hash aggregation,
+-- grouping sets and column stats. If we don't have column stats, we just assume hash
+-- aggregation is disabled. Following are the possible cases and rule for cardinality
+-- estimation
+
+-- MAP SIDE:
+-- Case 1: NO column stats, NO hash aggregation, NO grouping sets â numRows
+-- Case 2: NO column stats, NO hash aggregation, grouping sets â numRows * sizeOfGroupingSet
+-- Case 3: column stats, hash aggregation, NO grouping sets â Min(numRows / 2, ndvProduct * parallelism)
+-- Case 4: column stats, hash aggregation, grouping sets â Min((numRows * sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet)
+-- Case 5: column stats, NO hash aggregation, NO grouping sets â numRows
+-- Case 6: column stats, NO hash aggregation, grouping sets â numRows * sizeOfGroupingSet
+
+-- REDUCE SIDE:
+-- Case 7: NO column stats â numRows / 2
+-- Case 8: column stats, grouping sets â Min(numRows, ndvProduct * sizeOfGroupingSet)
+-- Case 9: column stats, NO grouping sets - Min(numRows, ndvProduct)
+
+create table if not exists loc_staging (
state string,
locid int,
zip bigint,
@@ -7,7 +27,27 @@ PREHOOK: query: create table if not exis
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@loc_staging
-POSTHOOK: query: create table if not exists loc_staging (
+POSTHOOK: query: -- hash aggregation is disabled
+
+-- There are different cases for Group By depending on map/reduce side, hash aggregation,
+-- grouping sets and column stats. If we don't have column stats, we just assume hash
+-- aggregation is disabled. Following are the possible cases and rule for cardinality
+-- estimation
+
+-- MAP SIDE:
+-- Case 1: NO column stats, NO hash aggregation, NO grouping sets â numRows
+-- Case 2: NO column stats, NO hash aggregation, grouping sets â numRows * sizeOfGroupingSet
+-- Case 3: column stats, hash aggregation, NO grouping sets â Min(numRows / 2, ndvProduct * parallelism)
+-- Case 4: column stats, hash aggregation, grouping sets â Min((numRows * sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet)
+-- Case 5: column stats, NO hash aggregation, NO grouping sets â numRows
+-- Case 6: column stats, NO hash aggregation, grouping sets â numRows * sizeOfGroupingSet
+
+-- REDUCE SIDE:
+-- Case 7: NO column stats â numRows / 2
+-- Case 8: column stats, grouping sets â Min(numRows, ndvProduct * sizeOfGroupingSet)
+-- Case 9: column stats, NO grouping sets - Min(numRows, ndvProduct)
+
+create table if not exists loc_staging (
state string,
locid int,
zip bigint,
@@ -190,22 +230,20 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year
+PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year
PREHOOK: type: QUERY
PREHOOK: Input: default@loc_orc
#### A masked pattern was here ####
-POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year
+POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year
POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
-PREHOOK: query: -- only one distinct value in year column + 1 NULL value
--- map-side GBY: numRows: 8 (map-side will not do any reduction)
--- reduce-side GBY: numRows: 2
+PREHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8
+-- Case 9: column stats, NO grouping sets - caridnality = 2
explain select year from loc_orc group by year
PREHOOK: type: QUERY
-POSTHOOK: query: -- only one distinct value in year column + 1 NULL value
--- map-side GBY: numRows: 8 (map-side will not do any reduction)
--- reduce-side GBY: numRows: 2
+POSTHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8
+-- Case 9: column stats, NO grouping sets - caridnality = 2
explain select year from loc_orc group by year
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -257,12 +295,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY: numRows: 8
--- reduce-side GBY: numRows: 4
+PREHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8
+-- Case 9: column stats, NO grouping sets - caridnality = 8
explain select state,locid from loc_orc group by state,locid
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY: numRows: 8
--- reduce-side GBY: numRows: 4
+POSTHOOK: query: -- Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = 8
+-- Case 9: column stats, NO grouping sets - caridnality = 8
explain select state,locid from loc_orc group by state,locid
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -295,14 +333,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -314,10 +352,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 32
explain select state,locid from loc_orc group by state,locid with cube
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 32
explain select state,locid from loc_orc group by state,locid with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -339,25 +379,25 @@ STAGE PLANS:
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -369,10 +409,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
+-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid with rollup
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
+-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid with rollup
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -394,25 +436,25 @@ STAGE PLANS:
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -424,10 +466,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8
+-- Case 8: column stats, grouping sets - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state))
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 8
+-- Case 8: column stats, grouping sets - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state))
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -449,25 +493,25 @@ STAGE PLANS:
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 700 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -479,10 +523,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 16
+-- Case 8: column stats, grouping sets - cardinality = 16
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 16
+-- Case 8: column stats, grouping sets - cardinality = 16
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -504,25 +550,25 @@ STAGE PLANS:
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -534,10 +580,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
+-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 24
+-- Case 8: column stats, grouping sets - cardinality = 24
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -559,25 +607,25 @@ STAGE PLANS:
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 12 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 24 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -589,10 +637,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+PREHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 32
explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+POSTHOOK: query: -- Case 6: column stats, NO hash aggregation, grouping sets - cardinality = 32
+-- Case 8: column stats, grouping sets - cardinality = 32
explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -614,25 +664,25 @@ STAGE PLANS:
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 32 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -644,12 +694,16 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction)
--- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
+PREHOOK: query: -- map-side parallelism will be 10
+
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
+-- Case 9: column stats, NO grouping sets - caridnality = 2
explain select year from loc_orc group by year
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction)
--- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
+POSTHOOK: query: -- map-side parallelism will be 10
+
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
+-- Case 9: column stats, NO grouping sets - caridnality = 2
explain select year from loc_orc group by year
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -671,25 +725,25 @@ STAGE PLANS:
keys: year (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 80 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 80 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -701,10 +755,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+PREHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - cardinality = 16
+-- Case 8: column stats, grouping sets - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+POSTHOOK: query: -- Case 4: column stats, hash aggregation, grouping sets - cardinality = 16
+-- Case 8: column stats, grouping sets - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -726,25 +782,84 @@ STAGE PLANS:
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 35 Data size: 6125 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- ndvProduct becomes 0 as zip does not have column stats
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
+-- Case 9: column stats, NO grouping sets - caridnality = 2
+explain select state,zip from loc_orc group by state,zip
+PREHOOK: type: QUERY
+POSTHOOK: query: -- ndvProduct becomes 0 as zip does not have column stats
+-- Case 3: column stats, hash aggregation, NO grouping sets - cardinality = 4
+-- Case 9: column stats, NO grouping sets - caridnality = 2
+explain select state,zip from loc_orc group by state,zip
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
+ Select Operator
+ expressions: state (type: string), zip (type: bigint)
+ outputColumnNames: state, zip
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
+ Group By Operator
+ keys: state (type: string), zip (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 172 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -756,10 +871,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -811,10 +928,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 24
+-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid with rollup
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 24
+-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid with rollup
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -866,10 +985,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
+-- Case 7: NO column stats - cardinality = 4
explain select state,locid from loc_orc group by state,locid grouping sets((state))
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
+-- Case 7: NO column stats - cardinality = 4
explain select state,locid from loc_orc group by state,locid grouping sets((state))
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -921,10 +1042,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 16
+-- Case 7: NO column stats - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 16
+-- Case 7: NO column stats - cardinality = 8
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid))
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -976,10 +1099,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 24
+-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 24
+-- Case 7: NO column stats - cardinality = 12
explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),())
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -1031,10 +1156,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),())
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -1086,12 +1213,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction)
--- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
+PREHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
+-- Case 7: NO column stats - cardinality = 4
explain select year from loc_orc group by year
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction)
--- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
+POSTHOOK: query: -- Case 1: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 8
+-- Case 7: NO column stats - cardinality = 4
explain select year from loc_orc group by year
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -1113,25 +1240,25 @@ STAGE PLANS:
keys: year (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1143,10 +1270,12 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+PREHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
PREHOOK: type: QUERY
-POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
+POSTHOOK: query: -- Case 2: NO column stats, NO hash aggregation, NO grouping sets - cardinality = 32
+-- Case 7: NO column stats - cardinality = 16
explain select state,locid from loc_orc group by state,locid with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -1168,25 +1297,25 @@ STAGE PLANS:
keys: state (type: string), locid (type: int), '0' (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/branches/spark/ql/src/test/results/clientpositive/annotate_stats_part.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/annotate_stats_part.q.out?rev=1629563&r1=1629562&r2=1629563&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/annotate_stats_part.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/annotate_stats_part.q.out Mon Oct 6 04:00:39 2014
@@ -56,11 +56,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
ListSink
PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging
@@ -98,11 +98,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 5 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- partition level analyze statistics for specific parition
@@ -135,11 +135,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 323 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 325 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 323 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE
@@ -158,11 +158,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 9 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 9 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -181,11 +181,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- partition level analyze statistics for all partitions
@@ -222,11 +222,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -245,11 +245,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -268,11 +268,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- both partitions will be pruned
@@ -293,14 +293,14 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
Filter Operator
predicate: ((year = '2001') and (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
ListSink
PREHOOK: query: -- partition level partial column statistics
@@ -322,33 +322,21 @@ POSTHOOK: query: -- basicStatState: COMP
explain select zip from loc_orc
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: zip (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: zip (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: NONE
+ ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL
explain select state from loc_orc
@@ -357,33 +345,44 @@ POSTHOOK: query: -- basicStatState: COMP
explain select state from loc_orc
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL
- Select Operator
- expressions: state (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
- File Output Operator
- compressed: false
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
+ Select Operator
+ expressions: state (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
+ ListSink
+PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE
+explain select year from loc_orc
+PREHOOK: type: QUERY
+POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE
+explain select year from loc_orc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: year (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
PREHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL
-- basicStatState: COMPLETE colStatState: PARTIAL
@@ -394,33 +393,21 @@ POSTHOOK: query: -- column statistics fo
explain select state,locid from loc_orc
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL
- Select Operator
- expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL
- File Output Operator
- compressed: false
- Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: PARTIAL
+ ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE
explain select state,locid from loc_orc where year='2001'
@@ -429,33 +416,21 @@ POSTHOOK: query: -- basicStatState: COMP
explain select state,locid from loc_orc where year='2001'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 630 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
explain select state,locid from loc_orc where year!='2001'
@@ -464,33 +439,21 @@ POSTHOOK: query: -- basicStatState: COMP
explain select state,locid from loc_orc where year!='2001'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: state (type: string), locid (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: loc_orc
+ Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: state (type: string), locid (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 323 Basic stats: COMPLETE Column stats: NONE
+ ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL
explain select * from loc_orc
@@ -508,11 +471,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 724 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- This is to test filter expression evaluation on partition column
@@ -533,7 +496,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (locid > 0) (type: boolean)
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -569,10 +532,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (locid > 0) (type: boolean)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: locid (type: int), year (type: string)
outputColumnNames: _col0, _col1
@@ -605,17 +568,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (locid > 0) (type: boolean)
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: locid (type: int), year (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat