You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/07/03 17:48:05 UTC
[37/46] hive git commit: HIVE-19326 : stats auto gather: incorrect
aggregation during UNION queries (may lead to incorrect results) (Zoltan
Haindrich via Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/union_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_stats.q.out b/ql/src/test/results/clientpositive/union_stats.q.out
index 73e880b..1acf5c8 100644
--- a/ql/src/test/results/clientpositive/union_stats.q.out
+++ b/ql/src/test/results/clientpositive/union_stats.q.out
@@ -454,6 +454,249 @@ POSTHOOK: query: create table t2 like src
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t2
+PREHOOK: query: explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+PREHOOK: type: QUERY
+POSTHOOK: query: explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-3 depends on stages: Stage-0, Stage-10
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+ Stage-1 depends on stages: Stage-2
+ Stage-9 depends on stages: Stage-1, Stage-10
+ Stage-10 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.t1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t2
+
+ Stage: Stage-9
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.t2
+
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: from (select * from src union all select * from src)s
insert overwrite table t1 select *
insert overwrite table t2 select *
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
index 3b10987..09dddac 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
@@ -68,6 +68,8 @@ public interface Deserializer {
/**
* Returns statistics collected when serializing
+ *
+ * @return {@link SerDeStats} object; or in case not supported: null
*/
SerDeStats getSerDeStats();
}