You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jp...@apache.org on 2015/11/18 00:08:27 UTC
hive git commit: Bug: HIVE-12384 - Union Operator may produce
incorrect result on TEZ (Laljo John Pullokkaran reviewed by Sergey Shelukhin,
Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master ce6293b3d -> bce63d58f
Bug: HIVE-12384 - Union Operator may produce incorrect result on TEZ (Laljo John Pullokkaran reviewed by Sergey Shelukhin, Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bce63d58
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bce63d58
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bce63d58
Branch: refs/heads/master
Commit: bce63d58f9a42f03a1dd01948bdc9e697ce01ced
Parents: ce6293b
Author: jpullokk <jp...@apache.org>
Authored: Tue Nov 17 15:06:35 2015 -0800
Committer: jpullokk <jp...@apache.org>
Committed: Tue Nov 17 15:06:35 2015 -0800
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 10 +++---
.../queries/clientpositive/union_type_chk.q | 6 ++++
.../cbo_rp_annotate_stats_groupby.q.out | 32 ++++++++++----------
.../clientpositive/tez/union_type_chk.q.out | 28 +++++++++++++++++
.../results/clientpositive/union_type_chk.q.out | 28 +++++++++++++++++
5 files changed, 83 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1ca113c..5323a7d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8991,21 +8991,21 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
unionoutRR.put(unionalias, field, unionColInfo);
}
- // For Spark, we rely on the generated SelectOperator to do the type casting.
+ // For non-MR engines (Spark, Tez), we rely on the generated SelectOperator to do the type casting.
// Consider:
// SEL_1 (int) SEL_2 (int) SEL_3 (double)
// If we first merge SEL_1 and SEL_2 into a UNION_1, and then merge UNION_1
// with SEL_3 to get UNION_2, then no SelectOperator will be inserted. Hence an error
// will happen afterwards. The solution here is to insert one after UNION_1, which
// casts int to double.
- boolean isSpark = HiveConf.getVar(conf,
- HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark");
+ boolean isMR = HiveConf.getVar(conf,
+ HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr");
- if (isSpark || !(leftOp instanceof UnionOperator)) {
+ if (!isMR || !(leftOp instanceof UnionOperator)) {
leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias);
}
- if (isSpark || !(rightOp instanceof UnionOperator)) {
+ if (!isMR || !(rightOp instanceof UnionOperator)) {
rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/test/queries/clientpositive/union_type_chk.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_type_chk.q b/ql/src/test/queries/clientpositive/union_type_chk.q
new file mode 100644
index 0000000..0c8282a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_type_chk.q
@@ -0,0 +1,6 @@
+set hive.cbo.enable=false;
+set hive.execution.engine=mr;
+
+select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y;
+
+select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y;
http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
index b47a3b3..7bdf06b 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
@@ -154,7 +154,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: sq1:loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
@@ -248,7 +248,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: year (type: int)
outputColumnNames: year
@@ -301,7 +301,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
@@ -354,7 +354,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
@@ -411,7 +411,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
@@ -468,7 +468,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
@@ -525,7 +525,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
@@ -582,7 +582,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
@@ -639,7 +639,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
@@ -700,7 +700,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: year (type: int)
outputColumnNames: year
@@ -709,21 +709,21 @@ STAGE PLANS:
keys: year (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: year
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -753,7 +753,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
@@ -812,7 +812,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), zip (type: bigint)
outputColumnNames: state, zip
http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/test/results/clientpositive/tez/union_type_chk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union_type_chk.q.out b/ql/src/test/results/clientpositive/tez/union_type_chk.q.out
new file mode 100644
index 0000000..12f060b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/union_type_chk.q.out
@@ -0,0 +1,28 @@
+PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4.999999900000002E-9
+4.999999900000002E-9
+4.999999900000002E-9
+4.999999900000002E-9
+0.4999999900000002
+0.4999999900000002
+PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
+NULL
+0.25
+0.25
+0.25
+0.25
http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/test/results/clientpositive/union_type_chk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_type_chk.q.out b/ql/src/test/results/clientpositive/union_type_chk.q.out
new file mode 100644
index 0000000..12f060b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_type_chk.q.out
@@ -0,0 +1,28 @@
+PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4.999999900000002E-9
+4.999999900000002E-9
+4.999999900000002E-9
+4.999999900000002E-9
+0.4999999900000002
+0.4999999900000002
+PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
+NULL
+0.25
+0.25
+0.25
+0.25