Posted to commits@hive.apache.org by jp...@apache.org on 2015/11/18 00:08:27 UTC

hive git commit: Bug: HIVE-12384 - Union Operator may produce incorrect result on TEZ (Laljo John Pullokkaran reviewed by Sergey Shelukhin, Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master ce6293b3d -> bce63d58f


Bug: HIVE-12384 - Union Operator may produce incorrect result on TEZ (Laljo John Pullokkaran reviewed by Sergey Shelukhin, Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bce63d58
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bce63d58
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bce63d58

Branch: refs/heads/master
Commit: bce63d58f9a42f03a1dd01948bdc9e697ce01ced
Parents: ce6293b
Author: jpullokk <jp...@apache.org>
Authored: Tue Nov 17 15:06:35 2015 -0800
Committer: jpullokk <jp...@apache.org>
Committed: Tue Nov 17 15:06:35 2015 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 10 +++---
 .../queries/clientpositive/union_type_chk.q     |  6 ++++
 .../cbo_rp_annotate_stats_groupby.q.out         | 32 ++++++++++----------
 .../clientpositive/tez/union_type_chk.q.out     | 28 +++++++++++++++++
 .../results/clientpositive/union_type_chk.q.out | 28 +++++++++++++++++
 5 files changed, 83 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1ca113c..5323a7d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8991,21 +8991,21 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       unionoutRR.put(unionalias, field, unionColInfo);
     }
 
-    // For Spark, we rely on the generated SelectOperator to do the type casting.
+    // For Spark/Tez, we rely on the generated SelectOperator to do the type casting.
     // Consider:
     //    SEL_1 (int)   SEL_2 (int)    SEL_3 (double)
     // If we first merge SEL_1 and SEL_2 into a UNION_1, and then merge UNION_1
     // with SEL_3 to get UNION_2, then no SelectOperator will be inserted. Hence an error
     // will happen afterwards. The solution here is to insert one after UNION_1, which
     // casts int to double.
-    boolean isSpark = HiveConf.getVar(conf,
-        HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark");
+    boolean isMR = HiveConf.getVar(conf,
+        HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr");
 
-    if (isSpark || !(leftOp instanceof UnionOperator)) {
+    if (!isMR || !(leftOp instanceof UnionOperator)) {
       leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias);
     }
 
-    if (isSpark || !(rightOp instanceof UnionOperator)) {
+    if (!isMR || !(rightOp instanceof UnionOperator)) {
       rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias);
     }
 
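For readers following the comment in the hunk above, a hypothetical query with the SEL_1/SEL_2/SEL_3 shape it describes (a sketch for illustration only; it is not part of this patch or its tests) could look like:

    select c from (
      select cast(1 as int) as c from src        -- SEL_1 (int)
      union all
      select cast(2 as int) as c from src        -- SEL_2 (int)
      union all
      select cast(3.0 as double) as c from src   -- SEL_3 (double)
    ) u;

Hive merges the branches left to right, so SEL_1 and SEL_2 first form UNION_1, and UNION_1 then merges with SEL_3 into UNION_2. With the old check, Tez (like MR) skipped the casting SelectOperator when a branch was already a UnionOperator, so the int rows could reach UNION_2 without being widened to double; the patch inverts the check so that only MR may skip the cast, and Tez and Spark always get it.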

http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/test/queries/clientpositive/union_type_chk.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_type_chk.q b/ql/src/test/queries/clientpositive/union_type_chk.q
new file mode 100644
index 0000000..0c8282a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_type_chk.q
@@ -0,0 +1,6 @@
+set hive.cbo.enable=false;
+set hive.execution.engine=mr;
+
+select (x/sum(x) over())  as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y;
+
+select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y;

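As a sanity check on the expected results in the new golden files below (assuming the usual behavior of sum() over an unbounded window, which ignores NULL inputs): each union branch contributes two rows because of the limit 2 subqueries. In the first query the window sum is

    1 + 1 + 1 + 1 + 100000000 + 100000000 = 200000004

so the four decimal rows evaluate to 1/200000004 ≈ 4.9999999E-9 and the two string rows to 100000000/200000004 ≈ 0.49999999. In the second query the two cast(null as string) rows are ignored by sum(), leaving a window sum of 4, so the non-NULL rows evaluate to 1/4 = 0.25 and the NULL rows remain NULL.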
http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
index b47a3b3..7bdf06b 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
@@ -154,7 +154,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: sq1:loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
+            Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
@@ -248,7 +248,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: year (type: int)
               outputColumnNames: year
@@ -301,7 +301,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
@@ -354,7 +354,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
@@ -411,7 +411,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
@@ -468,7 +468,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
@@ -525,7 +525,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
@@ -582,7 +582,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
@@ -639,7 +639,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
@@ -700,7 +700,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: year (type: int)
               outputColumnNames: year
@@ -709,21 +709,21 @@ STAGE PLANS:
                 keys: year (type: int)
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: int)
           mode: mergepartial
           outputColumnNames: year
-          Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -753,7 +753,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               expressions: state (type: string), locid (type: int)
               outputColumnNames: state, locid
@@ -812,7 +812,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: loc_orc
-            Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: PARTIAL
+            Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), zip (type: bigint)
               outputColumnNames: state, zip

http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/test/results/clientpositive/tez/union_type_chk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union_type_chk.q.out b/ql/src/test/results/clientpositive/tez/union_type_chk.q.out
new file mode 100644
index 0000000..12f060b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/union_type_chk.q.out
@@ -0,0 +1,28 @@
+PREHOOK: query: select (x/sum(x) over())  as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select (x/sum(x) over())  as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4.999999900000002E-9
+4.999999900000002E-9
+4.999999900000002E-9
+4.999999900000002E-9
+0.4999999900000002
+0.4999999900000002
+PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
+NULL
+0.25
+0.25
+0.25
+0.25

http://git-wip-us.apache.org/repos/asf/hive/blob/bce63d58/ql/src/test/results/clientpositive/union_type_chk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_type_chk.q.out b/ql/src/test/results/clientpositive/union_type_chk.q.out
new file mode 100644
index 0000000..12f060b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_type_chk.q.out
@@ -0,0 +1,28 @@
+PREHOOK: query: select (x/sum(x) over())  as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select (x/sum(x) over())  as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4.999999900000002E-9
+4.999999900000002E-9
+4.999999900000002E-9
+4.999999900000002E-9
+0.4999999900000002
+0.4999999900000002
+PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0))  as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
+NULL
+0.25
+0.25
+0.25
+0.25