You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/12/29 20:52:28 UTC

hive git commit: HIVE-15493: Wrong result for LEFT outer join in Tez using MapJoinOperator (Jesus Camacho Rodriguez, reviewed by Pengcheng Xiong)

Repository: hive
Updated Branches:
  refs/heads/master fb548b27c -> 4486f2a94


HIVE-15493: Wrong result for LEFT outer join in Tez using MapJoinOperator (Jesus Camacho Rodriguez, reviewed by Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4486f2a9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4486f2a9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4486f2a9

Branch: refs/heads/master
Commit: 4486f2a94746c035094001e127807c8d2e82b49a
Parents: fb548b2
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Dec 22 00:43:18 2016 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Dec 29 20:42:45 2016 +0000

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  12 --
 ql/src/test/queries/clientpositive/mapjoin3.q   |  55 +++++
 .../results/clientpositive/llap/mapjoin3.q.out  | 200 +++++++++++++++++++
 .../test/results/clientpositive/mapjoin3.q.out  | 196 ++++++++++++++++++
 5 files changed, 452 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 623e7c1..1cebc70 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -185,6 +185,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   load_dyn_part3.q,\
   lvj_mapjoin.q,\
   mapjoin2.q,\
+  mapjoin3.q,\
   mapjoin_decimal.q,\
   mapjoin_mapjoin.q,\
   mapjoin46.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 71d34eb..863b153 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8062,18 +8062,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         index[i] = kindex;
         continue;
       }
-      int vindex;
-      if (exprBack == null) {
-        vindex = -1;
-      } else if (ExprNodeDescUtils.isConstant(exprBack)) {
-        vindex = reduceValuesBack.indexOf(exprBack);
-      } else {
-        vindex = ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
-      }
-      if (vindex >= 0) {
-        index[i] = -vindex - 1;
-        continue;
-      }
       index[i] = -reduceValues.size() - 1;
       String outputColName = getColumnInternalName(reduceValues.size());
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/ql/src/test/queries/clientpositive/mapjoin3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mapjoin3.q b/ql/src/test/queries/clientpositive/mapjoin3.q
new file mode 100644
index 0000000..0a4a5d3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mapjoin3.q
@@ -0,0 +1,55 @@
+set hive.auto.convert.join=true;
+
+DROP TABLE IF EXISTS test_1; 
+CREATE TABLE test_1 
+( 
+member BIGINT 
+, age VARCHAR (100) 
+) 
+STORED AS TEXTFILE 
+;
+DROP TABLE IF EXISTS test_2; 
+CREATE TABLE test_2 
+( 
+member BIGINT 
+) 
+STORED AS TEXTFILE 
+;
+INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40'); 
+INSERT INTO test_2 VALUES (1), (2), (3);
+
+EXPLAIN
+SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member 
+;
+
+SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member 
+;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/ql/src/test/results/clientpositive/llap/mapjoin3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/mapjoin3.q.out
new file mode 100644
index 0000000..cf1bebb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/mapjoin3.q.out
@@ -0,0 +1,200 @@
+PREHOOK: query: DROP TABLE IF EXISTS test_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test_1 
+( 
+member BIGINT 
+, age VARCHAR (100) 
+) 
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_1
+POSTHOOK: query: CREATE TABLE test_1 
+( 
+member BIGINT 
+, age VARCHAR (100) 
+) 
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_1
+PREHOOK: query: DROP TABLE IF EXISTS test_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test_2 
+( 
+member BIGINT 
+) 
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_2
+POSTHOOK: query: CREATE TABLE test_2 
+( 
+member BIGINT 
+) 
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_2
+PREHOOK: query: INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_1
+POSTHOOK: query: INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_1
+POSTHOOK: Lineage: test_1.age EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: test_1.member EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test_2 VALUES (1), (2), (3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_2
+POSTHOOK: query: INSERT INTO test_2 VALUES (1), (2), (3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_2
+POSTHOOK: Lineage: test_2.member EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: EXPLAIN
+SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: member (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      keys:
+                        0 _col0 (type: bigint)
+                        1 _col0 (type: bigint)
+                      outputColumnNames: _col0, _col2, _col3
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: bigint), _col2 (type: varchar(100)), _col3 (type: varchar(100))
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: test_1
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: member (type: bigint), age (type: varchar(100)), age (type: varchar(100))
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: varchar(100)), _col2 (type: varchar(100))
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_1
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_1
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1	20	20
+2	30	30
+3	40	40

http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/ql/src/test/results/clientpositive/mapjoin3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mapjoin3.q.out b/ql/src/test/results/clientpositive/mapjoin3.q.out
new file mode 100644
index 0000000..e0aa2d3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/mapjoin3.q.out
@@ -0,0 +1,196 @@
+PREHOOK: query: DROP TABLE IF EXISTS test_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test_1 
+( 
+member BIGINT 
+, age VARCHAR (100) 
+) 
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_1
+POSTHOOK: query: CREATE TABLE test_1 
+( 
+member BIGINT 
+, age VARCHAR (100) 
+) 
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_1
+PREHOOK: query: DROP TABLE IF EXISTS test_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test_2 
+( 
+member BIGINT 
+) 
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_2
+POSTHOOK: query: CREATE TABLE test_2 
+( 
+member BIGINT 
+) 
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_2
+PREHOOK: query: INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_1
+POSTHOOK: query: INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_1
+POSTHOOK: Lineage: test_1.age EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: test_1.member EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test_2 VALUES (1), (2), (3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_2
+POSTHOOK: query: INSERT INTO test_2 VALUES (1), (2), (3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_2
+POSTHOOK: Lineage: test_2.member EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: EXPLAIN
+SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:test_1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:test_1 
+          TableScan
+            alias: test_1
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: member (type: bigint), age (type: varchar(100)), age (type: varchar(100))
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t2
+            Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: member (type: bigint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+                outputColumnNames: _col0, _col2, _col3
+                Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), _col2 (type: varchar(100)), _col3 (type: varchar(100))
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_1
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT 
+t2.member 
+, t1.age_1 
+, t1.age_2 
+FROM 
+test_2 t2 
+LEFT JOIN ( 
+SELECT 
+member 
+, age as age_1 
+, age as age_2 
+FROM 
+test_1 
+) t1 
+ON t2.member = t1.member
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_1
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1	20	20
+2	30	30
+3	40	40