You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2014/11/20 01:49:37 UTC

svn commit: r1640651 - in /hive/trunk: data/files/ itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/tez/

Author: prasanthj
Date: Thu Nov 20 00:49:37 2014
New Revision: 1640651

URL: http://svn.apache.org/r1640651
Log:
HIVE-8888: Mapjoin with LateralViewJoin generates wrong plan in Tez (Prasanth J reviewed by Gunther Hagleitner)

Added:
    hive/trunk/data/files/sour1.txt
    hive/trunk/data/files/sour2.txt
    hive/trunk/ql/src/test/queries/clientpositive/lvj_mapjoin.q
    hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out
Modified:
    hive/trunk/itests/src/test/resources/testconfiguration.properties
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java

Added: hive/trunk/data/files/sour1.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/sour1.txt?rev=1640651&view=auto
==============================================================================
--- hive/trunk/data/files/sour1.txt (added)
+++ hive/trunk/data/files/sour1.txt Thu Nov 20 00:49:37 2014
@@ -0,0 +1,3 @@
+1,a1,a11,a111
+2,a2,a22,a222
+3,a3,a33,a333

Added: hive/trunk/data/files/sour2.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/sour2.txt?rev=1640651&view=auto
==============================================================================
--- hive/trunk/data/files/sour2.txt (added)
+++ hive/trunk/data/files/sour2.txt Thu Nov 20 00:49:37 2014
@@ -0,0 +1,3 @@
+1,b1,b11,b111
+2,b2,b22,b222
+4,b4,b44,b444

Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1640651&r1=1640650&r2=1640651&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Thu Nov 20 00:49:37 2014
@@ -269,6 +269,7 @@ minitez.query.files=bucket_map_join_tez1
   dynamic_partition_pruning.q,\
   dynamic_partition_pruning_2.q,\
   mapjoin_decimal.q,\
+  lvj_mapjoin.q,\
   mrr.q,\
   tez_bmj_schema_evolution.q,\
   tez_dml.q,\

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java?rev=1640651&r1=1640650&r2=1640651&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java Thu Nov 20 00:49:37 2014
@@ -18,13 +18,6 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Stack;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -50,6 +43,13 @@ import org.apache.hadoop.hive.ql.plan.Te
 import org.apache.hadoop.hive.ql.plan.TezWork.VertexType;
 import org.apache.hadoop.hive.ql.plan.UnionWork;
 
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Stack;
+
 /**
  * GenTezWork separates the operator tree into tez tasks.
  * It is called once per leaf operator (operator that forces
@@ -109,10 +109,14 @@ public class GenTezWork implements NodeP
       // operator graph. There's typically two reasons for that: a) mux/demux
       // b) multi insert. Mux/Demux will hit the same leaf again, multi insert
       // will result in a vertex with multiple FS or RS operators.
-
-      // At this point we don't have to do anything special in this case. Just
-      // run through the regular paces w/o creating a new task.
-      work = context.rootToWorkMap.get(root);
+      if (context.childToWorkMap.containsKey(operator)) {
+        // if we've seen both root and child, we can bail.
+        return null;
+      } else {
+        // At this point we don't have to do anything special in this case. Just
+        // run through the regular paces w/o creating a new task.
+        work = context.rootToWorkMap.get(root);
+      }
     } else {
       // create a new vertex
       if (context.preceedingWork == null) {

Added: hive/trunk/ql/src/test/queries/clientpositive/lvj_mapjoin.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/lvj_mapjoin.q?rev=1640651&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/lvj_mapjoin.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/lvj_mapjoin.q Thu Nov 20 00:49:37 2014
@@ -0,0 +1,37 @@
+-- SORT_QUERY_RESULTS
+
+drop table sour1;
+drop table sour2;
+drop table expod1;
+drop table expod2;
+
+set hive.auto.convert.join=true;
+
+create table sour1(id int, av1 string, av2 string, av3 string) row format delimited fields terminated by ',';
+create table sour2(id int, bv1 string, bv2 string, bv3 string) row format delimited fields terminated by ',';
+
+load data local inpath '../../data/files/sour1.txt' into table sour1;
+load data local inpath '../../data/files//sour2.txt' into table sour2;
+
+create table expod1(aid int, av array<string>);
+create table expod2(bid int, bv array<string>);
+
+insert overwrite table expod1 select id, array(av1,av2,av3) from sour1;
+insert overwrite table expod2 select id, array(bv1,bv2,bv3) from sour2;
+
+explain with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid;
+
+with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid;
+

Added: hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out?rev=1640651&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/lvj_mapjoin.q.out Thu Nov 20 00:49:37 2014
@@ -0,0 +1,298 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table sour1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table sour1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table sour2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table sour2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table expod1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table expod1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table expod2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table expod2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table sour1(id int, av1 string, av2 string, av3 string) row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sour1
+POSTHOOK: query: create table sour1(id int, av1 string, av2 string, av3 string) row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sour1
+PREHOOK: query: create table sour2(id int, bv1 string, bv2 string, bv3 string) row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sour2
+POSTHOOK: query: create table sour2(id int, bv1 string, bv2 string, bv3 string) row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sour2
+PREHOOK: query: load data local inpath '../../data/files/sour1.txt' into table sour1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@sour1
+POSTHOOK: query: load data local inpath '../../data/files/sour1.txt' into table sour1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@sour1
+PREHOOK: query: load data local inpath '../../data/files//sour2.txt' into table sour2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@sour2
+POSTHOOK: query: load data local inpath '../../data/files//sour2.txt' into table sour2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@sour2
+PREHOOK: query: create table expod1(aid int, av array<string>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@expod1
+POSTHOOK: query: create table expod1(aid int, av array<string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@expod1
+PREHOOK: query: create table expod2(bid int, bv array<string>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@expod2
+POSTHOOK: query: create table expod2(bid int, bv array<string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@expod2
+PREHOOK: query: insert overwrite table expod1 select id, array(av1,av2,av3) from sour1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sour1
+PREHOOK: Output: default@expod1
+POSTHOOK: query: insert overwrite table expod1 select id, array(av1,av2,av3) from sour1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sour1
+POSTHOOK: Output: default@expod1
+POSTHOOK: Lineage: expod1.aid SIMPLE [(sour1)sour1.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: expod1.av EXPRESSION [(sour1)sour1.FieldSchema(name:av1, type:string, comment:null), (sour1)sour1.FieldSchema(name:av2, type:string, comment:null), (sour1)sour1.FieldSchema(name:av3, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table expod2 select id, array(bv1,bv2,bv3) from sour2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sour2
+PREHOOK: Output: default@expod2
+POSTHOOK: query: insert overwrite table expod2 select id, array(bv1,bv2,bv3) from sour2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sour2
+POSTHOOK: Output: default@expod2
+POSTHOOK: Lineage: expod2.bid SIMPLE [(sour2)sour2.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: expod2.bv EXPRESSION [(sour2)sour2.FieldSchema(name:bv1, type:string, comment:null), (sour2)sour2.FieldSchema(name:bv2, type:string, comment:null), (sour2)sour2.FieldSchema(name:bv3, type:string, comment:null), ]
+PREHOOK: query: explain with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 2 <- Map 1 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: expod2
+                  Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: bid is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Lateral View Forward
+                      Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: bid (type: int)
+                        outputColumnNames: bid
+                        Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        Lateral View Join Operator
+                          outputColumnNames: _col0, _col5
+                          Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: int), _col5 (type: string)
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              key expressions: _col0 (type: int)
+                              sort order: +
+                              Map-reduce partition columns: _col0 (type: int)
+                              Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col1 (type: string)
+                      Select Operator
+                        expressions: bv (type: array<string>)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        UDTF Operator
+                          Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                          function name: explode
+                          Lateral View Join Operator
+                            outputColumnNames: _col0, _col5
+                            Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col0 (type: int), _col5 (type: string)
+                              outputColumnNames: _col0, _col1
+                              Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                              Reduce Output Operator
+                                key expressions: _col0 (type: int)
+                                sort order: +
+                                Map-reduce partition columns: _col0 (type: int)
+                                Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                                value expressions: _col1 (type: string)
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: expod1
+                  Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: aid is not null (type: boolean)
+                    Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Lateral View Forward
+                      Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: aid (type: int)
+                        outputColumnNames: aid
+                        Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        Lateral View Join Operator
+                          outputColumnNames: _col0, _col5
+                          Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: int), _col5 (type: string)
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                            Map Join Operator
+                              condition map:
+                                   Inner Join 0 to 1
+                              condition expressions:
+                                0 {_col0} {_col1}
+                                1 {_col0} {_col1}
+                              keys:
+                                0 _col0 (type: int)
+                                1 _col0 (type: int)
+                              outputColumnNames: _col0, _col1, _col2, _col3
+                              input vertices:
+                                1 Map 1
+                              Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                              Filter Operator
+                                predicate: (_col0 = _col2) (type: boolean)
+                                Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                Select Operator
+                                  expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+                                  outputColumnNames: _col0, _col1, _col2
+                                  Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                  File Output Operator
+                                    compressed: false
+                                    Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                    table:
+                                        input format: org.apache.hadoop.mapred.TextInputFormat
+                                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Select Operator
+                        expressions: av (type: array<string>)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                        UDTF Operator
+                          Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                          function name: explode
+                          Lateral View Join Operator
+                            outputColumnNames: _col0, _col5
+                            Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col0 (type: int), _col5 (type: string)
+                              outputColumnNames: _col0, _col1
+                              Statistics: Num rows: 4 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                              Map Join Operator
+                                condition map:
+                                     Inner Join 0 to 1
+                                condition expressions:
+                                  0 {_col0} {_col1}
+                                  1 {_col0} {_col1}
+                                keys:
+                                  0 _col0 (type: int)
+                                  1 _col0 (type: int)
+                                outputColumnNames: _col0, _col1, _col2, _col3
+                                input vertices:
+                                  1 Map 1
+                                Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                                Filter Operator
+                                  predicate: (_col0 = _col2) (type: boolean)
+                                  Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                  Select Operator
+                                    expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+                                    outputColumnNames: _col0, _col1, _col2
+                                    Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                    File Output Operator
+                                      compressed: false
+                                      Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                                      table:
+                                          input format: org.apache.hadoop.mapred.TextInputFormat
+                                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@expod1
+PREHOOK: Input: default@expod2
+#### A masked pattern was here ####
+POSTHOOK: query: with sub1 as
+(select aid, avalue from expod1 lateral view explode(av) avs as avalue ),
+sub2 as
+(select bid, bvalue from expod2 lateral view explode(bv) bvs as bvalue)
+select sub1.aid, sub1.avalue, sub2.bvalue
+from sub1,sub2
+where sub1.aid=sub2.bid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@expod1
+POSTHOOK: Input: default@expod2
+#### A masked pattern was here ####
+1	a1	b1
+1	a1	b11
+1	a1	b111
+1	a11	b1
+1	a11	b11
+1	a11	b111
+1	a111	b1
+1	a111	b11
+1	a111	b111
+2	a2	b2
+2	a2	b22
+2	a2	b222
+2	a22	b2
+2	a22	b22
+2	a22	b222
+2	a222	b2
+2	a222	b22
+2	a222	b222