You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2014/12/30 01:59:25 UTC

svn commit: r1648457 - in /hive/trunk: itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ ql/src/test/results/clientpositive/tez/

Author: navis
Date: Tue Dec 30 00:59:25 2014
New Revision: 1648457

URL: http://svn.apache.org/r1648457
Log:
HIVE-9215 : Some mapjoin queries broken with IdentityProjectRemover with PPD (Navis reviewed by Xuefu Zhang and Szehon Ho)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q
    hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out
Modified:
    hive/trunk/itests/src/test/resources/testconfiguration.properties
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
    hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out

Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1648457&r1=1648456&r2=1648457&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Tue Dec 30 00:59:25 2014
@@ -99,6 +99,7 @@ minitez.query.files.shared=alter_merge_2
   groupby2.q,\
   groupby3.q,\
   having.q,\
+  identity_project_remove_skip.q,\
   insert1.q,\
   insert_into1.q,\
   insert_into2.q,\

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java?rev=1648457&r1=1648456&r2=1648457&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java Tue Dec 30 00:59:25 2014
@@ -24,10 +24,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.Stack;
 
+import com.google.common.base.Predicates;
+import com.google.common.collect.Iterators;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -91,6 +94,11 @@ public class IdentityProjectRemover impl
         return null;
       }
       Operator<? extends OperatorDesc> parent = parents.get(0);
+      if (parent instanceof ReduceSinkOperator && Iterators.any(sel.getChildOperators().iterator(),
+          Predicates.instanceOf(ReduceSinkOperator.class))) {
+        // RS-SEL-RS case: keep this SEL, because the reducer operator of a reducer task cannot be null in the task compiler
+        return null;
+      }
       if(sel.isIdentitySelect()) {
         parent.removeChildAndAdoptItsChildren(sel);
         LOG.debug("Identity project remover optimization removed : " + sel);

Added: hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q?rev=1648457&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q Tue Dec 30 00:59:25 2014
@@ -0,0 +1,20 @@
+set hive.optimize.remove.identity.project=true;
+set hive.auto.convert.join=true;
+set hive.optimize.ppd=true;
+
+explain
+select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105';
+  
+select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105';

Added: hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out?rev=1648457&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out Tue Dec 30 00:59:25 2014
@@ -0,0 +1,217 @@
+PREHOOK: query: explain
+select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-6 depends on stages: Stage-1, Stage-3 , consists of Stage-7, Stage-8, Stage-2
+  Stage-7 has a backup stage: Stage-2
+  Stage-4 depends on stages: Stage-7
+  Stage-8 has a backup stage: Stage-2
+  Stage-5 depends on stages: Stage-8
+  Stage-2
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-4, Stage-5, Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((key is not null and (value = 'val_105')) and (key = '105')) (type: boolean)
+              Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: '105' (type: string)
+                  sort order: +
+                  Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-7
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 '105' (type: string)
+                1 '105' (type: string)
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 '105' (type: string)
+                1 '105' (type: string)
+              Select Operator
+                expressions: '105' (type: string), 'val_105' (type: string)
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 '105' (type: string)
+                1 '105' (type: string)
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 '105' (type: string)
+                1 '105' (type: string)
+              Select Operator
+                expressions: '105' (type: string), 'val_105' (type: string)
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: '105' (type: string)
+              sort order: +
+              Map-reduce partition columns: '' (type: string)
+              Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: '105' (type: string)
+              sort order: +
+              Map-reduce partition columns: '' (type: string)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: '105' (type: string), 'val_105' (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = '105') (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: '105' (type: string)
+                  sort order: +
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+105	val_105

Modified: hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out?rev=1648457&r1=1648456&r2=1648457&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out Tue Dec 30 00:59:25 2014
@@ -66,11 +66,14 @@ STAGE PLANS:
                   sort order: +
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
       Reduce Operator Tree:
-        Reduce Output Operator
-          key expressions: 'a' (type: string)
-          sort order: +
-          Map-reduce partition columns: '' (type: string)
+        Select Operator
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce

Added: hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out?rev=1648457&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out Tue Dec 30 00:59:25 2014
@@ -0,0 +1,119 @@
+PREHOOK: query: explain
+select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key is not null and (value = 'val_105')) and (key = '105')) (type: boolean)
+                    Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: '105' (type: string)
+                        sort order: +
+                        Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (key = '105') (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: '105' (type: string)
+                        sort order: +
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: '105' (type: string)
+                  sort order: +
+                  Map-reduce partition columns: '' (type: string)
+                  Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+        Reducer 4 
+            Reduce Operator Tree:
+              Select Operator
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 '105' (type: string)
+                    1 '105' (type: string)
+                  input vertices:
+                    0 Reducer 2
+                  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: '105' (type: string), 'val_105' (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select t2.* 
+from
+  (select key,value from (select key,value from src) t1 sort by key) t2
+  join 
+  (select * from src sort by key) t3 
+  on (t2.key=t3.key )
+  where t2.value='val_105' and t3.key='105'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+105	val_105