You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2014/12/30 01:59:25 UTC
svn commit: r1648457 - in /hive/trunk: itests/src/test/resources/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
ql/src/test/results/clientpositive/tez/
Author: navis
Date: Tue Dec 30 00:59:25 2014
New Revision: 1648457
URL: http://svn.apache.org/r1648457
Log:
HIVE-9215 : Some mapjoin queries broken with IdentityProjectRemover with PPD (Navis reviewed by Xuefu Zhang and Szehon Ho)
Added:
hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q
hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out
Modified:
hive/trunk/itests/src/test/resources/testconfiguration.properties
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out
Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1648457&r1=1648456&r2=1648457&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Tue Dec 30 00:59:25 2014
@@ -99,6 +99,7 @@ minitez.query.files.shared=alter_merge_2
groupby2.q,\
groupby3.q,\
having.q,\
+ identity_project_remove_skip.q,\
insert1.q,\
insert_into1.q,\
insert_into2.q,\
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java?rev=1648457&r1=1648456&r2=1648457&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java Tue Dec 30 00:59:25 2014
@@ -24,10 +24,13 @@ import java.util.List;
import java.util.Map;
import java.util.Stack;
+import com.google.common.base.Predicates;
+import com.google.common.collect.Iterators;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -91,6 +94,11 @@ public class IdentityProjectRemover impl
return null;
}
Operator<? extends OperatorDesc> parent = parents.get(0);
+ if (parent instanceof ReduceSinkOperator && Iterators.any(sel.getChildOperators().iterator(),
+ Predicates.instanceOf(ReduceSinkOperator.class))) {
+ // RS-SEL-RS case: keep this SEL, because the reducer operator of a reduce task cannot be null in the task compiler
+ return null;
+ }
if(sel.isIdentitySelect()) {
parent.removeChildAndAdoptItsChildren(sel);
LOG.debug("Identity project remover optimization removed : " + sel);
Added: hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q?rev=1648457&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/identity_project_remove_skip.q Tue Dec 30 00:59:25 2014
@@ -0,0 +1,20 @@
+set hive.optimize.remove.identity.project=true;
+set hive.auto.convert.join=true;
+set hive.optimize.ppd=true;
+
+explain
+select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105';
+
+select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105';
Added: hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out?rev=1648457&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out Tue Dec 30 00:59:25 2014
@@ -0,0 +1,217 @@
+PREHOOK: query: explain
+select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-6 depends on stages: Stage-1, Stage-3 , consists of Stage-7, Stage-8, Stage-2
+ Stage-7 has a backup stage: Stage-2
+ Stage-4 depends on stages: Stage-7
+ Stage-8 has a backup stage: Stage-2
+ Stage-5 depends on stages: Stage-8
+ Stage-2
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-4, Stage-5, Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key is not null and (value = 'val_105')) and (key = '105')) (type: boolean)
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '105' (type: string)
+ sort order: +
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-7
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 '105' (type: string)
+ 1 '105' (type: string)
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 '105' (type: string)
+ 1 '105' (type: string)
+ Select Operator
+ expressions: '105' (type: string), 'val_105' (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 '105' (type: string)
+ 1 '105' (type: string)
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 '105' (type: string)
+ 1 '105' (type: string)
+ Select Operator
+ expressions: '105' (type: string), 'val_105' (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: '105' (type: string)
+ sort order: +
+ Map-reduce partition columns: '' (type: string)
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: '105' (type: string)
+ sort order: +
+ Map-reduce partition columns: '' (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '105' (type: string), 'val_105' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = '105') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '105' (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+105 val_105
Modified: hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out?rev=1648457&r1=1648456&r2=1648457&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ppd_join4.q.out Tue Dec 30 00:59:25 2014
@@ -66,11 +66,14 @@ STAGE PLANS:
sort order: +
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Operator Tree:
- Reduce Output Operator
- key expressions: 'a' (type: string)
- sort order: +
- Map-reduce partition columns: '' (type: string)
+ Select Operator
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
Added: hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out?rev=1648457&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/identity_project_remove_skip.q.out Tue Dec 30 00:59:25 2014
@@ -0,0 +1,119 @@
+PREHOOK: query: explain
+select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key is not null and (value = 'val_105')) and (key = '105')) (type: boolean)
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '105' (type: string)
+ sort order: +
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = '105') (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '105' (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: '105' (type: string)
+ sort order: +
+ Map-reduce partition columns: '' (type: string)
+ Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 '105' (type: string)
+ 1 '105' (type: string)
+ input vertices:
+ 0 Reducer 2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '105' (type: string), 'val_105' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select t2.*
+from
+ (select key,value from (select key,value from src) t1 sort by key) t2
+ join
+ (select * from src sort by key) t3
+ on (t2.key=t3.key )
+ where t2.value='val_105' and t3.key='105'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+105 val_105