You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/10/30 23:23:41 UTC
svn commit: r1635630 [1/3] - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/
java/org/apache/hadoop/hive/ql/optimizer/
java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/
test/results/clientpositive/
Author: xuefu
Date: Thu Oct 30 22:23:40 2014
New Revision: 1635630
URL: http://svn.apache.org/r1635630
Log:
HIVE-8610: Compile time skew join optimization doesn't work with auto map join (Rui via Xuefu)
Added:
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q
hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin11.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin3.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin4.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin5.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin6.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin7.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin8.q.out
hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin9.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1635630&r1=1635629&r2=1635630&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Thu Oct 30 22:23:40 2014
@@ -1093,8 +1093,9 @@ public abstract class Operator<T extends
@SuppressWarnings("unchecked")
T descClone = (T)conf.clone();
+ // also clone the colExprMap by default
Operator<? extends OperatorDesc> ret =
- OperatorFactory.getAndMakeChild(descClone, getSchema(), parentClones);
+ OperatorFactory.getAndMakeChild(descClone, getSchema(), getColumnExprMap(), parentClones);
return ret;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java?rev=1635630&r1=1635629&r2=1635630&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java Thu Oct 30 22:23:40 2014
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.lib.Rul
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.QBJoinTree;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -164,12 +165,23 @@ public class SkewJoinOptimizer implement
return null;
}
+ // have to create a QBJoinTree for the cloned join operator
+ QBJoinTree originJoinTree = parseContext.getJoinContext().get(joinOp);
+ QBJoinTree newJoinTree;
+ try {
+ newJoinTree = originJoinTree.clone();
+ } catch (CloneNotSupportedException e) {
+ LOG.debug("QBJoinTree could not be cloned: ", e);
+ return null;
+ }
+
JoinOperator joinOpClone;
if (processSelect) {
joinOpClone = (JoinOperator)(currOpClone.getParentOperators().get(0));
} else {
joinOpClone = (JoinOperator)currOpClone;
}
+ parseContext.getJoinContext().put(joinOpClone, newJoinTree);
List<TableScanOperator> tableScanCloneOpsForJoin =
new ArrayList<TableScanOperator>();
@@ -201,6 +213,7 @@ public class SkewJoinOptimizer implement
}
parseContext.getTopOps().put(newAlias, tso);
+ setUpAlias(originJoinTree, newJoinTree, tabAlias, newAlias, tso);
}
// Now do a union of the select operators: selectOp and selectOpClone
@@ -610,6 +623,48 @@ public class SkewJoinOptimizer implement
}
}
}
+
+ /**
+ * Set alias in the cloned join tree
+ */
+ private static void setUpAlias(QBJoinTree origin, QBJoinTree cloned, String origAlias,
+ String newAlias, Operator<? extends OperatorDesc> topOp) {
+ cloned.getAliasToOpInfo().remove(origAlias);
+ cloned.getAliasToOpInfo().put(newAlias, topOp);
+ if (origin.getLeftAlias().equals(origAlias)) {
+ cloned.setLeftAlias(null);
+ cloned.setLeftAlias(newAlias);
+ }
+ replaceAlias(origin.getLeftAliases(), cloned.getLeftAliases(), origAlias, newAlias);
+ replaceAlias(origin.getRightAliases(), cloned.getRightAliases(), origAlias, newAlias);
+ replaceAlias(origin.getBaseSrc(), cloned.getBaseSrc(), origAlias, newAlias);
+ replaceAlias(origin.getMapAliases(), cloned.getMapAliases(), origAlias, newAlias);
+ replaceAlias(origin.getStreamAliases(), cloned.getStreamAliases(), origAlias, newAlias);
+ }
+
+ private static void replaceAlias(String[] origin, String[] cloned,
+ String alias, String newAlias) {
+ if (origin == null || cloned == null || origin.length != cloned.length) {
+ return;
+ }
+ for (int i = 0; i < origin.length; i++) {
+ if (origin[i].equals(alias)) {
+ cloned[i] = newAlias;
+ }
+ }
+ }
+
+ private static void replaceAlias(List<String> origin, List<String> cloned,
+ String alias, String newAlias) {
+ if (origin == null || cloned == null || origin.size() != cloned.size()) {
+ return;
+ }
+ for (int i = 0; i < origin.size(); i++) {
+ if (origin.get(i).equals(alias)) {
+ cloned.set(i, newAlias);
+ }
+ }
+ }
}
/* (non-Javadoc)
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java?rev=1635630&r1=1635629&r2=1635630&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/JoinCond.java Thu Oct 30 22:23:40 2014
@@ -79,4 +79,7 @@ public class JoinCond {
this.joinType = joinType;
}
+ public void setPreserved(boolean preserved) {
+ this.preserved = preserved;
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java?rev=1635630&r1=1635629&r2=1635630&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java Thu Oct 30 22:23:40 2014
@@ -32,7 +32,7 @@ import org.apache.hadoop.hive.ql.plan.Op
* Internal representation of the join tree.
*
*/
-public class QBJoinTree implements Serializable{
+public class QBJoinTree implements Serializable, Cloneable {
private static final long serialVersionUID = 1L;
private String leftAlias;
private String[] rightAliases;
@@ -363,4 +363,70 @@ public class QBJoinTree implements Seria
public List<ASTNode> getPostJoinFilters() {
return postJoinFilters;
}
+
+ @Override
+ public QBJoinTree clone() throws CloneNotSupportedException {
+ QBJoinTree cloned = new QBJoinTree();
+
+ // shallow copy aliasToOpInfo, we won't want to clone the operator tree here
+ cloned.setAliasToOpInfo(aliasToOpInfo == null ? null :
+ new HashMap<String, Operator<? extends OperatorDesc>>(aliasToOpInfo));
+
+ cloned.setBaseSrc(baseSrc == null ? null : baseSrc.clone());
+
+ // shallow copy ASTNode
+ cloned.setExpressions(expressions);
+ cloned.setFilters(filters);
+ cloned.setFiltersForPushing(filtersForPushing);
+
+ // clone filterMap
+ int[][] clonedFilterMap = filterMap == null ? null : new int[filterMap.length][];
+ if (filterMap != null) {
+ for (int i = 0; i < filterMap.length; i++) {
+ clonedFilterMap[i] = filterMap[i] == null ? null : filterMap[i].clone();
+ }
+ }
+ cloned.setFilterMap(clonedFilterMap);
+
+ cloned.setId(id);
+
+ // clone joinCond
+ JoinCond[] clonedJoinCond = joinCond == null ? null : new JoinCond[joinCond.length];
+ if (joinCond != null) {
+ for (int i = 0; i < joinCond.length; i++) {
+ if(joinCond[i] == null) {
+ continue;
+ }
+ JoinCond clonedCond = new JoinCond();
+ clonedCond.setJoinType(joinCond[i].getJoinType());
+ clonedCond.setLeft(joinCond[i].getLeft());
+ clonedCond.setPreserved(joinCond[i].getPreserved());
+ clonedCond.setRight(joinCond[i].getRight());
+ clonedJoinCond[i] = clonedCond;
+ }
+ }
+ cloned.setJoinCond(clonedJoinCond);
+
+ cloned.setJoinSrc(joinSrc == null ? null : joinSrc.clone());
+ cloned.setLeftAlias(leftAlias);
+ cloned.setLeftAliases(leftAliases == null ? null : leftAliases.clone());
+ cloned.setMapAliases(mapAliases == null ? null : new ArrayList<String>(mapAliases));
+ cloned.setMapSideJoin(mapSideJoin);
+ cloned.setNoOuterJoin(noOuterJoin);
+ cloned.setNoSemiJoin(noSemiJoin);
+ cloned.setNullSafes(nullsafes == null ? null : new ArrayList<Boolean>(nullsafes));
+ cloned.setRightAliases(rightAliases == null ? null : rightAliases.clone());
+ cloned.setStreamAliases(streamAliases == null ? null : new ArrayList<String>(streamAliases));
+
+ // clone postJoinFilters
+ for (ASTNode filter : postJoinFilters) {
+ cloned.getPostJoinFilters().add(filter);
+ }
+ // clone rhsSemijoin
+ for (Entry<String, ArrayList<ASTNode>> entry : rhsSemijoin.entrySet()) {
+ cloned.addRHSSemijoinColumns(entry.getKey(), entry.getValue());
+ }
+
+ return cloned;
+ }
}
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin1.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,44 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt1
+-- test compile time skew join and auto map join
+-- a simple join query with skew on both the tables on the join key
+-- adding an order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key;
+
+EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin10.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,53 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1;
+
+-- testing skew on other data types - int
+CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2));
+INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1;
+
+CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2;
+
+CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3));
+
+INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2;
+
+-- copy from skewjoinopt15
+-- test compile time skew join and auto map join
+-- The skewed key is a integer column.
+-- Otherwise this test is similar to skewjoinopt1.q
+-- Both the joined tables are skewed, and the joined column
+-- is an integer
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key;
+
+EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key;
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin11.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,26 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+CLUSTERED BY (key) INTO 4 BUCKETS
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt19
+-- test compile time skew join and auto map join
+-- add a test where the skewed key is also the bucketized key
+-- it should not matter, and the compile time skewed join
+-- optimization is performed
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin2.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,34 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt3
+-- test compile time skew join and auto map join
+-- a simple query with skew on both the tables. One of the skewed
+-- value is common to both the tables. The skewed value should not be
+-- repeated in the filter.
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
+
+-- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a FULL OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin3.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,26 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key, val) ON ((3, 13), (8, 18)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt6
+-- test compile time skew join and auto map join
+-- Both the join tables are skewed by 2 keys, and one of the skewed values
+-- is common to both the tables. The join key is a subset of the skewed key set:
+-- it only contains the first skewed key for both the tables
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key;
+
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val;
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin4.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,30 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3;
+
+-- copy from skewjoinopt7
+-- test compile time skew join and auto map join
+-- This test is for validating skewed join compile time optimization for more than
+-- 2 tables. The join key is the same, and so a 3-way join would be performed.
+-- 2 of the 3 tables are skewed on the join key
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key;
+
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+ORDER BY a.key, b.key, c.key, a.val, b.val, c.val;
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin5.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,51 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt9
+-- test compile time skew join and auto map join
+-- no skew join compile time optimization would be performed if one of the
+-- join sources is a sub-query consisting of a union all
+-- adding a order by at the end to make the results deterministic
+EXPLAIN
+select * from
+(
+select key, val from T1
+ union all
+select key, val from T1
+) subq1
+join T2 b on subq1.key = b.key;
+
+select * from
+(
+select key, val from T1
+ union all
+select key, val from T1
+) subq1
+join T2 b on subq1.key = b.key
+ORDER BY subq1.key, b.key, subq1.val, b.val;
+
+-- no skew join compile time optimization would be performed if one of the
+-- join sources is a sub-query consisting of a group by
+EXPLAIN
+select * from
+(
+select key, count(1) as cnt from T1 group by key
+) subq1
+join T2 b on subq1.key = b.key;
+
+select * from
+(
+select key, count(1) as cnt from T1 group by key
+) subq1
+join T2 b on subq1.key = b.key
+ORDER BY subq1.key, b.key, subq1.cnt, b.val;
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin6.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,22 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, value STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+drop table array_valued_T1;
+create table array_valued_T1 (key string, value array<string>) SKEWED BY (key) ON ((8));
+insert overwrite table array_valued_T1 select key, array(value) from T1;
+
+-- copy from skewjoinopt10
+-- test compile time skew join and auto map join
+-- This test is to verify the skew join compile optimization when the join is followed by a lateral view
+-- adding a order by at the end to make the results deterministic
+
+explain
+select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val;
+
+select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val
+ORDER BY key, val;
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin7.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,35 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+-- copy from skewjoinopt11
+-- test compile time skew join and auto map join
+-- This test is to verify the skew join compile optimization when the join is followed
+-- by a union. Both sides of a union consist of a join, which should have used
+-- skew join compile time optimization.
+-- adding an order by at the end to make the results deterministic
+
+EXPLAIN
+select * from
+(
+ select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key
+ union all
+ select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key
+) subq1;
+
+select * from
+(
+ select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key
+ union all
+ select a.key, a.val as val1, b.val as val2 from T1 a join T2 b on a.key = b.key
+) subq1
+ORDER BY key, val1, val2;
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin8.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,38 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+CREATE TABLE T3(key STRING, val STRING)
+SKEWED BY (val) ON ((12)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3;
+
+-- copy from skewjoinopt13
+-- test compile time skew join and auto map join
+-- This test is for skewed join compile time optimization for more than 2 tables.
+-- The join key for table 3 is different from the join key used for joining
+-- tables 1 and 2. Table 3 is skewed, but since one of the join sources for table
+-- 3 consist of a sub-query which contains a join, the compile time skew join
+-- optimization is not performed
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+select *
+from
+T1 a join T2 b on a.key = b.key
+join T3 c on a.val = c.val;
+
+select *
+from
+T1 a join T2 b on a.key = b.key
+join T3 c on a.val = c.val
+order by a.key, b.key, c.key, a.val, b.val, c.val;
+
Added: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_mapjoin9.q Thu Oct 30 22:23:40 2014
@@ -0,0 +1,40 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.join=true;
+
+CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
+
+CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2;
+
+CREATE TABLE T3(key STRING, val STRING)
+SKEWED BY (val) ON ((12)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3;
+
+-- copy from skewjoinopt14
+-- test compile time skew join and auto map join
+-- This test is for skewed join compile time optimization for more than 2 tables.
+-- The join key for table 3 is different from the join key used for joining
+-- tables 1 and 2. Tables 1 and 3 are skewed. Since one of the join sources for table
+-- 3 consist of a sub-query which contains a join, the compile time skew join
+-- optimization is not enabled for table 3, but it is used for the first join between
+-- tables 1 and 2
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+select *
+from
+T1 a join T2 b on a.key = b.key
+join T3 c on a.val = c.val;
+
+select *
+from
+T1 a join T2 b on a.key = b.key
+join T3 c on a.val = c.val
+order by a.key, b.key, a.val, b.val;
+
Added: hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin1.q.out Thu Oct 30 22:23:40 2014
@@ -0,0 +1,660 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t2
+PREHOOK: query: -- copy from skewjoinopt1
+-- test compile time skew join and auto map join
+-- a simple join query with skew on both the tables on the join key
+-- adding an order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- copy from skewjoinopt1
+-- test compile time skew join and auto map join
+-- a simple join query with skew on both the tables on the join key
+-- adding an order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-2 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ subquery1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2 12 2 22
+3 13 3 13
+8 18 8 18
+8 18 8 18
+8 28 8 18
+8 28 8 18
+PREHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-2 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ subquery1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ condition expressions:
+ 0 {key} {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ condition expressions:
+ 0 {key} {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+NULL NULL 4 14
+NULL NULL 5 15
+2 12 2 22
+3 13 3 13
+8 18 8 18
+8 18 8 18
+8 28 8 18
+8 28 8 18
+PREHOOK: query: -- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-2 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ subquery1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+6
+PREHOOK: query: EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-2 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ subquery1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (not ((key = '2') or (key = '3'))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: ((key = '2') or (key = '3')) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+8
Added: hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out?rev=1635630&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/skewjoin_mapjoin10.q.out Thu Oct 30 22:23:40 2014
@@ -0,0 +1,700 @@
+PREHOOK: query: CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmpT1
+POSTHOOK: query: CREATE TABLE tmpT1(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmpT1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tmpt1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE tmpT1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tmpt1
+PREHOOK: query: -- testing skew on other data types - int
+CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: -- testing skew on other data types - int
+CREATE TABLE T1(key INT, val STRING) SKEWED BY (key) ON ((2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmpt1
+PREHOOK: Output: default@t1
+POSTHOOK: query: INSERT OVERWRITE TABLE T1 SELECT key, val FROM tmpT1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmpt1
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.key EXPRESSION [(tmpt1)tmpt1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1.val SIMPLE [(tmpt1)tmpt1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmpT2
+POSTHOOK: query: CREATE TABLE tmpT2(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmpT2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tmpt2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE tmpT2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tmpt2
+PREHOOK: query: CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key INT, val STRING) SKEWED BY (key) ON ((3))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmpt2
+PREHOOK: Output: default@t2
+POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT key, val FROM tmpT2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmpt2
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.key EXPRESSION [(tmpt2)tmpt2.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t2.val SIMPLE [(tmpt2)tmpt2.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: -- copy from skewjoinopt15
+-- test compile time skew join and auto map join
+-- The skewed key is a integer column.
+-- Otherwise this test is similar to skewjoinopt1.q
+-- Both the joined tables are skewed, and the joined column
+-- is an integer
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- copy from skewjoinopt15
+-- test compile time skew join and auto map join
+-- The skewed key is a integer column.
+-- Otherwise this test is similar to skewjoinopt1.q
+-- Both the joined tables are skewed, and the joined column
+-- is an integer
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-2 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ subquery1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: b
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2 12 2 22
+3 13 3 13
+8 18 8 18
+8 18 8 18
+8 28 8 18
+8 28 8 18
+PREHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test outer joins also
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-2 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ subquery1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key = 2) or (key = 3)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ condition expressions:
+ 0 {key} {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: b
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key = 2) or (key = 3)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ condition expressions:
+ 0 {key} {val}
+ 1 {key} {val}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+NULL NULL 4 14
+NULL NULL 5 15
+2 12 2 22
+3 13 3 13
+8 18 8 18
+8 18 8 18
+8 28 8 18
+8 28 8 18
+PREHOOK: query: -- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- an aggregation at the end should not change anything
+
+EXPLAIN
+SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-2 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ subquery1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ TableScan
+ alias: b
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+6
+PREHOOK: query: EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-2 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ subquery1:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key = 2) or (key = 3)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (not ((key = 2) or (key = 3))) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ TableScan
+ alias: b
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key = 2) or (key = 3)) (type: boolean)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ SELECT * : (no compute)
+ Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+8