You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/15 18:12:14 UTC
svn commit: r1643058 - in /hive/branches/spark: itests/src/test/resources/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/
ql/src/java/org/apache/hadoop/hive/ql/parse/spark/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/...
Author: xuefu
Date: Wed Dec 3 05:48:21 2014
New Revision: 1643058
URL: http://svn.apache.org/r1643058
Log:
HIVE-8943: Fix memory limit check for combine nested mapjoins [Spark Branch] (Szehon via Xuefu)
Added:
hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q
hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q
hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out
hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out
Modified:
hive/branches/spark/itests/src/test/resources/testconfiguration.properties
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1643058&r1=1643057&r2=1643058&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Wed Dec 3 05:48:21 2014
@@ -491,6 +491,8 @@ spark.query.files=add_part_multiple.q, \
auto_join_filters.q, \
auto_join_nulls.q, \
auto_join_reordering_values.q, \
+ auto_join_stats.q, \
+ auto_join_stats2.q, \
auto_join_without_localtask.q, \
auto_smb_mapjoin_14.q, \
auto_sortmerge_join_1.q, \
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java?rev=1643058&r1=1643057&r2=1643058&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java Wed Dec 3 05:48:21 2014
@@ -24,6 +24,7 @@ import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -90,7 +91,6 @@ public class SparkMapJoinOptimizer imple
int numBuckets = 1;
LOG.info("Estimated number of buckets " + numBuckets);
- int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets);
/* TODO: handle this later
if (mapJoinConversionPos < 0) {
@@ -153,8 +153,8 @@ public class SparkMapJoinOptimizer imple
LOG.info("Convert to non-bucketed map join");
// check if we can convert to map join no bucket scaling.
- mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1);
-
+ ObjectPair<Integer, Long> mapJoinInfo = getMapJoinConversionInfo(joinOp, context, 1);
+ int mapJoinConversionPos = mapJoinInfo.getFirst();
if (mapJoinConversionPos < 0) {
// we are just converting to a common merge join operator. The shuffle
@@ -175,6 +175,8 @@ public class SparkMapJoinOptimizer imple
setAllChildrenTraitsToNull(childOp);
}
+ context.getMjOpSizes().put(mapJoinOp, mapJoinInfo.getSecond());
+
return null;
}
@@ -311,10 +313,10 @@ public class SparkMapJoinOptimizer imple
* @param joinOp
* @param context
* @param buckets
- * @return
+ * @return pair, first value is the position, second value is the in-memory size of this mapjoin.
*/
- private int getMapJoinConversionPos(JoinOperator joinOp, OptimizeSparkProcContext context,
- int buckets) {
+ private ObjectPair<Integer, Long> getMapJoinConversionInfo(JoinOperator joinOp, OptimizeSparkProcContext context,
+ int buckets) {
Set<Integer> bigTableCandidateSet =
MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds());
@@ -336,7 +338,7 @@ public class SparkMapJoinOptimizer imple
Statistics currInputStat = parentOp.getStatistics();
if (currInputStat == null) {
LOG.warn("Couldn't get statistics from: "+parentOp);
- return -1;
+ return new ObjectPair(-1, 0);
}
// Union is hard to handle. For instance, the following case:
@@ -359,7 +361,7 @@ public class SparkMapJoinOptimizer imple
// But, this is tricky to implement, and we'll leave it as a future work for now.
// TODO: handle this as a MJ case
if (containUnionWithoutRS(parentOp.getParentOperators().get(0))) {
- return -1;
+ return new ObjectPair(-1, 0);
}
long inputSize = currInputStat.getDataSize();
@@ -370,14 +372,14 @@ public class SparkMapJoinOptimizer imple
if (bigTableFound) {
// cannot convert to map join; we've already chosen a big table
// on size and there's another one that's bigger.
- return -1;
+ return new ObjectPair(-1, 0);
}
if (inputSize/buckets > maxSize) {
if (!bigTableCandidateSet.contains(pos)) {
// can't use the current table as the big table, but it's too
// big for the map side.
- return -1;
+ return new ObjectPair(-1, 0);
}
bigTableFound = true;
@@ -392,7 +394,7 @@ public class SparkMapJoinOptimizer imple
if (totalSize/buckets > maxSize) {
// sum of small tables size in this join exceeds configured limit
// hence cannot convert.
- return -1;
+ return new ObjectPair(-1, 0);
}
if (bigTableCandidateSet.contains(pos)) {
@@ -403,13 +405,93 @@ public class SparkMapJoinOptimizer imple
totalSize += currInputStat.getDataSize();
if (totalSize/buckets > maxSize) {
// cannot hold all map tables in memory. Cannot convert.
- return -1;
+ return new ObjectPair(-1, 0);
}
}
pos++;
}
- return bigTablePosition;
+ if (bigTablePosition == -1) {
+ //No big table candidates.
+ return new ObjectPair(-1, 0);
+ }
+
+ //Final check, find size of already-calculated Mapjoin Operators in same work (spark-stage). We need to factor
+ //this in to prevent overwhelming Spark executor-memory.
+ long connectedMapJoinSize = getConnectedMapJoinSize(joinOp.getParentOperators().get(bigTablePosition), joinOp, context);
+ if ((connectedMapJoinSize + (totalSize / buckets)) > maxSize) {
+ return new ObjectPair(-1, 0);
+ }
+
+ return new ObjectPair(bigTablePosition, connectedMapJoinSize + (totalSize / buckets));
+ }
+
+ /**
+ * Examines this operator and all the connected operators, for mapjoins that will be in the same work.
+ * @param parentOp potential big-table parent operator, explore up from this.
+ * @param joinOp potential mapjoin operator, explore down from this.
+ * @param ctx context to pass information.
+ * @return total size of connected parent and child mapjoins in same work as this operator.
+ */
+ private long getConnectedMapJoinSize(Operator<? extends OperatorDesc> parentOp, Operator joinOp, OptimizeSparkProcContext ctx) {
+ long result = 0;
+ for (Operator<? extends OperatorDesc> grandParentOp : parentOp.getParentOperators()) {
+ result += getConnectedParentMapJoinSize(grandParentOp, ctx);
+ }
+ result += getConnectedChildMapJoinSize(joinOp, ctx);
+ return result;
+ }
+
+ /**
+ * Examines this operator and all the parents, for mapjoins that will be in the same work.
+ * @param op given operator
+ * @param ctx context to pass information.
+ * @return total size of parent mapjoins in same work as this operator.
+ */
+ private long getConnectedParentMapJoinSize(Operator<? extends OperatorDesc> op, OptimizeSparkProcContext ctx) {
+ if ((op instanceof UnionOperator) || (op instanceof ReduceSinkOperator)) {
+ //Work Boundary, stop exploring.
+ return 0;
+ }
+
+ if (op instanceof MapJoinOperator) {
+ //found parent mapjoin operator. Its size should already reflect any other mapjoins connected to it.
+ long mjSize = ctx.getMjOpSizes().get(op);
+ return mjSize;
+ }
+
+ long result = 0;
+ for (Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) {
+ //Else, recurse up the parents.
+ result += getConnectedParentMapJoinSize(parentOp, ctx);
+ }
+ return result;
+ }
+
+ /**
+ * Examines this operator and all the children, for mapjoins that will be in the same work.
+ * @param op given operator
+ * @param ctx context to pass information.
+ * @return total size of child mapjoins in same work as this operator.
+ */
+ private long getConnectedChildMapJoinSize(Operator<? extends OperatorDesc> op, OptimizeSparkProcContext ctx) {
+ if ((op instanceof UnionOperator) || (op instanceof ReduceSinkOperator)) {
+ //Work Boundary, stop exploring.
+ return 0;
+ }
+
+ if (op instanceof MapJoinOperator) {
+ //found child mapjoin operator. Its size should already reflect any mapjoins connected to it, so stop processing.
+ long mjSize = ctx.getMjOpSizes().get(op);
+ return mjSize;
+ }
+
+ long result = 0;
+ for (Operator<? extends OperatorDesc> childOp : op.getChildOperators()) {
+ //Else, recurse to the children.
+ result += getConnectedChildMapJoinSize(childOp, ctx);
+ }
+ return result;
}
/*
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java?rev=1643058&r1=1643057&r2=1643058&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java Wed Dec 3 05:48:21 2014
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.parse.spark;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -28,7 +29,9 @@ import org.apache.hadoop.hive.ql.parse.P
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import java.util.Deque;
+import java.util.HashMap;
import java.util.HashSet;
+import java.util.Map;
import java.util.Set;
/**
@@ -44,6 +47,7 @@ public class OptimizeSparkProcContext im
private final Set<ReadEntity> inputs;
private final Set<WriteEntity> outputs;
private final Set<ReduceSinkOperator> visitedReduceSinks = new HashSet<ReduceSinkOperator>();
+ private final Map<MapJoinOperator, Long> mjOpSizes = new HashMap<MapJoinOperator, Long>();
// rootOperators are all the table scan operators in sequence
// of traversal
@@ -83,4 +87,8 @@ public class OptimizeSparkProcContext im
public Deque<Operator<? extends OperatorDesc>> getRootOperators() {
return rootOperators;
}
+
+ public Map<MapJoinOperator, Long> getMjOpSizes() {
+ return mjOpSizes;
+ }
}
Added: hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q (added)
+++ hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats.q Wed Dec 3 05:48:21 2014
@@ -0,0 +1,19 @@
+set hive.auto.convert.join = true;
+set hive.auto.convert.join.noconditionaltask.size=2660;
+
+-- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable;
+analyze table smalltable compute statistics;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+
+create table smalltable2(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable2;
+analyze table smalltable compute statistics;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
\ No newline at end of file
Added: hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q (added)
+++ hive/branches/spark/ql/src/test/queries/clientpositive/auto_join_stats2.q Wed Dec 3 05:48:21 2014
@@ -0,0 +1,17 @@
+set hive.auto.convert.join = true;
+
+-- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key);
+
+create table smalltable2(key string, value string) stored as textfile;
+load data local inpath '../../data/files/T1.txt' into table smalltable2;
+analyze table smalltable compute statistics;
+
+explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
+select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key);
\ No newline at end of file
Added: hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats.q.out Wed Dec 3 05:48:21 2014
@@ -0,0 +1,545 @@
+PREHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage , consists of Stage-10, Stage-11, Stage-1
+ Stage-10 has a backup stage: Stage-1
+ Stage-6 depends on stages: Stage-10
+ Stage-9 depends on stages: Stage-1, Stage-6, Stage-7
+ Stage-5 depends on stages: Stage-9
+ Stage-11 has a backup stage: Stage-1
+ Stage-7 depends on stages: Stage-11
+ Stage-1
+ Stage-0 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-10
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src2
+ TableScan
+ alias: src2
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-9
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ smalltable
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ smalltable
+ TableScan
+ alias: smalltable
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-11
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src1
+ TableScan
+ alias: src1
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1 {KEY.reducesinkkey0}
+ outputColumnNames: _col0, _col5
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4 4 8
+4 4 8
+PREHOOK: query: create table smalltable2(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-11 is a root stage , consists of Stage-13, Stage-14, Stage-1
+ Stage-13 has a backup stage: Stage-1
+ Stage-9 depends on stages: Stage-13
+ Stage-12 depends on stages: Stage-1, Stage-9, Stage-10
+ Stage-7 depends on stages: Stage-12
+ Stage-14 has a backup stage: Stage-1
+ Stage-10 depends on stages: Stage-14
+ Stage-1
+ Stage-0 depends on stages: Stage-7
+
+STAGE PLANS:
+ Stage: Stage-11
+ Conditional Operator
+
+ Stage: Stage-13
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src2
+ TableScan
+ alias: src2
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-9
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-12
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ smalltable
+ Fetch Operator
+ limit: -1
+ smalltable2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ smalltable
+ TableScan
+ alias: smalltable
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ smalltable2
+ TableScan
+ alias: smalltable2
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5} {_col10}
+ 1
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5} {_col10}
+ 1
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-14
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src1
+ TableScan
+ alias: src1
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1 {KEY.reducesinkkey0}
+ outputColumnNames: _col0, _col5
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@smalltable2
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@smalltable2
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4 4 8
+4 4 8
+4 4 8
+4 4 8
Added: hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/auto_join_stats2.q.out Wed Dec 3 05:48:21 2014
@@ -0,0 +1,311 @@
+PREHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-7 is a root stage
+ Stage-5 depends on stages: Stage-7
+ Stage-0 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-7
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ smalltable
+ Fetch Operator
+ limit: -1
+ src1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ smalltable
+ TableScan
+ alias: smalltable
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ src1
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4 4 8
+4 4 8
+PREHOOK: query: create table smalltable2(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-10 is a root stage
+ Stage-7 depends on stages: Stage-10
+ Stage-0 depends on stages: Stage-7
+
+STAGE PLANS:
+ Stage: Stage-10
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ smalltable
+ Fetch Operator
+ limit: -1
+ smalltable2
+ Fetch Operator
+ limit: -1
+ src1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ smalltable
+ TableScan
+ alias: smalltable
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ smalltable2
+ TableScan
+ alias: smalltable2
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5} {_col10}
+ 1
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ src1
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5} {_col10}
+ 1
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@smalltable2
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@smalltable2
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4 4 8
+4 4 8
+4 4 8
+4 4 8
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats.q.out Wed Dec 3 05:48:21 2014
@@ -0,0 +1,347 @@
+PREHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: -- Setting HTS(src2) < threshold < HTS(src2) + HTS(smalltable).
+-- This query plan should thus not try to combine the mapjoin into a single work.
+
+create table smalltable(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (_col0 + _col5) (type: double)
+ sort order: +
+ Map-reduce partition columns: (_col0 + _col5) (type: double)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col5 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: smalltable
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(key) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(key) (type: double)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col5}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col5, _col10
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4 4 8
+4 4 8
+PREHOOK: query: create table smalltable2(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: smalltable2
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5} {_col10}
+ 1
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (_col0 + _col5) (type: double)
+ sort order: +
+ Map-reduce partition columns: (_col0 + _col5) (type: double)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col5 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: smalltable
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(key) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(key) (type: double)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string)
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col5}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col5, _col10
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5} {_col10}
+ 1
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@smalltable2
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@smalltable2
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4 4 8
+4 4 8
+4 4 8
+4 4 8
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out?rev=1643058&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out Wed Dec 3 05:48:21 2014
@@ -0,0 +1,327 @@
+PREHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: -- Auto_join2 no longer tests merging the mapjoin work if big-table selection is based on stats, as src3 is smaller statistically than src1 + src2.
+-- Hence forcing the third table to be smaller.
+
+create table smalltable(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: smalltable
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4 4 8
+4 4 8
+PREHOOK: query: create table smalltable2(key string, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: create table smalltable2(key string, value string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@smalltable2
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table smalltable2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@smalltable2
+PREHOOK: query: analyze table smalltable compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Output: default@smalltable
+POSTHOOK: query: analyze table smalltable compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Output: default@smalltable
+PREHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: smalltable
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ Local Work:
+ Map Reduce Local Work
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: smalltable2
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: UDFToDouble(key) is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col5} {_col10}
+ 1
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1 {key}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col5
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5}
+ 1 {key}
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 + _col5) is not null (type: boolean)
+ Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col5} {_col10}
+ 1
+ keys:
+ 0 (_col0 + _col5) (type: double)
+ 1 UDFToDouble(key) (type: double)
+ outputColumnNames: _col0, _col5, _col10
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smalltable
+PREHOOK: Input: default@smalltable2
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smalltable
+POSTHOOK: Input: default@smalltable2
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4 4 8
+4 4 8
+4 4 8
+4 4 8