You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dj...@apache.org on 2018/06/11 00:56:39 UTC
[8/8] hive git commit: HIVE-18875 : Enable SMB Join by default in Tez
(Deepak Jaiswal, reviewed by Gunther Hagleitner)
HIVE-18875 : Enable SMB Join by default in Tez (Deepak Jaiswal, reviewed by Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0a961aa8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0a961aa8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0a961aa8
Branch: refs/heads/master
Commit: 0a961aa8fd2187e901ec071ea9ddc3ab4d4970c5
Parents: 773034f
Author: Deepak Jaiswal <dj...@apache.org>
Authored: Sun Jun 10 00:25:34 2018 -0700
Committer: Deepak Jaiswal <dj...@apache.org>
Committed: Sun Jun 10 17:55:30 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 2 +-
.../test/resources/testconfiguration.properties | 1 +
.../hive/ql/exec/CommonMergeJoinOperator.java | 31 +-
.../hadoop/hive/ql/exec/GroupByOperator.java | 2 +-
.../apache/hadoop/hive/ql/exec/Operator.java | 12 +
.../hive/ql/exec/tez/ReduceRecordSource.java | 10 +
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 17 +
.../annotation/OpTraitsRulesProcFactory.java | 41 +-
.../clientpositive/auto_sortmerge_join_11.q | 1 +
.../clientpositive/auto_sortmerge_join_6.q | 4 +-
.../test/queries/clientpositive/skewjoinopt19.q | 1 +
.../test/queries/clientpositive/skewjoinopt20.q | 1 +
.../queries/clientpositive/smb_mapjoin_11.q | 3 +-
.../queries/clientpositive/smb_mapjoin_12.q | 2 +-
.../queries/clientpositive/smb_mapjoin_17.q | 1 +
.../queries/clientpositive/subquery_notin.q | 2 +-
.../llap/auto_sortmerge_join_6.q.out | 150 ++--
.../llap/correlationoptimizer2.q.out | 450 +++++-------
.../llap/correlationoptimizer6.q.out | 170 ++---
.../clientpositive/llap/explainuser_1.q.out | 143 ++--
.../clientpositive/llap/limit_pushdown.q.out | 83 +--
.../results/clientpositive/llap/mergejoin.q.out | 715 ++++---------------
.../test/results/clientpositive/llap/mrr.q.out | 132 ++--
.../llap/offset_limit_ppd_optimizer.q.out | 85 +--
.../results/clientpositive/llap/smb_cache.q.out | 50 +-
.../clientpositive/llap/smb_mapjoin_14.q.out | 644 ++++++-----------
.../clientpositive/llap/smb_mapjoin_15.q.out | 101 +--
.../clientpositive/llap/smb_mapjoin_4.q.out | 544 ++++----------
.../clientpositive/llap/smb_mapjoin_5.q.out | 544 ++++----------
.../clientpositive/llap/smb_mapjoin_6.q.out | 415 ++++-------
.../llap/subquery_in_having.q.out | 160 ++---
.../clientpositive/llap/subquery_notin.q.out | 124 ++--
.../llap/vectorized_bucketmapjoin1.q.out | 307 ++------
.../clientpositive/spark/bucketmapjoin1.q.out | 210 ++----
.../clientpositive/spark/smb_mapjoin_14.q.out | 711 +++++-------------
.../clientpositive/spark/smb_mapjoin_15.q.out | 399 ++---------
.../clientpositive/spark/smb_mapjoin_4.q.out | 640 ++++-------------
.../clientpositive/spark/smb_mapjoin_5.q.out | 640 ++++-------------
.../clientpositive/spark/smb_mapjoin_6.q.out | 331 +++------
.../clientpositive/spark/subquery_notin.q.out | 8 +-
40 files changed, 2329 insertions(+), 5558 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 87db42a..1e8a389 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2076,7 +2076,7 @@ public class HiveConf extends Configuration {
HIVE_ENFORCE_NOT_NULL_CONSTRAINT("hive.constraint.notnull.enforce", true,
"Should \"IS NOT NULL \" constraint be enforced?"),
- HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", false,
+ HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", true,
"Will the join be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join."),
HIVE_AUTO_SORTMERGE_JOIN_REDUCE("hive.auto.convert.sortmerge.join.reduce.side", true,
"Whether hive.auto.convert.sortmerge.join (if enabled) should be applied to reduce side."),
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 0f13d62..c71c540 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -701,6 +701,7 @@ minillaplocal.query.files=\
tez_smb_1.q,\
tez_smb_empty.q,\
tez_smb_main.q,\
+ tez_smb_reduce_side.q,\
tez_union.q,\
tez_union2.q,\
tez_union_decimal.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
index aefaa05..8f98ace 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonMergeJoinOperator.java
@@ -25,6 +25,8 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
+
+import org.apache.hadoop.hive.ql.exec.tez.ReduceRecordSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
@@ -148,7 +150,7 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge
for (byte pos = 0; pos < order.length; pos++) {
if (pos != posBigTable) {
- if ((parentOperators != null) && (parentOperators.isEmpty() == false)
+ if ((parentOperators != null) && !parentOperators.isEmpty()
&& (parentOperators.get(pos) instanceof TezDummyStoreOperator)) {
TezDummyStoreOperator dummyStoreOp = (TezDummyStoreOperator) parentOperators.get(pos);
fetchDone[pos] = dummyStoreOp.getFetchDone();
@@ -161,6 +163,15 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge
sources = ((TezContext) MapredContext.get()).getRecordSources();
interruptChecker = new InterruptibleProcessing();
+
+ if (sources[0] instanceof ReduceRecordSource &&
+ parentOperators != null && !parentOperators.isEmpty()) {
+ // Tell ReduceRecordSource to flush last record as this is a reduce
+ // side SMB
+ for (RecordSource source : sources) {
+ ((ReduceRecordSource) source).setFlushLastRecord(true);
+ }
+ }
}
/*
@@ -230,7 +241,7 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge
continue;
}
- if (foundNextKeyGroup[i] == false) {
+ if (!foundNextKeyGroup[i]) {
canEmit = false;
break;
}
@@ -258,13 +269,12 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge
// catch up with the big table.
if (nextKeyGroup) {
assert tag == posBigTable;
- List<Byte> smallestPos = null;
+ List<Byte> listOfFetchNeeded = null;
do {
- smallestPos = joinOneGroup();
+ listOfFetchNeeded = joinOneGroup();
//jump out the loop if we need input from the big table
- } while (smallestPos != null && smallestPos.size() > 0
- && !smallestPos.contains(this.posBigTable));
-
+ } while (listOfFetchNeeded != null && listOfFetchNeeded.size() > 0
+ && !listOfFetchNeeded.contains(this.posBigTable));
return;
}
@@ -360,6 +370,9 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge
}
private void fetchNextGroup(Byte t) throws HiveException {
+ if (keyWritables[t] != null) {
+ return; // First process the current key.
+ }
if (foundNextKeyGroup[t]) {
// first promote the next group to be the current group if we reached a
// new group in the previous fetch
@@ -530,6 +543,10 @@ public class CommonMergeJoinOperator extends AbstractMapJoinOperator<CommonMerge
} else {
int cmp = compareKeys(alias, key, keyWritable);
if (cmp != 0) {
+ // Can't overwrite existing keys
+ if (nextKeyWritables[alias] != null) {
+ throw new HiveException("Attempting to overwrite nextKeyWritables[" + alias + "]");
+ }
nextKeyWritables[alias] = key;
return true;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
index 4b76638..4882e61 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
@@ -734,7 +734,7 @@ public class GroupByOperator extends Operator<GroupByDesc> implements IConfigure
@Override
public void process(Object row, int tag) throws HiveException {
firstRow = false;
- ObjectInspector rowInspector = inputObjInspectors[tag];
+ ObjectInspector rowInspector = inputObjInspectors[0];
// Total number of input rows is needed for hash aggregation only
if (hashAggr) {
numRowsInput++;
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
index 108bb57..4e9784d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
@@ -663,6 +663,18 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
public void flush() throws HiveException {
}
+ // Recursive flush to flush all the tree operators
+ public void flushRecursive() throws HiveException {
+ flush();
+ if (childOperators == null) {
+ return;
+ }
+
+ for (Operator<?> child : childOperators) {
+ child.flushRecursive();
+ }
+ }
+
public void processGroup(int tag) throws HiveException {
if (childOperators == null || childOperators.isEmpty()) {
return;
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
index fca783c..688fde8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
@@ -123,6 +123,9 @@ public class ReduceRecordSource implements RecordSource {
private long vectorizedVertexNum;
private int vectorizedTestingReducerBatchSize;
+ // Flush the last record when reader is out of records
+ private boolean flushLastRecord = false;
+
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc,
TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag,
VectorizedRowBatchCtx batchContext, long vectorizedVertexNum,
@@ -254,6 +257,9 @@ public class ReduceRecordSource implements RecordSource {
try {
if (!reader.next()) {
+ if (flushLastRecord) {
+ reducer.flushRecursive();
+ }
return false;
}
@@ -508,4 +514,8 @@ public class ReduceRecordSource implements RecordSource {
public ObjectInspector getObjectInspector() {
return rowObjectInspector;
}
+
+ public void setFlushLastRecord(boolean flushLastRecord) {
+ this.flushLastRecord = flushLastRecord;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 4019f13..9a7b1ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -608,6 +608,23 @@ public class ConvertJoinMapJoin implements NodeProcessor {
return false;
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) parentOp;
+ List<ExprNodeDesc> keyCols = rsOp.getConf().getKeyCols();
+
+ // For SMB, the key column(s) in RS should be the same as bucket column(s) and sort column(s)
+ List<String> sortCols = rsOp.getOpTraits().getSortCols().get(0);
+ List<String> bucketCols = rsOp.getOpTraits().getBucketColNames().get(0);
+ if (sortCols.size() != keyCols.size() || bucketCols.size() != keyCols.size()) {
+ return false;
+ }
+
+ // Check columns.
+ for (int i = 0; i < sortCols.size(); i++) {
+ ExprNodeDesc sortCol = rsOp.getColumnExprMap().get(sortCols.get(i));
+ ExprNodeDesc bucketCol = rsOp.getColumnExprMap().get(bucketCols.get(i));
+ if (!(sortCol.isSame(keyCols.get(i)) && bucketCol.isSame(keyCols.get(i)))) {
+ return false;
+ }
+ }
if (!checkColEquality(rsOp.getParentOperators().get(0).getOpTraits().getSortCols(), rsOp
.getOpTraits().getSortCols(), rsOp.getColumnExprMap(), false)) {
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
index 9e54465..dbcbbfd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
@@ -260,23 +260,43 @@ public class OpTraitsRulesProcFactory {
public static class SelectRule implements NodeProcessor {
+ boolean processSortCols = false;
+
+ // For bucket columns
+ // If all the columns match to the parent, put them in the bucket cols
+ // else, add empty list.
+ // For sort columns
+ // Keep the subset of all the columns as long as order is maintained.
public List<List<String>> getConvertedColNames(
List<List<String>> parentColNames, SelectOperator selOp) {
- List<List<String>> listBucketCols = new ArrayList<List<String>>();
+ List<List<String>> listBucketCols = new ArrayList<>();
if (selOp.getColumnExprMap() != null) {
if (parentColNames != null) {
for (List<String> colNames : parentColNames) {
- List<String> bucketColNames = new ArrayList<String>();
+ List<String> bucketColNames = new ArrayList<>();
+ boolean found = false;
for (String colName : colNames) {
for (Entry<String, ExprNodeDesc> entry : selOp.getColumnExprMap().entrySet()) {
- if (entry.getValue() instanceof ExprNodeColumnDesc) {
- if (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName)) {
- bucketColNames.add(entry.getKey());
- }
+ if ((entry.getValue() instanceof ExprNodeColumnDesc) &&
+ (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName))) {
+ bucketColNames.add(entry.getKey());
+ found = true;
+ break;
}
}
+ if (!found) {
+ // Bail out on first missed column.
+ break;
+ }
+ }
+ if (!processSortCols && !found) {
+ // While processing bucket columns, at least one bucket column
+ // missed. This results in a different bucketing scheme.
+ // Add empty list
+ listBucketCols.add(new ArrayList<>());
+ } else {
+ listBucketCols.add(bucketColNames);
}
- listBucketCols.add(bucketColNames);
}
}
}
@@ -300,6 +320,7 @@ public class OpTraitsRulesProcFactory {
List<List<String>> parentSortColNames =
selOp.getParentOperators().get(0).getOpTraits().getSortCols();
if (parentSortColNames != null) {
+ processSortCols = true;
listSortCols = getConvertedColNames(parentSortColNames, selOp);
}
}
@@ -309,7 +330,11 @@ public class OpTraitsRulesProcFactory {
int bucketingVersion = -1;
OpTraits parentOpTraits = selOp.getParentOperators().get(0).getOpTraits();
if (parentOpTraits != null) {
- numBuckets = parentOpTraits.getNumBuckets();
+ // if bucket columns are empty, then numbuckets must be set to -1.
+ if (listBucketCols != null &&
+ !(listBucketCols.isEmpty() || listBucketCols.get(0).isEmpty())) {
+ numBuckets = parentOpTraits.getNumBuckets();
+ }
numReduceSinks = parentOpTraits.getNumReduceSinks();
bucketingVersion = parentOpTraits.getBucketingVersion();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
index 7416eb0..76e615f 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
@@ -22,6 +22,7 @@ load data local inpath '../../data/files/auto_sortmerge_join/big/000003_0' INTO
set hive.auto.convert.join=true;
-- disable hash joins
set hive.auto.convert.join.noconditionaltask.size=10;
+set hive.auto.convert.sortmerge.join=false;
explain extended select count(*) FROM bucket_small_n11 a JOIN bucket_big_n11 b ON a.key = b.key;
select count(*) FROM bucket_small_n11 a JOIN bucket_big_n11 b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
index 551e5f7..0aeec0e 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
@@ -53,7 +53,7 @@ select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.k
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key;
--- A SMB join is being followed by a regular join on a bucketed table on a different key
+-- The join order ensures there is no SMB
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value;
@@ -71,6 +71,6 @@ select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.k
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl3_n0 c on c.key = a.key;
--- A SMB join is being followed by a regular join on a bucketed table on a different key
+-- The join order ensures there is no SMB
explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value;
select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join tbl4 c on c.value = a.value;
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/skewjoinopt19.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skewjoinopt19.q b/ql/src/test/queries/clientpositive/skewjoinopt19.q
index 02cadda..df8ab71 100644
--- a/ql/src/test/queries/clientpositive/skewjoinopt19.q
+++ b/ql/src/test/queries/clientpositive/skewjoinopt19.q
@@ -1,5 +1,6 @@
set hive.mapred.mode=nonstrict;
set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.sortmerge.join=false;
CREATE TABLE T1_n34(key STRING, val STRING)
CLUSTERED BY (key) INTO 4 BUCKETS
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/skewjoinopt20.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skewjoinopt20.q b/ql/src/test/queries/clientpositive/skewjoinopt20.q
index 160e5b8..bebe007 100644
--- a/ql/src/test/queries/clientpositive/skewjoinopt20.q
+++ b/ql/src/test/queries/clientpositive/skewjoinopt20.q
@@ -1,5 +1,6 @@
set hive.mapred.mode=nonstrict;
set hive.optimize.skewjoin.compiletime = true;
+set hive.auto.convert.sortmerge.join=false;
CREATE TABLE T1_n103(key STRING, val STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_11.q b/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
index 6ce49b8..d0cea5b 100644
--- a/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
+++ b/ql/src/test/queries/clientpositive/smb_mapjoin_11.q
@@ -8,7 +8,8 @@ set hive.cbo.enable=false;
set hive.exec.reducers.max = 1;
set hive.merge.mapfiles=false;
-set hive.merge.mapredfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join=false;
-- This test verifies that the output of a sort merge join on 2 partitions (one on each side of the join) is bucketed
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/smb_mapjoin_12.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_12.q b/ql/src/test/queries/clientpositive/smb_mapjoin_12.q
index 753e4d3..6f9ecab 100644
--- a/ql/src/test/queries/clientpositive/smb_mapjoin_12.q
+++ b/ql/src/test/queries/clientpositive/smb_mapjoin_12.q
@@ -21,7 +21,7 @@ INSERT OVERWRITE TABLE test_table2_n6 PARTITION (ds = '2') SELECT *
INSERT OVERWRITE TABLE test_table2_n6 PARTITION (ds = '3') SELECT *;
-
+set hive.auto.convert.sortmerge.join=false;
-- Create a bucketed table
CREATE TABLE test_table3_n4 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS;
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/smb_mapjoin_17.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_17.q b/ql/src/test/queries/clientpositive/smb_mapjoin_17.q
index d68f5f3..7454445 100644
--- a/ql/src/test/queries/clientpositive/smb_mapjoin_17.q
+++ b/ql/src/test/queries/clientpositive/smb_mapjoin_17.q
@@ -43,6 +43,7 @@ SELECT * FROM src WHERE key < 10;
INSERT OVERWRITE TABLE test_table8
SELECT * FROM src WHERE key < 10;
+set hive.auto.convert.sortmerge.join=false;
-- Mapjoin followed by a aggregation should be performed in a single MR job upto 7 tables
EXPLAIN
SELECT /*+ mapjoin(b, c, d, e, f, g) */ count(*)
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/queries/clientpositive/subquery_notin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q
index 6494027..a2d93df 100644
--- a/ql/src/test/queries/clientpositive/subquery_notin.q
+++ b/ql/src/test/queries/clientpositive/subquery_notin.q
@@ -177,7 +177,7 @@ SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0);
-- corr
explain SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where t1_n0.c2=t2_n0.c1);
-SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where t1_n0.c1=t2_n0.c1);
+SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where t1_n0.c2=t2_n0.c1);
DROP TABLE t1_n0;
DROP TABLE t2_n0;
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
index b13beab..828c6e1 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
@@ -772,23 +772,13 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: c
- Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
- Map Operator Tree:
- TableScan
alias: a
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -798,23 +788,36 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
Map 5
Map Operator Tree:
TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
alias: b
Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -838,6 +841,22 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
0 _col0 (type: int)
1 _col0 (type: int)
Statistics: Num rows: 1343 Data size: 10744 Basic stats: COMPLETE Column stats: COMPLETE
@@ -850,7 +869,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
- Reducer 3
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -884,7 +903,7 @@ POSTHOOK: Input: default@tbl1_n4
POSTHOOK: Input: default@tbl2_n3
POSTHOOK: Input: default@tbl4
#### A masked pattern was here ####
-90
+2654
PREHOOK: query: explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value
PREHOOK: type: QUERY
POSTHOOK: query: explain select count(*) FROM tbl1_n4 a JOIN tbl2_n3 b ON a.key = b.key join src c on c.value = a.value
@@ -1295,23 +1314,13 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: c
- Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
- Map Operator Tree:
- TableScan
alias: a
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -1321,23 +1330,36 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
Map 5
Map Operator Tree:
TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
alias: b
Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
@@ -1361,6 +1383,22 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
0 _col0 (type: int)
1 _col0 (type: int)
Statistics: Num rows: 1343 Data size: 10744 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1373,7 +1411,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
- Reducer 3
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -1407,4 +1445,4 @@ POSTHOOK: Input: default@tbl1_n4
POSTHOOK: Input: default@tbl2_n3
POSTHOOK: Input: default@tbl4
#### A masked pattern was here ####
-90
+2654
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
index 8e17d95..879c999 100644
--- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
+++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
@@ -21,10 +21,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -49,7 +47,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: y
@@ -72,45 +70,43 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
- mode: hash
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -125,21 +121,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -189,10 +170,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -217,7 +196,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: y
@@ -240,45 +219,43 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
- mode: hash
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 9 Data size: 1701 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -293,21 +270,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -357,10 +319,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -386,7 +346,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: y
@@ -406,45 +366,43 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Left Outer Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
- mode: hash
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -459,21 +417,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -523,10 +466,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -552,7 +493,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: y
@@ -572,45 +513,43 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Left Outer Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
- mode: hash
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 250 Data size: 24690 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -625,21 +564,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -689,10 +613,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -714,7 +636,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 3
Map Operator Tree:
TableScan
alias: y
@@ -737,8 +659,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Reducer 2
- Execution mode: vectorized, llap
+ Reducer 4
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -746,37 +667,36 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Right Outer Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
- mode: hash
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -791,21 +711,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -855,10 +760,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -880,7 +783,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 3
Map Operator Tree:
TableScan
alias: y
@@ -903,8 +806,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Reducer 2
- Execution mode: vectorized, llap
+ Reducer 4
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -912,37 +814,36 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Right Outer Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
- mode: hash
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Statistics: Num rows: 12 Data size: 2078 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -957,21 +858,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/0a961aa8/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out
index 9e424c2..1fcd6ed 100644
--- a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out
+++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out
@@ -2659,11 +2659,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
- Reducer 5 <- Map 8 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
- Reducer 7 <- Map 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -2688,7 +2686,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: y
@@ -2710,7 +2708,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 8
+ Map 7
Map Operator Tree:
TableScan
alias: z
@@ -2731,44 +2729,42 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col1), sum(_col3)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col3)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -2783,7 +2779,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Reducer 5
+ Reducer 4
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -2805,21 +2801,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -2913,11 +2894,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
- Reducer 5 <- Map 8 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
- Reducer 7 <- Map 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Map 7 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -2942,7 +2921,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: y
@@ -2964,7 +2943,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 8
+ Map 7
Map Operator Tree:
TableScan
alias: z
@@ -2985,44 +2964,42 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col1), sum(_col3)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 9 Data size: 927 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col3)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Reducer 4
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -3037,7 +3014,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Reducer 5
+ Reducer 4
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -3059,21 +3036,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator