You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/15 04:07:46 UTC
svn commit: r1645554 - in /hive/branches/spark: itests/src/test/resources/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/
ql/src/java/org/apache/hadoop/hive/ql/parse/spark/
ql/src/test/results/clientpositive/spark/
Author: xuefu
Date: Mon Dec 15 03:07:45 2014
New Revision: 1645554
URL: http://svn.apache.org/r1645554
Log:
HIVE-7816: Enable map-join tests which Tez executes [Spark Branch] (Rui via Xuefu)
Added:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
Modified:
hive/branches/spark/itests/src/test/resources/testconfiguration.properties
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Mon Dec 15 03:07:45 2014
@@ -550,6 +550,8 @@ spark.query.files=add_part_multiple.q, \
count.q, \
create_merge_compressed.q, \
cross_join.q, \
+ cross_product_check_1.q, \
+ cross_product_check_2.q, \
ctas.q, \
custom_input_output_format.q, \
date_join1.q, \
@@ -561,6 +563,8 @@ spark.query.files=add_part_multiple.q, \
escape_distributeby1.q, \
escape_orderby1.q, \
escape_sortby1.q, \
+ filter_join_breaktask.q, \
+ filter_join_breaktask2.q, \
groupby1.q, \
groupby2.q, \
groupby3.q, \
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java Mon Dec 15 03:07:45 2014
@@ -241,7 +241,7 @@ public class CrossProductCheck implement
* <p>
* For MR the taskname is the StageName, for Tez it is the vertex name.
*/
- class MapJoinCheck implements NodeProcessor, NodeProcessorCtx {
+ public static class MapJoinCheck implements NodeProcessor, NodeProcessorCtx {
final List<String> warnings;
final String taskName;
@@ -302,7 +302,7 @@ public class CrossProductCheck implement
* in the Work. For Tez, you can restrict it to ReduceSinks for a particular output
* vertex.
*/
- static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx {
+ public static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx {
static class Info {
List<ExprNodeDesc> keyCols;
Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java?rev=1645554&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java Mon Dec 15 03:07:45 2014
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+/**
+ * Checks each MapJoin and ShuffleJoin operator of every SparkTask for a cross product.
+ * If one is found, a warning is printed to the info stream of the Session's console.
+ * The checks made are the following:
+ * 1. Shuffle Join:
+ * Collect the ReduceSink info from the parent works of the JoinOp's ReduceWork. If the
+ * key column list of the first collected entry is empty, then this is a cross product.
+ * 2. Map Join:
+ * If the keys expr list on the mapJoin Desc is an empty list for any input,
+ * this implies a cross product.
+ */
+public class SparkCrossProductCheck implements PhysicalPlanResolver, Dispatcher {
+
+ @Override
+ public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
+ throws SemanticException {
+ Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
+ if (currTask instanceof SparkTask) {
+ SparkWork sparkWork = ((SparkTask) currTask).getWork();
+ checkShuffleJoin(sparkWork);
+ checkMapJoin((SparkTask) currTask);
+ }
+ return null;
+ }
+
+ @Override
+ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
+ TaskGraphWalker ogw = new TaskGraphWalker(this);
+
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(pctx.getRootTasks());
+
+ ogw.startWalking(topNodes, null);
+ return pctx;
+ }
+
+ private void warn(String msg) {
+ SessionState.getConsole().getInfoStream().println(
+ String.format("Warning: %s", msg));
+ }
+
+ private void checkShuffleJoin(SparkWork sparkWork) throws SemanticException {
+ for (ReduceWork reduceWork : sparkWork.getAllReduceWork()) {
+ Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
+ if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) {
+ Map<Integer, CrossProductCheck.ExtractReduceSinkInfo.Info> rsInfo =
+ new HashMap<Integer, CrossProductCheck.ExtractReduceSinkInfo.Info>();
+ for (BaseWork parent : sparkWork.getParents(reduceWork)) {
+ rsInfo.putAll(new CrossProductCheck.ExtractReduceSinkInfo(null).analyze(parent));
+ }
+ checkForCrossProduct(reduceWork.getName(), reducer, rsInfo);
+ }
+ }
+ }
+
+ private void checkMapJoin(SparkTask sparkTask) throws SemanticException {
+ SparkWork sparkWork = sparkTask.getWork();
+ for (BaseWork baseWork : sparkWork.getAllWorkUnsorted()) {
+ List<String> warnings =
+ new CrossProductCheck.MapJoinCheck(sparkTask.toString()).analyze(baseWork);
+ for (String w : warnings) {
+ warn(w);
+ }
+ }
+ }
+
+ private void checkForCrossProduct(String workName,
+ Operator<? extends OperatorDesc> reducer,
+ Map<Integer, CrossProductCheck.ExtractReduceSinkInfo.Info> rsInfo) {
+ if (rsInfo.isEmpty()) {
+ return;
+ }
+ Iterator<CrossProductCheck.ExtractReduceSinkInfo.Info> it = rsInfo.values().iterator();
+ CrossProductCheck.ExtractReduceSinkInfo.Info info = it.next();
+ if (info.keyCols.size() == 0) {
+ List<String> iAliases = new ArrayList<String>();
+ iAliases.addAll(info.inputAliases);
+ while (it.hasNext()) {
+ info = it.next();
+ iAliases.addAll(info.inputAliases);
+ }
+ String warning = String.format(
+ "Shuffle Join %s[tables = %s] in Work '%s' is a cross product",
+ reducer.toString(),
+ iAliases,
+ workName);
+ warn(warning);
+ }
+ }
+}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java Mon Dec 15 03:07:45 2014
@@ -57,10 +57,10 @@ import org.apache.hadoop.hive.ql.lib.Rul
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.TypeRule;
import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck;
import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
+import org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck;
import org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver;
import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
@@ -294,7 +294,7 @@ public class SparkCompiler extends TaskC
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
- physicalCtx = new CrossProductCheck().resolve(physicalCtx);
+ physicalCtx = new SparkCrossProductCheck().resolve(physicalCtx);
} else {
LOG.debug("Skipping cross product analysis");
}
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: explain
select sum(hash(a.k1,a.v1,a.k2, a.v2))
from (
@@ -130,6 +131,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2))
from (
SELECT src1.key as k1, src1.value as v1,
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: explain
SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value
PREHOOK: type: QUERY
@@ -89,6 +90,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value
PREHOOK: type: QUERY
PREHOOK: Input: default@src
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out Mon Dec 15 03:07:45 2014
@@ -14,6 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -23,6 +24,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
3078400
+Warning: Map Join MAPJOIN[15][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -32,6 +34,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
4937935
+Warning: Map Join MAPJOIN[15][bigTable=b] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -41,6 +44,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
3080335
+Warning: Shuffle Join JOIN[4][tables = [a, b]] in Work 'Reducer 2' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -296,6 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@smb_input2
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -305,6 +310,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
3078400
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -314,6 +320,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
3078400
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -323,6 +330,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
3078400
+Warning: Shuffle Join JOIN[10][tables = [a, b]] in Work 'Reducer 2' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out Mon Dec 15 03:07:45 2014
@@ -14,6 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -23,6 +24,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
13630578
+Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
@@ -32,6 +34,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1
#### A masked pattern was here ####
13630578
+Warning: Map Join MAPJOIN[11][bigTable=b] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out Mon Dec 15 03:07:45 2014
@@ -138,6 +138,7 @@ POSTHOOK: query: load data local inpath
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
@@ -645,6 +646,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucket_big
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[4][tables = [src, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: -- current
explain select src.key from src join src src2
PREHOOK: type: QUERY
@@ -60,6 +61,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[4][tables = [src, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: -- ansi cross join
explain select src.key from src cross join src src2
PREHOOK: type: QUERY
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out?rev=1645554&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out Mon Dec 15 03:07:45 2014
@@ -0,0 +1,544 @@
+PREHOOK: query: create table A as
+select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@A
+POSTHOOK: query: create table A as
+select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@A
+PREHOOK: query: create table B as
+select * from src
+limit 10
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@B
+POSTHOOK: query: create table B as
+select * from src
+limit 10
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@B
+Warning: Shuffle Join JOIN[4][tables = [a, b]] in Work 'Reducer 2' is a cross product
+PREHOOK: query: explain select * from A join B
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join B
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string), value (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string), value (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[10][tables = [d1, d2, a]] in Work 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+ Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string), value (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6}
+ 1 {VALUE._col0} {VALUE._col1}
+ outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[16][tables = [a, od1]] in Work 'Reducer 4' is a cross product
+PREHOOK: query: explain select * from A join
+ (select d1.key
+ from B d1 join B d2 on d1.key = d2.key
+ where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join
+ (select d1.key
+ from B d1 join B d2 on d1.key = d2.key
+ where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3)
+ Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP, 3)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string), value (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col5
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [d1, d2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [a, od1]] in Work 'Reducer 4' is a cross product
+PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1)
+ Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1)
+ Reducer 3 <- Reducer 2 (GROUP, 3)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: string), value (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col5
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[21][tables = [ss, od1]] in Work 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from
+(select A.key from A group by key) ss join
+(select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from
+(select A.key from A group by key) ss join
+(select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 3)
+ Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3)
+ Reducer 6 <- Reducer 5 (GROUP, 3)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0}
+ 1 {VALUE._col0}
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out?rev=1645554&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out Mon Dec 15 03:07:45 2014
@@ -0,0 +1,633 @@
+PREHOOK: query: create table A as
+select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@A
+POSTHOOK: query: create table A as
+select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@A
+PREHOOK: query: create table B as
+select * from src
+limit 10
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@B
+POSTHOOK: query: create table B as
+select * from src
+limit 10
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@B
+Warning: Map Join MAPJOIN[7][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select * from A join B
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join B
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ keys:
+ 0
+ 1
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col5, _col6
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Map Join MAPJOIN[16][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {key} {value}
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {_col0} {_col1} {_col5} {_col6}
+ 1 {key} {value}
+ keys:
+ 0
+ 1
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0} {_col1} {_col5} {_col6}
+ 1 {key} {value}
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Map Join MAPJOIN[22][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select * from A join
+ (select d1.key
+ from B d1 join B d2 on d1.key = d2.key
+ where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join
+ (select d1.key
+ from B d1 join B d2 on d1.key = d2.key
+ where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 3)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {_col0}
+ keys:
+ 0
+ 1
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {_col0}
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col5
+ input vertices:
+ 1 Reducer 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Map Join MAPJOIN[18][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[17][bigTable=d1] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0
+ 1
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 3)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {_col0}
+ keys:
+ 0
+ 1
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {_col0}
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col5
+ input vertices:
+ 1 Reducer 2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select * from
+(select A.key from A group by key) ss join
+(select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from
+(select A.key from A group by key) ss join
+(select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: d2
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Spark
+ Edges:
+ Reducer 4 <- Map 3 (GROUP, 3)
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: d1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reducer 4
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {_col0}
+ 1 {_col0}
+ keys:
+ 0
+ 1
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 3)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0}
+ 1 {_col0}
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Reducer 4
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN
SELECT src1.key as k1, src1.value as v1,
src2.key as k2, src2.value as v2 FROM
@@ -92,6 +93,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN FORMATTED
SELECT src1.key as k1, src1.value as v1,
src2.key as k2, src2.value as v2 FROM
@@ -109,6 +111,7 @@ SELECT src1.key as k1, src1.value as v1,
SORT BY k1, v1, k2, v2
POSTHOOK: type: QUERY
#### A masked pattern was here ####
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: SELECT src1.key as k1, src1.value as v1,
src2.key as k2, src2.value as v2 FROM
(SELECT * FROM src WHERE src.key < 10) src1
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN
SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value
PREHOOK: type: QUERY
@@ -78,6 +79,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value
PREHOOK: type: QUERY
PREHOOK: Input: default@src
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[4][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select p1.p_name, p2.p_name
from part p1 , part p2
PREHOOK: type: QUERY
@@ -247,6 +248,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name
from part p1 , part p2 , part p3
where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out Mon Dec 15 03:07:45 2014
@@ -176,6 +176,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select *
from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name
PREHOOK: type: QUERY
@@ -276,6 +277,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select *
from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name
PREHOOK: type: QUERY
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out Mon Dec 15 03:07:45 2014
@@ -186,6 +186,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select *
from part p1 join part p2 join part p3
where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name
@@ -291,6 +292,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select *
from part p1 join part p2 join part p3
where p2.p_partkey = 1 and p3.p_name = p2.p_name
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out Mon Dec 15 03:07:45 2014
@@ -232,6 +232,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select *
from part p1 join part2 p2 join part3 p3 on p2_partkey + p_partkey = p1.p_partkey and p3_name = p2_name
PREHOOK: type: QUERY
@@ -332,6 +333,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select *
from part p1 join part2 p2 join part3 p3 on p2_partkey = 1 and p3_name = p2_name
PREHOOK: type: QUERY
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out Mon Dec 15 03:07:45 2014
@@ -242,6 +242,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select *
from part p1 join part2 p2 join part3 p3
where p2_partkey + p1.p_partkey = p1.p_partkey and p3_name = p2_name
@@ -347,6 +348,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select *
from part p1 join part2 p2 join part3 p3
where p2_partkey = 1 and p3_name = p2_name
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out Mon Dec 15 03:07:45 2014
@@ -1026,6 +1026,7 @@ POSTHOOK: Input: default@srcpart@ds=2008
#### A masked pattern was here ####
0
2000
+Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain extended
select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
PREHOOK: type: QUERY
@@ -1434,6 +1435,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product
PREHOOK: query: select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
PREHOOK: type: QUERY
PREHOOK: Input: default@src
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: -- SORT_BEFORE_DIFF
EXPLAIN
@@ -96,6 +97,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN FORMATTED
SELECT src1.key as k1, src1.value as v1,
src2.key as k2, src2.value as v2 FROM
@@ -113,6 +115,7 @@ SELECT src1.key as k1, src1.value as v1,
SORT BY k1, v1, k2, v2
POSTHOOK: type: QUERY
#### A masked pattern was here ####
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: SELECT src1.key as k1, src1.value as v1,
src2.key as k2, src2.value as v2 FROM
(SELECT * FROM src WHERE src.key < 10) src1
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out Mon Dec 15 03:07:45 2014
@@ -32,6 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE []
POSTHOOK: Lineage: t1.id2 SIMPLE []
POSTHOOK: Lineage: t2.d SIMPLE []
POSTHOOK: Lineage: t2.id SIMPLE []
+Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Work 'Reducer 3' is a cross product
PREHOOK: query: explain
select a.*,b.d d1,c.d d2 from
t1 a join t2 b on (a.id1 = b.id)
@@ -135,6 +136,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Work 'Reducer 3' is a cross product
PREHOOK: query: explain
select * from (
select a.*,b.d d1,c.d d2 from
@@ -245,6 +247,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Work 'Reducer 3' is a cross product
PREHOOK: query: select * from (
select a.*,b.d d1,c.d d2 from
t1 a join t2 b on (a.id1 = b.id)
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out Mon Dec 15 03:07:45 2014
@@ -32,6 +32,7 @@ POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@src_5
RUN: Stage-0:DDL
+Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain
from src b
INSERT OVERWRITE TABLE src_4
@@ -274,6 +275,7 @@ STAGE PLANS:
Stage: Stage-4
Stats-Aggr Operator
+Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product
PREHOOK: query: from src b
INSERT OVERWRITE TABLE src_4
select *
@@ -461,6 +463,7 @@ POSTHOOK: Input: default@src_5
199 val_199
199 val_199
2 val_2
+Warning: Map Join MAPJOIN[45][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
PREHOOK: query: explain
from src b
INSERT OVERWRITE TABLE src_4
@@ -712,6 +715,7 @@ STAGE PLANS:
Stage: Stage-4
Stats-Aggr Operator
+Warning: Map Join MAPJOIN[45][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
PREHOOK: query: from src b
INSERT OVERWRITE TABLE src_4
select *