You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/15 04:07:46 UTC

svn commit: r1645554 - in /hive/branches/spark: itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/ ql/src/test/results/clientpositive/spark/

Author: xuefu
Date: Mon Dec 15 03:07:45 2014
New Revision: 1645554

URL: http://svn.apache.org/r1645554
Log:
HIVE-7816: Enable map-join tests which Tez executes [Spark Branch] (Rui via Xuefu)

Added:
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
    hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
Modified:
    hive/branches/spark/itests/src/test/resources/testconfiguration.properties
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out

Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Mon Dec 15 03:07:45 2014
@@ -550,6 +550,8 @@ spark.query.files=add_part_multiple.q, \
   count.q, \
   create_merge_compressed.q, \
   cross_join.q, \
+  cross_product_check_1.q, \
+  cross_product_check_2.q, \
   ctas.q, \
   custom_input_output_format.q, \
   date_join1.q, \
@@ -561,6 +563,8 @@ spark.query.files=add_part_multiple.q, \
   escape_distributeby1.q, \
   escape_orderby1.q, \
   escape_sortby1.q, \
+  filter_join_breaktask.q, \
+  filter_join_breaktask2.q, \
   groupby1.q, \
   groupby2.q, \
   groupby3.q, \

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java Mon Dec 15 03:07:45 2014
@@ -241,7 +241,7 @@ public class CrossProductCheck implement
    * <p>
    * For MR the taskname is the StageName, for Tez it is the vertex name.
    */
-  class MapJoinCheck implements NodeProcessor, NodeProcessorCtx {
+  public static class MapJoinCheck implements NodeProcessor, NodeProcessorCtx {
 
     final List<String> warnings;
     final String taskName;
@@ -302,7 +302,7 @@ public class CrossProductCheck implement
    * in the Work. For Tez, you can restrict it to ReduceSinks for a particular output
    * vertex.
    */
-  static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx {
+  public static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx {
 
     static class Info {
       List<ExprNodeDesc> keyCols;

Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java?rev=1645554&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java Mon Dec 15 03:07:45 2014
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+/**
+ * Check each MapJoin and ShuffleJoin Operator to see if they are performing a cross product.
+ * If yes, output a warning to the Session's console.
+ * The Checks made are the following:
+ * 1. Shuffle Join:
+ * Check the parent ReduceSinkOp of the JoinOp. If its keys list is size = 0, then
+ * this is a cross product.
+ * 2. Map Join:
+ * If the keys expr list on the mapJoin Desc is an empty list for any input,
+ * this implies a cross product.
+ */
+public class SparkCrossProductCheck implements PhysicalPlanResolver, Dispatcher {
+
+  @Override
+  public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
+      throws SemanticException {
+    Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
+    if (currTask instanceof SparkTask) {
+      SparkWork sparkWork = ((SparkTask) currTask).getWork();
+      checkShuffleJoin(sparkWork);
+      checkMapJoin((SparkTask) currTask);
+    }
+    return null;
+  }
+
+  @Override
+  public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
+    TaskGraphWalker ogw = new TaskGraphWalker(this);
+
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getRootTasks());
+
+    ogw.startWalking(topNodes, null);
+    return pctx;
+  }
+
+  private void warn(String msg) {
+    SessionState.getConsole().getInfoStream().println(
+        String.format("Warning: %s", msg));
+  }
+
+  private void checkShuffleJoin(SparkWork sparkWork) throws SemanticException {
+    for (ReduceWork reduceWork : sparkWork.getAllReduceWork()) {
+      Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
+      if (reducer instanceof JoinOperator || reducer instanceof CommonMergeJoinOperator) {
+        Map<Integer, CrossProductCheck.ExtractReduceSinkInfo.Info> rsInfo =
+            new HashMap<Integer, CrossProductCheck.ExtractReduceSinkInfo.Info>();
+        for (BaseWork parent : sparkWork.getParents(reduceWork)) {
+          rsInfo.putAll(new CrossProductCheck.ExtractReduceSinkInfo(null).analyze(parent));
+        }
+        checkForCrossProduct(reduceWork.getName(), reducer, rsInfo);
+      }
+    }
+  }
+
+  private void checkMapJoin(SparkTask sparkTask) throws SemanticException {
+    SparkWork sparkWork = sparkTask.getWork();
+    for (BaseWork baseWork : sparkWork.getAllWorkUnsorted()) {
+      List<String> warnings =
+          new CrossProductCheck.MapJoinCheck(sparkTask.toString()).analyze(baseWork);
+      for (String w : warnings) {
+        warn(w);
+      }
+    }
+  }
+
+  private void checkForCrossProduct(String workName,
+      Operator<? extends OperatorDesc> reducer,
+      Map<Integer, CrossProductCheck.ExtractReduceSinkInfo.Info> rsInfo) {
+    if (rsInfo.isEmpty()) {
+      return;
+    }
+    Iterator<CrossProductCheck.ExtractReduceSinkInfo.Info> it = rsInfo.values().iterator();
+    CrossProductCheck.ExtractReduceSinkInfo.Info info = it.next();
+    if (info.keyCols.size() == 0) {
+      List<String> iAliases = new ArrayList<String>();
+      iAliases.addAll(info.inputAliases);
+      while (it.hasNext()) {
+        info = it.next();
+        iAliases.addAll(info.inputAliases);
+      }
+      String warning = String.format(
+          "Shuffle Join %s[tables = %s] in Work '%s' is a cross product",
+          reducer.toString(),
+          iAliases,
+          workName);
+      warn(warning);
+    }
+  }
+}

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java Mon Dec 15 03:07:45 2014
@@ -57,10 +57,10 @@ import org.apache.hadoop.hive.ql.lib.Rul
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.lib.TypeRule;
 import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck;
 import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
+import org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck;
 import org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver;
 import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
@@ -294,7 +294,7 @@ public class SparkCompiler extends TaskC
     }
 
     if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
-      physicalCtx = new CrossProductCheck().resolve(physicalCtx);
+      physicalCtx = new SparkCrossProductCheck().resolve(physicalCtx);
     } else {
       LOG.debug("Skipping cross product analysis");
     }

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join0.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain 
 select sum(hash(a.k1,a.v1,a.k2, a.v2))
 from (
@@ -130,6 +131,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2))
 from (
 SELECT src1.key as k1, src1.value as v1, 

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join23.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain 
 SELECT  *  FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value
 PREHOOK: type: QUERY
@@ -89,6 +90,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT  *  FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_filters.q.out Mon Dec 15 03:07:45 2014
@@ -14,6 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
@@ -23,6 +24,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
 #### A masked pattern was here ####
 3078400
+Warning: Map Join MAPJOIN[15][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
@@ -32,6 +34,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
 #### A masked pattern was here ####
 4937935
+Warning: Map Join MAPJOIN[15][bigTable=b] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
@@ -41,6 +44,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
 #### A masked pattern was here ####
 3080335
+Warning: Shuffle Join JOIN[4][tables = [a, b]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
@@ -296,6 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@smb_input2
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
@@ -305,6 +310,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
 #### A masked pattern was here ####
 3078400
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
@@ -314,6 +320,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
 #### A masked pattern was here ####
 3078400
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
@@ -323,6 +330,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
 #### A masked pattern was here ####
 3078400
+Warning: Shuffle Join JOIN[10][tables = [a, b]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out Mon Dec 15 03:07:45 2014
@@ -14,6 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
@@ -23,6 +24,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
 #### A masked pattern was here ####
 13630578
+Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
@@ -32,6 +34,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
 #### A masked pattern was here ####
 13630578
+Warning: Map Join MAPJOIN[11][bigTable=b] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out Mon Dec 15 03:07:45 2014
@@ -138,6 +138,7 @@ POSTHOOK: query: load data local inpath
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
 PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
@@ -645,6 +646,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@bucket_big

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_join.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[4][tables = [src, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: -- current
 explain select src.key from src join src src2
 PREHOOK: type: QUERY
@@ -60,6 +61,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[4][tables = [src, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: -- ansi cross join
 explain select src.key from src cross join src src2
 PREHOOK: type: QUERY

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out?rev=1645554&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out Mon Dec 15 03:07:45 2014
@@ -0,0 +1,544 @@
+PREHOOK: query: create table A as
+select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@A
+POSTHOOK: query: create table A as
+select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@A
+PREHOOK: query: create table B as
+select * from src
+limit 10
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@B
+POSTHOOK: query: create table B as
+select * from src
+limit 10
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@B
+Warning: Shuffle Join JOIN[4][tables = [a, b]] in Work 'Reducer 2' is a cross product
+PREHOOK: query: explain select * from A join B
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join B
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: key (type: string), value (type: string)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: key (type: string), value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0} {VALUE._col1}
+                  1 {VALUE._col0} {VALUE._col1}
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[10][tables = [d1, d2, a]] in Work 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3)
+        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: d1
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: d2
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: value (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: key (type: string), value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0} {VALUE._col0}
+                  1 {KEY.reducesinkkey0} {VALUE._col0}
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6}
+                  1 {VALUE._col0} {VALUE._col1}
+                outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[16][tables = [a, od1]] in Work 'Reducer 4' is a cross product
+PREHOOK: query: explain select * from A join 
+         (select d1.key 
+          from B d1 join B d2 on d1.key = d2.key 
+          where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join 
+         (select d1.key 
+          from B d1 join B d2 on d1.key = d2.key 
+          where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3)
+        Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 3)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: d1
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: d2
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: key (type: string), value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
+        Reducer 4 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0} {VALUE._col1}
+                  1 {VALUE._col0}
+                outputColumnNames: _col0, _col1, _col5
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[4][tables = [d1, d2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[14][tables = [a, od1]] in Work 'Reducer 4' is a cross product
+PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1)
+        Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 3)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: d1
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: key (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: d2
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: key (type: string), value (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0}
+                  1 
+                outputColumnNames: _col0
+                Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
+        Reducer 4 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0} {VALUE._col1}
+                  1 {VALUE._col0}
+                outputColumnNames: _col0, _col1, _col5
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[21][tables = [ss, od1]] in Work 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from 
+(select A.key from A group by key) ss join 
+(select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from 
+(select A.key from A group by key) ss join 
+(select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 3)
+        Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3)
+        Reducer 6 <- Reducer 5 (GROUP, 3)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: d1
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: d2
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
+        Reducer 3 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {VALUE._col0}
+                  1 {VALUE._col0}
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {KEY.reducesinkkey0}
+                  1 
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+        Reducer 6 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out?rev=1645554&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out Mon Dec 15 03:07:45 2014
@@ -0,0 +1,633 @@
+PREHOOK: query: create table A as
+select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@A
+POSTHOOK: query: create table A as
+select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@A
+PREHOOK: query: create table B as
+select * from src
+limit 10
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@B
+POSTHOOK: query: create table B as
+select * from src
+limit 10
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@B
+Warning: Map Join MAPJOIN[7][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select * from A join B
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join B
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Spark HashTable Sink Operator
+                    condition expressions:
+                      0 {key} {value}
+                      1 {key} {value}
+                    keys:
+                      0 
+                      1 
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {key} {value}
+                      1 {key} {value}
+                    keys:
+                      0 
+                      1 
+                    outputColumnNames: _col0, _col1, _col5, _col6
+                    input vertices:
+                      1 Map 2
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[16][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-2 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: d2
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {key} {value}
+                        1 {value}
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: d1
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key} {value}
+                        1 {key} {value}
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0, _col1, _col5, _col6
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        condition expressions:
+                          0 {_col0} {_col1} {_col5} {_col6}
+                          1 {key} {value}
+                        keys:
+                          0 
+                          1 
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {_col0} {_col1} {_col5} {_col6}
+                      1 {key} {value}
+                    keys:
+                      0 
+                      1 
+                    outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+                    input vertices:
+                      0 Map 1
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[22][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select * from A join 
+         (select d1.key 
+          from B d1 join B d2 on d1.key = d2.key 
+          where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join 
+         (select d1.key 
+          from B d1 join B d2 on d1.key = d2.key 
+          where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-2 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: d2
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-2
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 3)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: d1
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+            Local Work:
+              Map Reduce Local Work
+        Reducer 2 
+            Local Work:
+              Map Reduce Local Work
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                Spark HashTable Sink Operator
+                  condition expressions:
+                    0 {key} {value}
+                    1 {_col0}
+                  keys:
+                    0 
+                    1 
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {key} {value}
+                      1 {_col0}
+                    keys:
+                      0 
+                      1 
+                    outputColumnNames: _col0, _col1, _col5
+                    input vertices:
+                      1 Reducer 2
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[18][bigTable=a] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[17][bigTable=d1] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-2 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: d2
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                  Spark HashTable Sink Operator
+                    condition expressions:
+                      0 {key}
+                      1 
+                    keys:
+                      0 
+                      1 
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-2
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 3)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: d1
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {key}
+                      1 
+                    keys:
+                      0 
+                      1 
+                    outputColumnNames: _col0
+                    input vertices:
+                      1 Map 3
+                    Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+            Local Work:
+              Map Reduce Local Work
+        Reducer 2 
+            Local Work:
+              Map Reduce Local Work
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                Spark HashTable Sink Operator
+                  condition expressions:
+                    0 {key} {value}
+                    1 {_col0}
+                  keys:
+                    0 
+                    1 
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {key} {value}
+                      1 {_col0}
+                    keys:
+                      0 
+                      1 
+                    outputColumnNames: _col0, _col1, _col5
+                    input vertices:
+                      1 Reducer 2
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select * from 
+(select A.key from A group by key) ss join 
+(select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from 
+(select A.key from A group by key) ss join 
+(select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-2 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: d2
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-2
+    Spark
+      Edges:
+        Reducer 4 <- Map 3 (GROUP, 3)
+#### A masked pattern was here ####
+      Vertices:
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: d1
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key}
+                        1 
+                      keys:
+                        0 key (type: string)
+                        1 key (type: string)
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 5
+                      Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+            Local Work:
+              Map Reduce Local Work
+        Reducer 4 
+            Local Work:
+              Map Reduce Local Work
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+                Spark HashTable Sink Operator
+                  condition expressions:
+                    0 {_col0}
+                    1 {_col0}
+                  keys:
+                    0 
+                    1 
+
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 3)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
+            Local Work:
+              Map Reduce Local Work
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {_col0}
+                    1 {_col0}
+                  keys:
+                    0 
+                    1 
+                  outputColumnNames: _col0, _col1
+                  input vertices:
+                    1 Reducer 4
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join0.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: EXPLAIN
 SELECT src1.key as k1, src1.value as v1, 
        src2.key as k2, src2.value as v2 FROM 
@@ -92,6 +93,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: EXPLAIN FORMATTED
 SELECT src1.key as k1, src1.value as v1, 
        src2.key as k2, src2.value as v2 FROM 
@@ -109,6 +111,7 @@ SELECT src1.key as k1, src1.value as v1,
   SORT BY k1, v1, k2, v2
 POSTHOOK: type: QUERY
 #### A masked pattern was here ####
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: SELECT src1.key as k1, src1.value as v1, 
        src2.key as k2, src2.value as v2 FROM 
   (SELECT * FROM src WHERE src.key < 10) src1 

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join23.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: EXPLAIN
 SELECT *  FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value
 PREHOOK: type: QUERY
@@ -78,6 +79,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[4][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: SELECT *  FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[4][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select p1.p_name, p2.p_name
 from part p1 , part p2
 PREHOOK: type: QUERY
@@ -247,6 +248,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select p1.p_name, p2.p_name, p3.p_name
 from part p1 , part p2 , part p3 
 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out Mon Dec 15 03:07:45 2014
@@ -176,6 +176,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name
 PREHOOK: type: QUERY
@@ -276,6 +277,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name
 PREHOOK: type: QUERY

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out Mon Dec 15 03:07:45 2014
@@ -186,6 +186,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part p2 join part p3 
 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name
@@ -291,6 +292,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part p2 join part p3 
 where p2.p_partkey = 1 and p3.p_name = p2.p_name

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out Mon Dec 15 03:07:45 2014
@@ -232,6 +232,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part2 p2 join part3 p3 on p2_partkey + p_partkey = p1.p_partkey and p3_name = p2_name
 PREHOOK: type: QUERY
@@ -332,6 +333,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part2 p2 join part3 p3 on p2_partkey = 1 and p3_name = p2_name
 PREHOOK: type: QUERY

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out Mon Dec 15 03:07:45 2014
@@ -242,6 +242,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part2 p2 join part3 p3 
 where p2_partkey + p1.p_partkey = p1.p_partkey and p3_name = p2_name
@@ -347,6 +348,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[5][tables = [p1, p2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain select *
 from part p1 join part2 p2 join part3 p3 
 where p2_partkey = 1 and p3_name = p2_name

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out Mon Dec 15 03:07:45 2014
@@ -1026,6 +1026,7 @@ POSTHOOK: Input: default@srcpart@ds=2008
 #### A masked pattern was here ####
 0
 2000
+Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain extended
 select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
 PREHOOK: type: QUERY
@@ -1434,6 +1435,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/parallel_join0.q.out Mon Dec 15 03:07:45 2014
@@ -1,3 +1,4 @@
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: -- SORT_BEFORE_DIFF
 
 EXPLAIN
@@ -96,6 +97,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: EXPLAIN FORMATTED
 SELECT src1.key as k1, src1.value as v1, 
        src2.key as k2, src2.value as v2 FROM 
@@ -113,6 +115,7 @@ SELECT src1.key as k1, src1.value as v1,
   SORT BY k1, v1, k2, v2
 POSTHOOK: type: QUERY
 #### A masked pattern was here ####
+Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: SELECT src1.key as k1, src1.value as v1, 
        src2.key as k2, src2.value as v2 FROM 
   (SELECT * FROM src WHERE src.key < 10) src1 

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/ppd_join5.q.out Mon Dec 15 03:07:45 2014
@@ -32,6 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE []
 POSTHOOK: Lineage: t1.id2 SIMPLE []
 POSTHOOK: Lineage: t2.d SIMPLE []
 POSTHOOK: Lineage: t2.id SIMPLE []
+Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Work 'Reducer 3' is a cross product
 PREHOOK: query: explain
 select a.*,b.d d1,c.d d2 from
   t1 a join t2 b on (a.id1 = b.id)
@@ -135,6 +136,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Work 'Reducer 3' is a cross product
 PREHOOK: query: explain
 select * from (
 select a.*,b.d d1,c.d d2 from
@@ -245,6 +247,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Shuffle Join JOIN[10][tables = [a, b, c]] in Work 'Reducer 3' is a cross product
 PREHOOK: query: select * from (
 select a.*,b.d d1,c.d d2 from
   t1 a join t2 b on (a.id1 = b.id)

Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out?rev=1645554&r1=1645553&r2=1645554&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out Mon Dec 15 03:07:45 2014
@@ -32,6 +32,7 @@ POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@src_5
 RUN: Stage-0:DDL
+Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain
 from src b 
 INSERT OVERWRITE TABLE src_4 
@@ -274,6 +275,7 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: from src b 
 INSERT OVERWRITE TABLE src_4 
   select * 
@@ -461,6 +463,7 @@ POSTHOOK: Input: default@src_5
 199	val_199
 199	val_199
 2	val_2
+Warning: Map Join MAPJOIN[45][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain
 from src b 
 INSERT OVERWRITE TABLE src_4 
@@ -712,6 +715,7 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+Warning: Map Join MAPJOIN[45][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: from src b 
 INSERT OVERWRITE TABLE src_4 
   select *