You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/13 18:44:42 UTC
svn commit: r1645338 [1/9] - in /hive/branches/spark: data/conf/spark/
itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/test/results/clientpositive/spark/
Author: xuefu
Date: Sat Dec 13 17:44:41 2014
New Revision: 1645338
URL: http://svn.apache.org/r1645338
Log:
HIVE-8911: Enable mapjoin hints [Spark Branch] (Chao via Xuefu)
Added:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java
Modified:
hive/branches/spark/data/conf/spark/hive-site.xml
hive/branches/spark/itests/src/test/resources/testconfiguration.properties
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join25.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join26.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join27.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join30.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join36.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join37.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join38.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join39.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join40.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/semijoin.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoin.q.out
Modified: hive/branches/spark/data/conf/spark/hive-site.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark/data/conf/spark/hive-site.xml?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/data/conf/spark/hive-site.xml (original)
+++ hive/branches/spark/data/conf/spark/hive-site.xml Sat Dec 13 17:44:41 2014
@@ -162,7 +162,7 @@
<property>
<name>hive.ignore.mapjoin.hint</name>
- <value>true</value>
+ <value>false</value>
<description>Whether Hive ignores the mapjoin hint</description>
</property>
Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Sat Dec 13 17:44:41 2014
@@ -506,7 +506,6 @@ spark.query.files=add_part_multiple.q, \
auto_sortmerge_join_8.q, \
auto_sortmerge_join_9.q, \
auto_sortmerge_join_10.q, \
- auto_sortmerge_join_11.q, \
auto_sortmerge_join_12.q, \
auto_sortmerge_join_13.q, \
auto_sortmerge_join_14.q, \
@@ -524,7 +523,6 @@ spark.query.files=add_part_multiple.q, \
bucketmapjoin3.q, \
bucketmapjoin4.q, \
bucketmapjoin5.q, \
- bucketmapjoin6.q, \
bucketmapjoin7.q, \
bucketmapjoin8.q, \
bucketmapjoin9.q, \
@@ -671,13 +669,11 @@ spark.query.files=add_part_multiple.q, \
join_cond_pushdown_unqual3.q, \
join_cond_pushdown_unqual4.q, \
join_empty.q \
- join_filters.q, \
join_filters_overlap.q, \
join_hive_626.q, \
join_map_ppr.q, \
join_merge_multi_expressions.q, \
join_merging.q, \
- join_nulls.q, \
join_rc.q, \
join_reorder.q, \
join_reorder2.q, \
@@ -808,21 +804,6 @@ spark.query.files=add_part_multiple.q, \
skewjoin_noskew.q, \
skewjoin_union_remove_1.q, \
skewjoin_union_remove_2.q, \
- smb_mapjoin9.q, \
- smb_mapjoin_1.q, \
- smb_mapjoin_2.q, \
- smb_mapjoin_3.q, \
- smb_mapjoin_4.q, \
- smb_mapjoin_5.q, \
- smb_mapjoin_6.q, \
- smb_mapjoin_7.q, \
- smb_mapjoin_8.q, \
- smb_mapjoin_10.q, \
- smb_mapjoin_13.q, \
- smb_mapjoin_14.q, \
- smb_mapjoin_15.q, \
- smb_mapjoin_16.q, \
- smb_mapjoin_17.q, \
smb_mapjoin_18.q, \
smb_mapjoin_19.q, \
smb_mapjoin_20.q, \
@@ -830,14 +811,6 @@ spark.query.files=add_part_multiple.q, \
smb_mapjoin_22.q, \
smb_mapjoin_25.q, \
sort.q, \
- sort_merge_join_desc_1.q, \
- sort_merge_join_desc_2.q, \
- sort_merge_join_desc_3.q, \
- sort_merge_join_desc_4.q, \
- sort_merge_join_desc_5.q, \
- sort_merge_join_desc_6.q, \
- sort_merge_join_desc_7.q, \
- sort_merge_join_desc_8.q, \
spark_test.q, \
stats_counter.q, \
stats_counter_partitioned.q, \
@@ -951,7 +924,6 @@ spark.query.files=add_part_multiple.q, \
vectorization_part_project.q, \
vectorization_pushdown.q, \
vectorization_short_regress.q, \
- vectorized_bucketmapjoin1.q, \
vectorized_case.q, \
vectorized_mapjoin.q, \
vectorized_math_funcs.q, \
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java Sat Dec 13 17:44:41 2014
@@ -521,7 +521,7 @@ abstract public class AbstractSMBJoinPro
JoinOperator joinOp,
SortBucketJoinProcCtx joinContext,
ParseContext parseContext) throws SemanticException {
- MapJoinOperator mapJoinOp = MapJoinProcessor.convertMapJoin(
+ MapJoinOperator mapJoinOp = new MapJoinProcessor().convertMapJoin(
parseContext.getConf(),
parseContext.getOpParseCtx(),
joinOp,
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Sat Dec 13 17:44:41 2014
@@ -233,7 +233,7 @@ public class MapJoinProcessor implements
newWork.getMapWork().getOpParseCtxMap();
QBJoinTree newJoinTree = newWork.getMapWork().getJoinTree();
// generate the map join operator; already checked the map join
- MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(conf, opParseCtxMap, op,
+ MapJoinOperator newMapJoinOp = new MapJoinProcessor().convertMapJoin(conf, opParseCtxMap, op,
newJoinTree, mapJoinPos, true, false);
genLocalWorkForMapJoin(newWork, newMapJoinOp, mapJoinPos);
}
@@ -302,8 +302,9 @@ public class MapJoinProcessor implements
* position of the source to be read as part of map-reduce framework. All other sources
* are cached in memory
* @param noCheckOuterJoin
+ * @param validateMapJoinTree
*/
- public static MapJoinOperator convertMapJoin(HiveConf conf,
+ public MapJoinOperator convertMapJoin(HiveConf conf,
LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin,
boolean validateMapJoinTree)
@@ -598,7 +599,7 @@ public class MapJoinProcessor implements
return mapJoinPos;
}
- private void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
+ protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
input.setChildOperators(null);
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Sat Dec 13 17:44:41 2014
@@ -101,7 +101,9 @@ public class Optimizer {
transformations.add(new RewriteGBUsingIndex());
}
transformations.add(new SamplePruner());
- transformations.add(new MapJoinProcessor());
+
+ MapJoinProcessor mapJoinProcessor = isSparkExecEngine ? new SparkMapJoinProcessor() : new MapJoinProcessor();
+ transformations.add(mapJoinProcessor);
if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) && !isTezExecEngine && !isSparkExecEngine) {
transformations.add(new BucketMapJoinOptimizer());
Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java?rev=1645338&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkMapJoinProcessor.java Sat Dec 13 17:44:41 2014
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.LinkedHashMap;
+import java.util.List;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.QBJoinTree;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+public class SparkMapJoinProcessor extends MapJoinProcessor {
+
+ /**
+ * Convert a regular join to a map-side join.
+ *
+ * @param conf current Hive configuration
+ * @param opParseCtxMap mapping from each operator to its parse context
+ * @param op join operator to be converted
+ * @param joinTree qb join tree
+ * @param bigTablePos position of the source to be read as part of
+ * map-reduce framework. All other sources are cached in memory
+ * @param noCheckOuterJoin if true, skip the check that rejects caching the outer side
+ * @param validateMapJoinTree whether the converted map-join tree should be validated
+ */
+ @Override
+ public MapJoinOperator convertMapJoin(HiveConf conf,
+ LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
+ JoinOperator op, QBJoinTree joinTree, int bigTablePos,
+ boolean noCheckOuterJoin,
+ boolean validateMapJoinTree) throws SemanticException {
+
+ // outer join cannot be performed on a table which is being cached (the small-table side)
+ JoinCondDesc[] condns = op.getConf().getConds();
+
+ if (!noCheckOuterJoin) {
+ if (checkMapJoin(bigTablePos, condns) < 0) {
+ throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
+ }
+ }
+
+ // create the map-join operator in place of the join operator
+ MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, opParseCtxMap,
+ op, joinTree, bigTablePos, noCheckOuterJoin);
+
+ // 1. remove the RS (reduce sink) as parent for the big table branch
+ // 2. remove the old join op from the child set of all the RSs
+ List<Operator<? extends OperatorDesc>> parentOps = mapJoinOp.getParentOperators();
+ for (int i = 0; i < parentOps.size(); i++) {
+ Operator<? extends OperatorDesc> parentOp = parentOps.get(i);
+ parentOp.getChildOperators().remove(op);
+ if (i == bigTablePos) {
+ List<Operator<? extends OperatorDesc>> grandParentOps = parentOp.getParentOperators();
+ Preconditions.checkArgument(grandParentOps.size() == 1,
+ "AssertionError: expect number of parents to be 1, but was " + grandParentOps.size());
+ Operator<? extends OperatorDesc> grandParentOp = grandParentOps.get(0);
+ grandParentOp.replaceChild(parentOp, mapJoinOp);
+ mapJoinOp.replaceParent(parentOp, grandParentOp);
+ }
+ }
+
+ return mapJoinOp;
+ }
+}
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out Sat Dec 13 17:44:41 2014
@@ -104,59 +104,60 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
+ alias: b
+ Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string), value (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: string), value (type: string)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string), value (type: string)
+ 1 key (type: string), value (type: string)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: table1
+ base file name: table2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
bucket_count 1
- bucket_field_name key
+ bucket_field_name value
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.table1
+ name default.table2
numFiles 1
numRows 0
rawDataSize 0
- serialization.ddl struct table1 { string key, string value}
+ serialization.ddl struct table2 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 20
+ totalSize 21
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -166,67 +167,93 @@ STAGE PLANS:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
bucket_count 1
- bucket_field_name key
+ bucket_field_name value
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.table1
+ name default.table2
numFiles 1
numRows 0
rawDataSize 0
- serialization.ddl struct table1 { string key, string value}
+ serialization.ddl struct table2 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 20
+ totalSize 21
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.table1
- name: default.table1
+ name: default.table2
+ name: default.table2
Truncated Path -> Alias:
- /table1 [a]
- Map 4
+ /table2 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE
+ alias: a
+ Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string), value (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: string), value (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string), value (type: string)
+ 1 key (type: string), value (type: string)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: table2
+ base file name: table1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
bucket_count 1
- bucket_field_name value
+ bucket_field_name key
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.table2
+ name default.table1
numFiles 1
numRows 0
rawDataSize 0
- serialization.ddl struct table2 { string key, string value}
+ serialization.ddl struct table1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 21
+ totalSize 20
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -236,47 +263,26 @@ STAGE PLANS:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
bucket_count 1
- bucket_field_name value
+ bucket_field_name key
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.table2
+ name default.table1
numFiles 1
numRows 0
rawDataSize 0
- serialization.ddl struct table2 { string key, string value}
+ serialization.ddl struct table1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 21
+ totalSize 20
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.table2
- name: default.table2
+ name: default.table1
+ name: default.table1
Truncated Path -> Alias:
- /table2 [b]
+ /table1 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out Sat Dec 13 17:44:41 2014
@@ -104,59 +104,60 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
+ alias: b
+ Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string), value (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: string), value (type: string)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string), value (type: string)
+ 1 key (type: string), value (type: string)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: table1
+ base file name: table2
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
bucket_count 1
- bucket_field_name key
+ bucket_field_name value
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.table1
+ name default.table2
numFiles 1
numRows 0
rawDataSize 0
- serialization.ddl struct table1 { string key, string value}
+ serialization.ddl struct table2 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 20
+ totalSize 21
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -166,67 +167,93 @@ STAGE PLANS:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
bucket_count 1
- bucket_field_name key
+ bucket_field_name value
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.table1
+ name default.table2
numFiles 1
numRows 0
rawDataSize 0
- serialization.ddl struct table1 { string key, string value}
+ serialization.ddl struct table2 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 20
+ totalSize 21
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.table1
- name: default.table1
+ name: default.table2
+ name: default.table2
Truncated Path -> Alias:
- /table1 [a]
- Map 4
+ /table2 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE
+ alias: a
+ Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string), value (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: string), value (type: string)
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: string), value (type: string)
+ 1 key (type: string), value (type: string)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: table2
+ base file name: table1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
bucket_count 1
- bucket_field_name value
+ bucket_field_name key
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.table2
+ name default.table1
numFiles 1
numRows 0
rawDataSize 0
- serialization.ddl struct table2 { string key, string value}
+ serialization.ddl struct table1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 21
+ totalSize 20
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -236,47 +263,26 @@ STAGE PLANS:
COLUMN_STATS_ACCURATE true
SORTBUCKETCOLSPREFIX TRUE
bucket_count 1
- bucket_field_name value
+ bucket_field_name key
columns key,value
columns.comments
columns.types string:string
#### A masked pattern was here ####
- name default.table2
+ name default.table1
numFiles 1
numRows 0
rawDataSize 0
- serialization.ddl struct table2 { string key, string value}
+ serialization.ddl struct table1 { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 21
+ totalSize 20
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.table2
- name: default.table2
+ name: default.table1
+ name: default.table1
Truncated Path -> Alias:
- /table2 [b]
+ /table1 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out Sat Dec 13 17:44:41 2014
@@ -91,54 +91,26 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-2
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
+ Local Work:
+ Map Reduce Local Work
+
Stage: Stage-1
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
- Map 3
- Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {VALUE._col0}
- outputColumnNames: _col0, _col1, _col7
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1,_col2
- columns.types int:string:string
- escape.delim \
- hive.serialization.extend.nesting.levels true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
Stage: Stage-0
Fetch Operator
@@ -227,54 +199,26 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
- Map 3
- Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {VALUE._col0}
- outputColumnNames: _col0, _col1, _col7
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1,_col2
- columns.types int:string:string
- escape.delim \
- hive.serialization.extend.nesting.levels true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
+ Local Work:
+ Map Reduce Local Work
Stage: Stage-0
Fetch Operator
@@ -456,193 +400,196 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-3
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 2
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: srcbucket_mapjoin
+ base file name: ds=2008-04-08
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin
- numFiles 2
- serialization.ddl struct srcbucket_mapjoin { i32 key, string value}
+ name default.srcbucket_mapjoin_part
+ numFiles 4
+ numRows 0
+ partition_columns ds
+ partition_columns.types string
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 5812
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 4
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin
- numFiles 2
- serialization.ddl struct srcbucket_mapjoin { i32 key, string value}
+ name default.srcbucket_mapjoin_part
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin
- name: default.srcbucket_mapjoin
+ name: default.srcbucket_mapjoin_part
+ name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
- /srcbucket_mapjoin [a]
- Map 3
+ /srcbucket_mapjoin_part/ds=2008-04-08 [b]
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col6
+ input vertices:
+ 1 Map 2
+ Position of Big Table: 0
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
#### A masked pattern was here ####
Partition
- base file name: ds=2008-04-08
+ base file name: srcbucket_mapjoin
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- partition values:
- ds 2008-04-08
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 4
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- numFiles 4
- numRows 0
- partition_columns ds
- partition_columns.types string
- rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ name default.srcbucket_mapjoin
+ numFiles 2
+ serialization.ddl struct srcbucket_mapjoin { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 5812
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- bucket_count 4
+ COLUMN_STATS_ACCURATE true
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value}
+ name default.srcbucket_mapjoin
+ numFiles 2
+ serialization.ddl struct srcbucket_mapjoin { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part
- name: default.srcbucket_mapjoin_part
+ name: default.srcbucket_mapjoin
+ name: default.srcbucket_mapjoin
Truncated Path -> Alias:
- /srcbucket_mapjoin_part/ds=2008-04-08 [b]
- Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {VALUE._col0}
- outputColumnNames: _col0, _col1, _col6
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ /srcbucket_mapjoin [a]
Stage: Stage-0
Move Operator
@@ -841,15 +788,14 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-3
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -862,14 +808,16 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- value expressions: value (type: string)
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0 {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 1
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -917,7 +865,12 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
- Map 3
+
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 2
Map Operator Tree:
TableScan
alias: b
@@ -927,14 +880,57 @@ STAGE PLANS:
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: value (type: string)
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value}
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: _col0, _col1, _col6
+ input vertices:
+ 0 Map 1
+ Position of Big Table: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE true
+ bucket_count -1
+ columns key,value1,value2
+ columns.comments
+ columns.types string:string:string
+#### A masked pattern was here ####
+ name default.bucketmapjoin_tmp_result
+ numFiles 1
+ numRows 464
+ rawDataSize 8519
+ serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 8983
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketmapjoin_tmp_result
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -987,52 +983,6 @@ STAGE PLANS:
name: default.srcbucket_mapjoin_part
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [b]
- Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {KEY.reducesinkkey0} {VALUE._col0}
- 1 {VALUE._col0}
- outputColumnNames: _col0, _col1, _col6
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE true
- bucket_count -1
- columns key,value1,value2
- columns.comments
- columns.types string:string:string
-#### A masked pattern was here ####
- name default.bucketmapjoin_tmp_result
- numFiles 3
- numRows 464
- rawDataSize 8519
- serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 8983
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketmapjoin_tmp_result
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -1050,7 +1000,7 @@ STAGE PLANS:
columns.types string:string:string
#### A masked pattern was here ####
name default.bucketmapjoin_tmp_result
- numFiles 3
+ numFiles 1
numRows 464
rawDataSize 8519
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out?rev=1645338&r1=1645337&r2=1645338&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out Sat Dec 13 17:44:41 2014
@@ -192,34 +192,35 @@ TOK_QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-2
Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 3
Map Operator Tree:
TableScan
- alias: a
+ alias: b
Statistics: Num rows: 1737 Data size: 6950 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE
- tag: 0
- auto parallelism: false
+ Spark HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ Position of Big Table: 0
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -232,22 +233,22 @@ STAGE PLANS:
part 1
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 3
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
- numFiles 2
+ name default.srcbucket_mapjoin_part_2
+ numFiles 3
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 4200
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -260,16 +261,16 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
#### A masked pattern was here ####
Partition
base file name: part=2
@@ -279,22 +280,22 @@ STAGE PLANS:
part 2
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 3
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
- numFiles 3
+ name default.srcbucket_mapjoin_part_2
+ numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 4200
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -307,36 +308,62 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_1
+ name default.srcbucket_mapjoin_part_2
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_1
- name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_2
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_1/part=1 [a]
- /srcbucket_mapjoin_part_1/part=2 [a]
- Map 4
+ /srcbucket_mapjoin_part_2/part=1 [b]
+ /srcbucket_mapjoin_part_2/part=2 [b]
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
- alias: b
+ alias: a
Statistics: Num rows: 1737 Data size: 6950 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ input vertices:
+ 1 Map 3
+ Position of Big Table: 0
+ Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Local Work:
+ Map Reduce Local Work
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -349,22 +376,22 @@ STAGE PLANS:
part 1
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 3
+ bucket_count 2
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 3
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 4200
+ totalSize 2750
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -377,16 +404,16 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
#### A masked pattern was here ####
Partition
base file name: part=2
@@ -396,22 +423,22 @@ STAGE PLANS:
part 2
properties:
COLUMN_STATS_ACCURATE true
- bucket_count 2
+ bucket_count 3
bucket_field_name key
columns key,value
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
- numFiles 2
+ name default.srcbucket_mapjoin_part_1
+ numFiles 3
numRows 0
partition_columns part
partition_columns.types string
rawDataSize 0
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2750
+ totalSize 4200
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -424,41 +451,20 @@ STAGE PLANS:
columns.comments
columns.types int:string
#### A masked pattern was here ####
- name default.srcbucket_mapjoin_part_2
+ name default.srcbucket_mapjoin_part_1
partition_columns part
partition_columns.types string
- serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value}
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.srcbucket_mapjoin_part_2
- name: default.srcbucket_mapjoin_part_2
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
Truncated Path -> Alias:
- /srcbucket_mapjoin_part_2/part=1 [b]
- /srcbucket_mapjoin_part_2/part=2 [b]
+ /srcbucket_mapjoin_part_1/part=1 [a]
+ /srcbucket_mapjoin_part_1/part=2 [a]
Reducer 2
- Needs Tagging: true
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0
- 1
- Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
- Reducer 3
Needs Tagging: false
Reduce Operator Tree:
Group By Operator