Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/24 03:33:07 UTC
svn commit: r1471224 [2/8] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/ ql/sr...
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java Wed Apr 24 01:33:05 2013
@@ -0,0 +1,455 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.QBJoinTree;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx;
+import org.apache.hadoop.hive.ql.plan.ConditionalWork;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+
+/**
+ * Iterate over each task. If the task contains an SMB join, convert the task to a
+ * conditional task. The conditional task will first try all map-join possibilities, and
+ * fall back to the SMB join if the map-join fails. The SMB join will be a backup task
+ * for all the map-join tasks.
+ */
+public class SortMergeJoinTaskDispatcher extends AbstractJoinTaskDispatcher implements Dispatcher {
+
+ public SortMergeJoinTaskDispatcher(PhysicalContext context) {
+ super(context);
+ }
+
+ // Convert the work in the SMB plan to a regular join.
+ // Note that the operator tree is not modified; only the path/alias mappings in the
+ // plan are adjusted. The operator tree will still contain the SMBMapJoinOperator.
+ private void genSMBJoinWork(MapredWork currWork, SMBMapJoinOperator smbJoinOp) {
+ // Remove the paths which are not part of aliasToPartitionInfo
+ Map<String, PartitionDesc> aliasToPartitionInfo = currWork.getAliasToPartnInfo();
+ List<String> removePaths = new ArrayList<String>();
+
+ for (Map.Entry<String, ArrayList<String>> entry : currWork.getPathToAliases().entrySet()) {
+ boolean keepPath = false;
+ for (String alias : entry.getValue()) {
+ if (aliasToPartitionInfo.containsKey(alias)) {
+ keepPath = true;
+ break;
+ }
+ }
+
+ // Remove if the path is not present
+ if (!keepPath) {
+ removePaths.add(entry.getKey());
+ }
+ }
+
+ List<String> removeAliases = new ArrayList<String>();
+ for (String removePath : removePaths) {
+ removeAliases.addAll(currWork.getPathToAliases().get(removePath));
+ currWork.getPathToAliases().remove(removePath);
+ currWork.getPathToPartitionInfo().remove(removePath);
+ }
+
+ for (String alias : removeAliases) {
+ currWork.getAliasToPartnInfo().remove(alias);
+ currWork.getAliasToWork().remove(alias);
+ }
+
+ // Get the MapredLocalWork
+ MapredLocalWork localWork = smbJoinOp.getConf().getLocalWork();
+
+ for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : localWork.getAliasToWork()
+ .entrySet()) {
+ String alias = entry.getKey();
+ Operator<? extends OperatorDesc> op = entry.getValue();
+ FetchWork fetchWork = localWork.getAliasToFetchWork().get(alias);
+
+ // Add the entry in mapredwork
+ currWork.getAliasToWork().put(alias, op);
+
+ PartitionDesc partitionInfo = currWork.getAliasToPartnInfo().get(alias);
+ if (fetchWork.getTblDir() != null) {
+ ArrayList<String> aliases = new ArrayList<String>();
+ aliases.add(alias);
+ currWork.getPathToAliases().put(fetchWork.getTblDir(), aliases);
+ currWork.getPathToPartitionInfo().put(fetchWork.getTblDir(), partitionInfo);
+ }
+ else {
+ for (String pathDir : fetchWork.getPartDir()) {
+ ArrayList<String> aliases = new ArrayList<String>();
+ aliases.add(alias);
+ currWork.getPathToAliases().put(pathDir, aliases);
+ currWork.getPathToPartitionInfo().put(pathDir, partitionInfo);
+ }
+ }
+ }
+
+ // Remove the dummy store operator from the tree
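+ // (once the plan is treated as a regular join, the dummy stores are no longer
+ // needed; splice them out by connecting the SMB join directly to its
+ // grandparent operators)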
+ for (Operator<? extends OperatorDesc> parentOp : smbJoinOp.getParentOperators()) {
+ if (parentOp instanceof DummyStoreOperator) {
+ Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
+ smbJoinOp.replaceParent(parentOp, grandParentOp);
+ grandParentOp.setChildOperators(parentOp.getChildOperators());
+ parentOp.setParentOperators(null);
+ }
+ }
+ }
+
+ /*
+ * Convert the work containing the sort-merge join into a work as if it contained a
+ * regular join. Note that the operator tree is not changed - it still contains the
+ * SMB join, but the plan is changed (aliasToWork etc.) to contain all the paths as
+ * if it were a regular join.
+ */
+ private MapredWork convertSMBWorkToJoinWork(MapredWork currWork, SMBMapJoinOperator oldSMBJoinOp)
+ throws SemanticException {
+ try {
+ String xml = currWork.toXML();
+
+ // deep copy a new mapred work
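+ // (serializing the plan to XML and reading it back yields a deep copy, so the
+ // map-join variants can be mutated freely while the original SMB plan survives
+ // unchanged to serve as the backup task)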
+ InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
+ MapredWork currJoinWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
+ SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork);
+
+ // Add the row resolver for the new operator
+ Map<Operator<? extends OperatorDesc>, OpParseContext> opParseContextMap =
+ physicalContext.getParseContext().getOpParseCtx();
+ opParseContextMap.put(newSMBJoinOp, opParseContextMap.get(oldSMBJoinOp));
+ // change the newly created map-red plan as if it was a join operator
+ genSMBJoinWork(currJoinWork, newSMBJoinOp);
+ return currJoinWork;
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new SemanticException("Generate Map Join Task Error: " + e.getMessage());
+ }
+ }
+
+ // create a map-join task, treating the table at bigTablePosition as the big table
+ private ObjectPair<MapRedTask, String> convertSMBTaskToMapJoinTask(String xml,
+ int bigTablePosition,
+ SMBMapJoinOperator smbJoinOp,
+ QBJoinTree joinTree)
+ throws UnsupportedEncodingException, SemanticException {
+ // deep copy a new mapred work from xml
+ InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
+ MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
+ // create a mapred task for this work
+ MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
+ .getParseContext().getConf());
+ // generate the map-join operator; the feasibility of the map-join was checked earlier
+ MapJoinOperator newMapJoinOp =
+ getMapJoinOperator(newTask, newWork, smbJoinOp, joinTree, bigTablePosition);
+ // The reducer needs to be restored - Consider a query like:
+ // select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+ // The reducer contains a groupby, which needs to be restored.
+ Operator<? extends OperatorDesc> reducer = newWork.getReducer();
+
+ // create the local work for this plan
+ String bigTableAlias =
+ MapJoinProcessor.genLocalWorkForMapJoin(newWork, newMapJoinOp, bigTablePosition);
+
+ // restore the reducer
+ newWork.setReducer(reducer);
+ return new ObjectPair<MapRedTask, String>(newTask, bigTableAlias);
+ }
+
+ private boolean isEligibleForOptimization(SMBMapJoinOperator originalSMBJoinOp) {
+ if (originalSMBJoinOp == null) {
+ return false;
+ }
+
+ // Only create a map-join if the SMB join was converted automatically from a
+ // regular join (i.e. the user did not give a mapjoin hint)
+ if (!originalSMBJoinOp.isConvertedAutomaticallySMBJoin()) {
+ return false;
+ }
+
+ Operator<? extends OperatorDesc> currOp = originalSMBJoinOp;
+ while (true) {
+ if (currOp.getChildOperators() == null) {
+ if (currOp instanceof FileSinkOperator) {
+ FileSinkOperator fsOp = (FileSinkOperator)currOp;
+ // The query has enforced that a sort-merge join should be performed.
+ // For more details, look at 'removedReduceSinkBucketSort' in FileSinkDesc.java
+ return !fsOp.getConf().isRemovedReduceSinkBucketSort();
+ }
+
+ // If the chain ends in a ReduceSink (i.e. feeds a reducer), the conversion is
+ // always allowed. Since there exists a reducer, the sorting/bucketing properties
+ // due to the sort-merge join operator are lost anyway, so the plan cannot become
+ // incorrect by changing the sort-merge join to a map-join
+ if (currOp instanceof ReduceSinkOperator) {
+ return true;
+ }
+ return false;
+ }
+
+ if (currOp.getChildOperators().size() > 1) {
+ return true;
+ }
+
+ currOp = currOp.getChildOperators().get(0);
+ }
+ }
+
+ @Override
+ public Task<? extends Serializable> processCurrentTask(MapRedTask currTask,
+ ConditionalTask conditionalTask, Context context)
+ throws SemanticException {
+
+ // whether it contains a sort merge join operator
+ MapredWork currWork = currTask.getWork();
+ SMBMapJoinOperator originalSMBJoinOp = getSMBMapJoinOp(currWork);
+ if (!isEligibleForOptimization(originalSMBJoinOp)) {
+ return null;
+ }
+
+ currTask.setTaskTag(Task.CONVERTED_SORTMERGEJOIN);
+
+ // get parseCtx for this Join Operator
+ ParseContext parseCtx = physicalContext.getParseContext();
+ QBJoinTree joinTree = parseCtx.getSmbMapJoinContext().get(originalSMBJoinOp);
+
+ // Convert the work containing the sort-merge join into a work as if it contained a
+ // regular join. Note that the operator tree is not changed - it still contains the
+ // SMB join, but the plan is changed (aliasToWork etc.) to contain all the paths as
+ // if it were a regular join.
+ // This is used to convert the plan to a map-join, and then the original SMB join plan is used
+ // as a backup task.
+ MapredWork currJoinWork = convertSMBWorkToJoinWork(currWork, originalSMBJoinOp);
+ SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork);
+
+ currWork.setOpParseCtxMap(parseCtx.getOpParseCtx());
+ currWork.setJoinTree(joinTree);
+ currJoinWork.setOpParseCtxMap(parseCtx.getOpParseCtx());
+ currJoinWork.setJoinTree(joinTree);
+
+ // create conditional work list and task list
+ List<Serializable> listWorks = new ArrayList<Serializable>();
+ List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
+
+ // create alias to task mapping and alias to input file mapping for resolver
+ HashMap<String, Task<? extends Serializable>> aliasToTask =
+ new HashMap<String, Task<? extends Serializable>>();
+ // Note that pathToAliases will behave as if the original plan was a join plan
+ HashMap<String, ArrayList<String>> pathToAliases = currJoinWork.getPathToAliases();
+
+ // generate a map join task for the big table
+ SMBJoinDesc originalSMBJoinDesc = originalSMBJoinOp.getConf();
+ Byte[] order = originalSMBJoinDesc.getTagOrder();
+ int numAliases = order.length;
+ Set<Integer> bigTableCandidates =
+ MapJoinProcessor.getBigTableCandidates(originalSMBJoinDesc.getConds());
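+ // (for an outer join, only the side whose rows must all be preserved can be
+ // streamed as the big table; a full outer join leaves no valid candidate, in
+ // which case getBigTableCandidates returns null)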
+
+ // no table could be the big table; there is no need to convert
+ if (bigTableCandidates == null) {
+ return null;
+ }
+
+ HashMap<String, Long> aliasToSize = new HashMap<String, Long>();
+ Configuration conf = context.getConf();
+ try {
+ long aliasTotalKnownInputSize = getTotalKnownInputSize(context, currJoinWork,
+ pathToAliases, aliasToSize);
+
+ String xml = currJoinWork.toXML();
+ long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(conf,
+ HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
+
+ for (int bigTablePosition = 0; bigTablePosition < numAliases; bigTablePosition++) {
+ // this table cannot be big table
+ if (!bigTableCandidates.contains(bigTablePosition)) {
+ continue;
+ }
+
+ // create map join task for the given big table position
+ ObjectPair<MapRedTask, String> newTaskAlias =
+ convertSMBTaskToMapJoinTask(xml, bigTablePosition, newSMBJoinOp, joinTree);
+ MapRedTask newTask = newTaskAlias.getFirst();
+ String bigTableAlias = newTaskAlias.getSecond();
+
+ Long aliasKnownSize = aliasToSize.get(bigTableAlias);
+ if (aliasKnownSize != null && aliasKnownSize.longValue() > 0) {
+ long smallTblTotalKnownSize = aliasTotalKnownInputSize
+ - aliasKnownSize.longValue();
+ if (smallTblTotalKnownSize > ThresholdOfSmallTblSizeSum) {
+ // this table is not a good candidate to be the big table
+ continue;
+ }
+ }
+
+ // add into conditional task
+ listWorks.add(newTask.getWork());
+ listTasks.add(newTask);
+ newTask.setTaskTag(Task.CONVERTED_MAPJOIN);
+
+ // set up backup task
+ newTask.setBackupTask(currTask);
+ newTask.setBackupChildrenTasks(currTask.getChildTasks());
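+ // at runtime, if the chosen map-join fails (e.g. the local hash table does not
+ // fit in memory), the engine re-executes the backup, i.e. the original
+ // sort-merge join task and its children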
+
+ // put the mapping alias to task
+ aliasToTask.put(bigTableAlias, newTask);
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new SemanticException("Generate Map Join Task Error: ", e);
+ }
+
+ // insert current common join task to conditional task
+ listWorks.add(currTask.getWork());
+ listTasks.add(currTask);
+ // clear JoinTree and OP Parse Context
+ currWork.setOpParseCtxMap(null);
+ currWork.setJoinTree(null);
+
+ // create conditional task and insert conditional task into task tree
+ ConditionalWork cndWork = new ConditionalWork(listWorks);
+ ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, parseCtx.getConf());
+ cndTsk.setListTasks(listTasks);
+
+ // set resolver and resolver context
+ cndTsk.setResolver(new ConditionalResolverCommonJoin());
+ ConditionalResolverCommonJoinCtx resolverCtx = new ConditionalResolverCommonJoinCtx();
+ resolverCtx.setPathToAliases(pathToAliases);
+ resolverCtx.setAliasToKnownSize(aliasToSize);
+ resolverCtx.setAliasToTask(aliasToTask);
+ resolverCtx.setCommonJoinTask(currTask);
+ resolverCtx.setLocalTmpDir(context.getLocalScratchDir(false));
+ resolverCtx.setHdfsTmpDir(context.getMRScratchDir());
+ cndTsk.setResolverCtx(resolverCtx);
+
+ // replace the current task with the new generated conditional task
+ replaceTaskWithConditionalTask(currTask, cndTsk, physicalContext);
+ return cndTsk;
+ }
+
+ /**
+ * If a join/union is followed by an SMB join, this cannot be converted to a
+ * conditional task.
+ */
+ private boolean reducerAllowedSMBJoinOp(Operator<? extends OperatorDesc> reducer) {
+ while (reducer != null) {
+ if (!reducer.opAllowedBeforeSortMergeJoin()) {
+ return false;
+ }
+
+ List<Operator<? extends OperatorDesc>> childOps = reducer.getChildOperators();
+ if ((childOps == null) || (childOps.isEmpty())) {
+ return true;
+ }
+
+ // multi-table inserts not supported
+ if (childOps.size() > 1) {
+ return false;
+ }
+ reducer = childOps.get(0);
+ }
+
+ return true;
+ }
+
+ private SMBMapJoinOperator getSMBMapJoinOp(Operator<? extends OperatorDesc> currOp,
+ Operator<? extends OperatorDesc> reducer) {
+ SMBMapJoinOperator ret = null;
+ while (true) {
+ if (currOp instanceof SMBMapJoinOperator) {
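+ // a second SMB join in the same operator chain cannot be handled; give up
+ // on the conversion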
+ if (ret != null) {
+ return null;
+ }
+ ret = (SMBMapJoinOperator) currOp;
+ }
+
+ // Does any operator in the tree stop the task from being converted to a conditional task?
+ if (!currOp.opAllowedBeforeSortMergeJoin()) {
+ return null;
+ }
+
+ List<Operator<? extends OperatorDesc>> childOps = currOp.getChildOperators();
+ if ((childOps == null) || (childOps.isEmpty())) {
+ return reducerAllowedSMBJoinOp(reducer) ? ret : null;
+ }
+
+ // multi-table inserts not supported
+ if (childOps.size() > 1) {
+ return null;
+ }
+ currOp = childOps.get(0);
+ }
+ }
+
+ private SMBMapJoinOperator getSMBMapJoinOp(MapredWork work) throws SemanticException {
+ if (work != null) {
+ Operator<? extends OperatorDesc> reducer = work.getReducer();
+ for (Operator<? extends OperatorDesc> op : work.getAliasToWork().values()) {
+ SMBMapJoinOperator smbMapJoinOp = getSMBMapJoinOp(op, reducer);
+ if (smbMapJoinOp != null) {
+ return smbMapJoinOp;
+ }
+ }
+ }
+ return null;
+ }
+
+ private MapJoinOperator getMapJoinOperator(MapRedTask task,
+ MapredWork work,
+ SMBMapJoinOperator oldSMBJoinOp,
+ QBJoinTree joinTree,
+ int mapJoinPos) throws SemanticException {
+ SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(task.getWork());
+
+ // Add the row resolver for the new operator
+ Map<Operator<? extends OperatorDesc>, OpParseContext> opParseContextMap =
+ physicalContext.getParseContext().getOpParseCtx();
+ opParseContextMap.put(newSMBJoinOp, opParseContextMap.get(oldSMBJoinOp));
+
+ // generate the map join operator
+ return MapJoinProcessor.convertSMBJoinToMapJoin(opParseContextMap, newSMBJoinOp,
+ joinTree, mapJoinPos, true);
+ }
+}
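
The task tree built by processCurrentTask() above amounts to a runtime fallback
pattern: the resolver picks a feasible map-join candidate, and the work is
re-executed as the sort-merge join if that candidate fails. A minimal,
self-contained sketch of just that control flow (the types below are
illustrative stand-ins, not Hive classes):

    import java.util.ArrayList;
    import java.util.List;

    public class ConditionalFallbackSketch {
      interface JoinTask {
        long run() throws Exception;
      }

      public static void main(String[] args) {
        List<JoinTask> mapJoinCandidates = new ArrayList<JoinTask>();
        // a map-join candidate; may die at runtime, e.g. hash table too large
        mapJoinCandidates.add(new JoinTask() {
          public long run() throws Exception {
            throw new Exception("local hash table exceeded memory limit");
          }
        });
        // the original sort-merge join: slower, but always feasible
        JoinTask smbBackup = new JoinTask() {
          public long run() {
            return 42L;
          }
        };

        long rows;
        try {
          // the resolver (ConditionalResolverCommonJoin) picks one candidate
          // based on the known input sizes; here we simply take the first
          rows = mapJoinCandidates.get(0).run();
        } catch (Exception e) {
          // setBackupTask()/setBackupChildrenTasks() give the engine this
          // escape hatch: rerun the work as the sort-merge join
          try {
            rows = smbBackup.run();
          } catch (Exception fatal) {
            throw new RuntimeException(fatal);
          }
        }
        System.out.println("rows: " + rows);
      }
    }
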
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java Wed Apr 24 01:33:05 2013
@@ -71,7 +71,7 @@ public class ConditionalResolverCommonJo
}
public HashMap<String, Long> getAliasToKnownSize() {
- return aliasToKnownSize;
+ return aliasToKnownSize == null ? new HashMap<String, Long>() : aliasToKnownSize;
}
public void setAliasToKnownSize(HashMap<String, Long> aliasToKnownSize) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java Wed Apr 24 01:33:05 2013
@@ -50,6 +50,16 @@ public class FileSinkDesc extends Abstra
private String staticSpec; // static partition spec ends with a '/'
private boolean gatherStats;
+ // Consider a query like:
+ // insert overwrite table T3 select ... from T1 join T2 on T1.key = T2.key;
+ // where T1, T2 and T3 are sorted and bucketed by key into the same number of buckets.
+ // We don't need a reducer to enforce bucketing and sorting for T3.
+ // The field below captures the fact that the reducer introduced to enforce the
+ // sorting/bucketing of T3 has been removed.
+ // In this case a sort-merge join is required, so the sort-merge join between T1 and
+ // T2 cannot be converted to a map-join
+ private transient boolean removedReduceSinkBucketSort;
+
// This file descriptor is linked to other file descriptors.
// One use case is that, a union->select (star)->file sink, is broken down.
// For eg: consider a query like:
@@ -364,4 +374,11 @@ public class FileSinkDesc extends Abstra
this.statsCollectRawDataSize = statsCollectRawDataSize;
}
+ public boolean isRemovedReduceSinkBucketSort() {
+ return removedReduceSinkBucketSort;
+ }
+
+ public void setRemovedReduceSinkBucketSort(boolean removedReduceSinkBucketSort) {
+ this.removedReduceSinkBucketSort = removedReduceSinkBucketSort;
+ }
}
Added: hive/trunk/ql/src/test/queries/clientnegative/auto_sortmerge_join_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/auto_sortmerge_join_1.q?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/auto_sortmerge_join_1.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/auto_sortmerge_join_1.q Wed Apr 24 01:33:05 2013
@@ -0,0 +1,25 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert overwrite table tbl1 select * from src where key < 20;
+insert overwrite table tbl2 select * from src where key < 10;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.join=true;
+
+-- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key;
+
+-- Since tbl1 is the bigger table, tbl1 Right Outer Join tbl2 cannot be performed
+explain
+select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q Wed Apr 24 01:33:05 2013
@@ -1,6 +1,7 @@
-- small 1 part, 2 bucket & big 2 part, 4 bucket
-CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
@@ -20,6 +21,7 @@ set hive.auto.convert.sortmerge.join=tru
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -28,3 +30,7 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q Wed Apr 24 01:33:05 2013
@@ -17,7 +17,7 @@ set hive.optimize.bucketmapjoin.sortedme
set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.auto.convert.sortmerge.join=true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
-- One of the subqueries contains a union, so it should not be converted to a sort-merge join.
explain
select count(*) from
Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q Wed Apr 24 01:33:05 2013
@@ -0,0 +1,81 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert overwrite table tbl1 select * from src where key < 10;
+insert overwrite table tbl2 select * from src where key < 10;
+
+CREATE TABLE dest1(k1 int, k2 int);
+CREATE TABLE dest2(k1 string, k2 string);
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.join=true;
+
+-- An SMB join followed by a multi-insert
+explain
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+select * from dest1 order by k1, k2;
+select * from dest2 order by k1, k2;
+
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=200;
+
+-- An SMB join followed by a multi-insert
+explain
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+select * from dest1 order by k1, k2;
+select * from dest2 order by k1, k2;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+-- An SMB join followed by a multi-insert
+explain
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+select * from dest1 order by k1, k2;
+select * from dest2 order by k1, k2;
Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q Wed Apr 24 01:33:05 2013
@@ -0,0 +1,29 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert overwrite table tbl1 select * from src where key < 20;
+insert overwrite table tbl2 select * from src where key < 10;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.join=true;
+
+-- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key;
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key;
+
+insert overwrite table tbl2 select * from src where key < 200;
+
+-- Since tbl2 is the bigger table, tbl1 Right Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key;
+select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q Wed Apr 24 01:33:05 2013
@@ -16,9 +16,17 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
-- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.mapjoin.localtask.max.memory.usage = 0.0001;
+set hive.mapjoin.check.memory.rows = 2;
+
+-- The mapjoin should fail, resulting in the sort-merge join being used
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q Wed Apr 24 01:33:05 2013
@@ -16,7 +16,7 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -25,3 +25,7 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q Wed Apr 24 01:33:05 2013
@@ -18,7 +18,7 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -27,3 +27,7 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q Wed Apr 24 01:33:05 2013
@@ -12,7 +12,7 @@ load data local inpath '../data/files/sr
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -21,3 +21,8 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.auto.convert.join=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q Wed Apr 24 01:33:05 2013
@@ -18,6 +18,24 @@ set hive.optimize.bucketmapjoin.sortedme
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=200;
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
+-- An SMB join is followed by a regular join on a non-bucketed table on a different key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value;
+
+-- An SMB join is followed by a regular join on a non-bucketed table on the same key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key;
+
+-- An SMB join is followed by a regular join on a bucketed table on the same key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key;
+
+-- An SMB join is followed by a regular join on a bucketed table on a different key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
-- A SMB join is being followed by a regular join on a non-bucketed table on a different key
explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q Wed Apr 24 01:33:05 2013
@@ -21,7 +21,7 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -30,3 +30,7 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q Wed Apr 24 01:33:05 2013
@@ -21,7 +21,7 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -30,3 +30,11 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.mapjoin.localtask.max.memory.usage = 0.0001;
+set hive.mapjoin.check.memory.rows = 2;
+
+-- The mapjoin should fail, resulting in the sort-merge join being used
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q Wed Apr 24 01:33:05 2013
@@ -15,7 +15,7 @@ set hive.auto.convert.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join=true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
-- The join is being performed as part of sub-query. It should be converted to a sort-merge join
explain
select count(*) from (
@@ -281,3 +281,256 @@ select count(*) from (
) subq2
join tbl2 b
on subq2.key = b.key) a;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
+-- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1;
+
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1;
+
+-- The join is being performed as part of a sub-query. It should be converted to a sort-merge join.
+-- Add an order by at the end to make the results deterministic.
+explain
+select key, count(*) from
+(
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+group by key
+order by key;
+
+select key, count(*) from
+(
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+group by key
+order by key;
+
+-- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2;
+
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2;
+
+-- A join is being performed across different sub-queries, where a join is being performed in each of them.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
+
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key;
+
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key;
+
+-- Both the tables are nested sub-queries, i.e. more than one level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key;
+
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters and the
+-- join key is not modified, it should be converted to a sort-merge join. Note that the sub-query
+-- modifies one column, but that column is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key;
+
+-- The left table is a sub-query and the right table is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key;
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key;
+
+-- The right table is a sub-query and the left table is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from tbl1 a
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq1
+ on a.key = subq1.key;
+
+select count(*) from tbl1 a
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq1
+ on a.key = subq1.key;
+
+-- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key);
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key);
+
+-- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a;
+
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q Wed Apr 24 01:33:05 2013
@@ -6,6 +6,8 @@ set hive.exec.reducers.max = 1;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q Wed Apr 24 01:33:05 2013
@@ -6,6 +6,8 @@ set hive.exec.reducers.max = 1;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Added: hive/trunk/ql/src/test/results/clientnegative/auto_sortmerge_join_1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/auto_sortmerge_join_1.q.out?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/auto_sortmerge_join_1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/auto_sortmerge_join_1.q.out Wed Apr 24 01:33:05 2013
@@ -0,0 +1,184 @@
+PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: insert overwrite table tbl1 select * from src where key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert overwrite table tbl1 select * from src where key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table tbl2 select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert overwrite table tbl2 select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage , consists of Stage-5, Stage-1
+ Stage-5 has a backup stage: Stage-1
+ Stage-3 depends on stages: Stage-5
+ Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-4
+ Conditional Operator
+
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+FAILED: SemanticException [Error 10057]: MAPJOIN cannot be performed with OUTER JOIN