Posted to commits@hive.apache.org by ha...@apache.org on 2013/04/24 03:33:07 UTC
svn commit: r1471224 [2/8] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ conf/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/ ql/sr...
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java Wed Apr 24 01:33:05 2013
@@ -0,0 +1,455 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.QBJoinTree;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx;
+import org.apache.hadoop.hive.ql.plan.ConditionalWork;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+
+/**
+ * Iterate over each task. If the task contains an SMB join, convert the task to a
+ * conditional task. The conditional task will first try all map-join possibilities, and
+ * fall back to the SMB join if the map-join fails. The SMB join will be a backup task
+ * for all the map-join tasks.
+ */
+public class SortMergeJoinTaskDispatcher extends AbstractJoinTaskDispatcher implements Dispatcher {
+
+ public SortMergeJoinTaskDispatcher(PhysicalContext context) {
+ super(context);
+ }
+
+ // Convert the work in the SMB plan to a regular join.
+ // Note that the operator tree is not modified; only the path/alias mappings in the
+ // plan are adjusted. The operator tree will still contain the SMBMapJoinOperator.
+ private void genSMBJoinWork(MapredWork currWork, SMBMapJoinOperator smbJoinOp) {
+ // Remove the paths which are not part of aliasToPartitionInfo
+ Map<String, PartitionDesc> aliasToPartitionInfo = currWork.getAliasToPartnInfo();
+ List<String> removePaths = new ArrayList<String>();
+
+ for (Map.Entry<String, ArrayList<String>> entry : currWork.getPathToAliases().entrySet()) {
+ boolean keepPath = false;
+ for (String alias : entry.getValue()) {
+ if (aliasToPartitionInfo.containsKey(alias)) {
+ keepPath = true;
+ break;
+ }
+ }
+
+ // Remove if the path is not present
+ if (!keepPath) {
+ removePaths.add(entry.getKey());
+ }
+ }
+
+ List<String> removeAliases = new ArrayList<String>();
+ for (String removePath : removePaths) {
+ removeAliases.addAll(currWork.getPathToAliases().get(removePath));
+ currWork.getPathToAliases().remove(removePath);
+ currWork.getPathToPartitionInfo().remove(removePath);
+ }
+
+ for (String alias : removeAliases) {
+ currWork.getAliasToPartnInfo().remove(alias);
+ currWork.getAliasToWork().remove(alias);
+ }
+
+ // Get the MapredLocalWork
+ MapredLocalWork localWork = smbJoinOp.getConf().getLocalWork();
+
+ for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : localWork.getAliasToWork()
+ .entrySet()) {
+ String alias = entry.getKey();
+ Operator<? extends OperatorDesc> op = entry.getValue();
+ FetchWork fetchWork = localWork.getAliasToFetchWork().get(alias);
+
+ // Add the entry in mapredwork
+ currWork.getAliasToWork().put(alias, op);
+
+ PartitionDesc partitionInfo = currWork.getAliasToPartnInfo().get(alias);
+ if (fetchWork.getTblDir() != null) {
+ ArrayList<String> aliases = new ArrayList<String>();
+ aliases.add(alias);
+ currWork.getPathToAliases().put(fetchWork.getTblDir(), aliases);
+ currWork.getPathToPartitionInfo().put(fetchWork.getTblDir(), partitionInfo);
+ }
+ else {
+ for (String pathDir : fetchWork.getPartDir()) {
+ ArrayList<String> aliases = new ArrayList<String>();
+ aliases.add(alias);
+ currWork.getPathToAliases().put(pathDir, aliases);
+ currWork.getPathToPartitionInfo().put(pathDir, partitionInfo);
+ }
+ }
+ }
+
+ // Remove the dummy store operator from the tree
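+ // (once the plan is treated as a regular join, the dummy stores are no longer
+ // needed; splice them out by connecting the SMB join directly to its
+ // grandparent operators)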
+ for (Operator<? extends OperatorDesc> parentOp : smbJoinOp.getParentOperators()) {
+ if (parentOp instanceof DummyStoreOperator) {
+ Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0);
+ smbJoinOp.replaceParent(parentOp, grandParentOp);
+ grandParentOp.setChildOperators(parentOp.getChildOperators());
+ parentOp.setParentOperators(null);
+ }
+ }
+ }
+
+ /*
+ * Convert the work containing the sort-merge join into a work as if it contained a
+ * regular join. Note that the operator tree is not changed - it still contains the
+ * SMB join, but the plan is changed (aliasToWork etc.) to contain all the paths as
+ * if it were a regular join.
+ */
+ private MapredWork convertSMBWorkToJoinWork(MapredWork currWork, SMBMapJoinOperator oldSMBJoinOp)
+ throws SemanticException {
+ try {
+ String xml = currWork.toXML();
+
+ // deep copy a new mapred work
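+ // (serializing the plan to XML and reading it back yields a deep copy, so the
+ // map-join variants can be mutated freely while the original SMB plan survives
+ // unchanged to serve as the backup task)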
+ InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
+ MapredWork currJoinWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
+ SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork);
+
+ // Add the row resolver for the new operator
+ Map<Operator<? extends OperatorDesc>, OpParseContext> opParseContextMap =
+ physicalContext.getParseContext().getOpParseCtx();
+ opParseContextMap.put(newSMBJoinOp, opParseContextMap.get(oldSMBJoinOp));
+ // change the newly created map-red plan as if it was a join operator
+ genSMBJoinWork(currJoinWork, newSMBJoinOp);
+ return currJoinWork;
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new SemanticException("Generate Map Join Task Error: " + e.getMessage());
+ }
+ }
+
+ // create a map-join task, treating the table at bigTablePosition as the big table
+ private ObjectPair<MapRedTask, String> convertSMBTaskToMapJoinTask(String xml,
+ int bigTablePosition,
+ SMBMapJoinOperator smbJoinOp,
+ QBJoinTree joinTree)
+ throws UnsupportedEncodingException, SemanticException {
+ // deep copy a new mapred work from xml
+ InputStream in = new ByteArrayInputStream(xml.getBytes("UTF-8"));
+ MapredWork newWork = Utilities.deserializeMapRedWork(in, physicalContext.getConf());
+ // create a mapred task for this work
+ MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork, physicalContext
+ .getParseContext().getConf());
+ // generate the map-join operator; the feasibility of the map-join was checked earlier
+ MapJoinOperator newMapJoinOp =
+ getMapJoinOperator(newTask, newWork, smbJoinOp, joinTree, bigTablePosition);
+ // The reducer needs to be restored - Consider a query like:
+ // select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+ // The reducer contains a groupby, which needs to be restored.
+ Operator<? extends OperatorDesc> reducer = newWork.getReducer();
+
+ // create the local work for this plan
+ String bigTableAlias =
+ MapJoinProcessor.genLocalWorkForMapJoin(newWork, newMapJoinOp, bigTablePosition);
+
+ // restore the reducer
+ newWork.setReducer(reducer);
+ return new ObjectPair<MapRedTask, String>(newTask, bigTableAlias);
+ }
+
+ private boolean isEligibleForOptimization(SMBMapJoinOperator originalSMBJoinOp) {
+ if (originalSMBJoinOp == null) {
+ return false;
+ }
+
+ // Only create a map-join if the SMB join was converted automatically from a
+ // regular join (i.e. the user did not give a mapjoin hint)
+ if (!originalSMBJoinOp.isConvertedAutomaticallySMBJoin()) {
+ return false;
+ }
+
+ Operator<? extends OperatorDesc> currOp = originalSMBJoinOp;
+ while (true) {
+ if (currOp.getChildOperators() == null) {
+ if (currOp instanceof FileSinkOperator) {
+ FileSinkOperator fsOp = (FileSinkOperator)currOp;
+ // The query has enforced that a sort-merge join should be performed.
+ // For more details, look at 'removedReduceSinkBucketSort' in FileSinkDesc.java
+ return !fsOp.getConf().isRemovedReduceSinkBucketSort();
+ }
+
+ // If the chain ends in a ReduceSink (i.e. feeds a reducer), the conversion is
+ // always allowed. Since there exists a reducer, the sorting/bucketing properties
+ // due to the sort-merge join operator are lost anyway, so the plan cannot become
+ // incorrect by changing the sort-merge join to a map-join
+ if (currOp instanceof ReduceSinkOperator) {
+ return true;
+ }
+ return false;
+ }
+
+ if (currOp.getChildOperators().size() > 1) {
+ return true;
+ }
+
+ currOp = currOp.getChildOperators().get(0);
+ }
+ }
+
+ @Override
+ public Task<? extends Serializable> processCurrentTask(MapRedTask currTask,
+ ConditionalTask conditionalTask, Context context)
+ throws SemanticException {
+
+ // whether it contains a sort merge join operator
+ MapredWork currWork = currTask.getWork();
+ SMBMapJoinOperator originalSMBJoinOp = getSMBMapJoinOp(currWork);
+ if (!isEligibleForOptimization(originalSMBJoinOp)) {
+ return null;
+ }
+
+ currTask.setTaskTag(Task.CONVERTED_SORTMERGEJOIN);
+
+ // get parseCtx for this Join Operator
+ ParseContext parseCtx = physicalContext.getParseContext();
+ QBJoinTree joinTree = parseCtx.getSmbMapJoinContext().get(originalSMBJoinOp);
+
+ // Convert the work containing the sort-merge join into a work as if it contained a
+ // regular join. Note that the operator tree is not changed - it still contains the
+ // SMB join, but the plan is changed (aliasToWork etc.) to contain all the paths as
+ // if it were a regular join.
+ // This is used to convert the plan to a map-join, and then the original SMB join plan is used
+ // as a backup task.
+ MapredWork currJoinWork = convertSMBWorkToJoinWork(currWork, originalSMBJoinOp);
+ SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork);
+
+ currWork.setOpParseCtxMap(parseCtx.getOpParseCtx());
+ currWork.setJoinTree(joinTree);
+ currJoinWork.setOpParseCtxMap(parseCtx.getOpParseCtx());
+ currJoinWork.setJoinTree(joinTree);
+
+ // create conditional work list and task list
+ List<Serializable> listWorks = new ArrayList<Serializable>();
+ List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
+
+ // create alias to task mapping and alias to input file mapping for resolver
+ HashMap<String, Task<? extends Serializable>> aliasToTask =
+ new HashMap<String, Task<? extends Serializable>>();
+ // Note that pathToAliases will behave as if the original plan was a join plan
+ HashMap<String, ArrayList<String>> pathToAliases = currJoinWork.getPathToAliases();
+
+ // generate a map join task for the big table
+ SMBJoinDesc originalSMBJoinDesc = originalSMBJoinOp.getConf();
+ Byte[] order = originalSMBJoinDesc.getTagOrder();
+ int numAliases = order.length;
+ Set<Integer> bigTableCandidates =
+ MapJoinProcessor.getBigTableCandidates(originalSMBJoinDesc.getConds());
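+ // (for an outer join, only the side whose rows must all be preserved can be
+ // streamed as the big table; a full outer join leaves no valid candidate, in
+ // which case getBigTableCandidates returns null)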
+
+ // no table could be the big table; there is no need to convert
+ if (bigTableCandidates == null) {
+ return null;
+ }
+
+ HashMap<String, Long> aliasToSize = new HashMap<String, Long>();
+ Configuration conf = context.getConf();
+ try {
+ long aliasTotalKnownInputSize = getTotalKnownInputSize(context, currJoinWork,
+ pathToAliases, aliasToSize);
+
+ String xml = currJoinWork.toXML();
+ long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(conf,
+ HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
+
+ for (int bigTablePosition = 0; bigTablePosition < numAliases; bigTablePosition++) {
+ // this table cannot be big table
+ if (!bigTableCandidates.contains(bigTablePosition)) {
+ continue;
+ }
+
+ // create map join task for the given big table position
+ ObjectPair<MapRedTask, String> newTaskAlias =
+ convertSMBTaskToMapJoinTask(xml, bigTablePosition, newSMBJoinOp, joinTree);
+ MapRedTask newTask = newTaskAlias.getFirst();
+ String bigTableAlias = newTaskAlias.getSecond();
+
+ Long aliasKnownSize = aliasToSize.get(bigTableAlias);
+ if (aliasKnownSize != null && aliasKnownSize.longValue() > 0) {
+ long smallTblTotalKnownSize = aliasTotalKnownInputSize
+ - aliasKnownSize.longValue();
+ if (smallTblTotalKnownSize > ThresholdOfSmallTblSizeSum) {
+ // this table is not a good candidate to be the big table
+ continue;
+ }
+ }
+
+ // add into conditional task
+ listWorks.add(newTask.getWork());
+ listTasks.add(newTask);
+ newTask.setTaskTag(Task.CONVERTED_MAPJOIN);
+
+ // set up backup task
+ newTask.setBackupTask(currTask);
+ newTask.setBackupChildrenTasks(currTask.getChildTasks());
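+ // at runtime, if the chosen map-join fails (e.g. the local hash table does not
+ // fit in memory), the engine re-executes the backup, i.e. the original
+ // sort-merge join task and its children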
+
+ // put the mapping alias to task
+ aliasToTask.put(bigTableAlias, newTask);
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new SemanticException("Generate Map Join Task Error: ", e);
+ }
+
+ // insert current common join task to conditional task
+ listWorks.add(currTask.getWork());
+ listTasks.add(currTask);
+ // clear JoinTree and OP Parse Context
+ currWork.setOpParseCtxMap(null);
+ currWork.setJoinTree(null);
+
+ // create conditional task and insert conditional task into task tree
+ ConditionalWork cndWork = new ConditionalWork(listWorks);
+ ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, parseCtx.getConf());
+ cndTsk.setListTasks(listTasks);
+
+ // set resolver and resolver context
+ cndTsk.setResolver(new ConditionalResolverCommonJoin());
+ ConditionalResolverCommonJoinCtx resolverCtx = new ConditionalResolverCommonJoinCtx();
+ resolverCtx.setPathToAliases(pathToAliases);
+ resolverCtx.setAliasToKnownSize(aliasToSize);
+ resolverCtx.setAliasToTask(aliasToTask);
+ resolverCtx.setCommonJoinTask(currTask);
+ resolverCtx.setLocalTmpDir(context.getLocalScratchDir(false));
+ resolverCtx.setHdfsTmpDir(context.getMRScratchDir());
+ cndTsk.setResolverCtx(resolverCtx);
+
+ // replace the current task with the new generated conditional task
+ replaceTaskWithConditionalTask(currTask, cndTsk, physicalContext);
+ return cndTsk;
+ }
+
+ /**
+ * If a join/union is followed by an SMB join, this cannot be converted to a
+ * conditional task.
+ */
+ private boolean reducerAllowedSMBJoinOp(Operator<? extends OperatorDesc> reducer) {
+ while (reducer != null) {
+ if (!reducer.opAllowedBeforeSortMergeJoin()) {
+ return false;
+ }
+
+ List<Operator<? extends OperatorDesc>> childOps = reducer.getChildOperators();
+ if ((childOps == null) || (childOps.isEmpty())) {
+ return true;
+ }
+
+ // multi-table inserts not supported
+ if (childOps.size() > 1) {
+ return false;
+ }
+ reducer = childOps.get(0);
+ }
+
+ return true;
+ }
+
+ private SMBMapJoinOperator getSMBMapJoinOp(Operator<? extends OperatorDesc> currOp,
+ Operator<? extends OperatorDesc> reducer) {
+ SMBMapJoinOperator ret = null;
+ while (true) {
+ if (currOp instanceof SMBMapJoinOperator) {
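+ // a second SMB join in the same operator chain cannot be handled; give up
+ // on the conversion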
+ if (ret != null) {
+ return null;
+ }
+ ret = (SMBMapJoinOperator) currOp;
+ }
+
+ // Does any operator in the tree stop the task from being converted to a conditional task?
+ if (!currOp.opAllowedBeforeSortMergeJoin()) {
+ return null;
+ }
+
+ List<Operator<? extends OperatorDesc>> childOps = currOp.getChildOperators();
+ if ((childOps == null) || (childOps.isEmpty())) {
+ return reducerAllowedSMBJoinOp(reducer) ? ret : null;
+ }
+
+ // multi-table inserts not supported
+ if (childOps.size() > 1) {
+ return null;
+ }
+ currOp = childOps.get(0);
+ }
+ }
+
+ private SMBMapJoinOperator getSMBMapJoinOp(MapredWork work) throws SemanticException {
+ if (work != null) {
+ Operator<? extends OperatorDesc> reducer = work.getReducer();
+ for (Operator<? extends OperatorDesc> op : work.getAliasToWork().values()) {
+ SMBMapJoinOperator smbMapJoinOp = getSMBMapJoinOp(op, reducer);
+ if (smbMapJoinOp != null) {
+ return smbMapJoinOp;
+ }
+ }
+ }
+ return null;
+ }
+
+ private MapJoinOperator getMapJoinOperator(MapRedTask task,
+ MapredWork work,
+ SMBMapJoinOperator oldSMBJoinOp,
+ QBJoinTree joinTree,
+ int mapJoinPos) throws SemanticException {
+ SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(task.getWork());
+
+ // Add the row resolver for the new operator
+ Map<Operator<? extends OperatorDesc>, OpParseContext> opParseContextMap =
+ physicalContext.getParseContext().getOpParseCtx();
+ opParseContextMap.put(newSMBJoinOp, opParseContextMap.get(oldSMBJoinOp));
+
+ // generate the map join operator
+ return MapJoinProcessor.convertSMBJoinToMapJoin(opParseContextMap, newSMBJoinOp,
+ joinTree, mapJoinPos, true);
+ }
+}
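
The task tree built by processCurrentTask() above amounts to a runtime fallback
pattern: the resolver picks a feasible map-join candidate, and the work is
re-executed as the sort-merge join if that candidate fails. A minimal,
self-contained sketch of just that control flow (the types below are
illustrative stand-ins, not Hive classes):

    import java.util.ArrayList;
    import java.util.List;

    public class ConditionalFallbackSketch {
      interface JoinTask {
        long run() throws Exception;
      }

      public static void main(String[] args) {
        List<JoinTask> mapJoinCandidates = new ArrayList<JoinTask>();
        // a map-join candidate; may die at runtime, e.g. hash table too large
        mapJoinCandidates.add(new JoinTask() {
          public long run() throws Exception {
            throw new Exception("local hash table exceeded memory limit");
          }
        });
        // the original sort-merge join: slower, but always feasible
        JoinTask smbBackup = new JoinTask() {
          public long run() {
            return 42L;
          }
        };

        long rows;
        try {
          // the resolver (ConditionalResolverCommonJoin) picks one candidate
          // based on the known input sizes; here we simply take the first
          rows = mapJoinCandidates.get(0).run();
        } catch (Exception e) {
          // setBackupTask()/setBackupChildrenTasks() give the engine this
          // escape hatch: rerun the work as the sort-merge join
          try {
            rows = smbBackup.run();
          } catch (Exception fatal) {
            throw new RuntimeException(fatal);
          }
        }
        System.out.println("rows: " + rows);
      }
    }
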
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java Wed Apr 24 01:33:05 2013
@@ -71,7 +71,7 @@ public class ConditionalResolverCommonJo
}
public HashMap<String, Long> getAliasToKnownSize() {
- return aliasToKnownSize;
+ return aliasToKnownSize == null ? new HashMap<String, Long>() : aliasToKnownSize;
}
public void setAliasToKnownSize(HashMap<String, Long> aliasToKnownSize) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java Wed Apr 24 01:33:05 2013
@@ -50,6 +50,16 @@ public class FileSinkDesc extends Abstra
private String staticSpec; // static partition spec ends with a '/'
private boolean gatherStats;
+ // Consider a query like:
+ // insert overwrite table T3 select ... from T1 join T2 on T1.key = T2.key;
+ // where T1, T2 and T3 are sorted and bucketed by key into the same number of buckets.
+ // We don't need a reducer to enforce bucketing and sorting for T3.
+ // The field below captures the fact that the reducer introduced to enforce the
+ // sorting/bucketing of T3 has been removed.
+ // In this case a sort-merge join is required, so the sort-merge join between T1 and
+ // T2 cannot be converted to a map-join
+ private transient boolean removedReduceSinkBucketSort;
+
// This file descriptor is linked to other file descriptors.
// One use case is that, a union->select (star)->file sink, is broken down.
// For eg: consider a query like:
@@ -364,4 +374,11 @@ public class FileSinkDesc extends Abstra
this.statsCollectRawDataSize = statsCollectRawDataSize;
}
+ public boolean isRemovedReduceSinkBucketSort() {
+ return removedReduceSinkBucketSort;
+ }
+
+ public void setRemovedReduceSinkBucketSort(boolean removedReduceSinkBucketSort) {
+ this.removedReduceSinkBucketSort = removedReduceSinkBucketSort;
+ }
}
Added: hive/trunk/ql/src/test/queries/clientnegative/auto_sortmerge_join_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/auto_sortmerge_join_1.q?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/auto_sortmerge_join_1.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/auto_sortmerge_join_1.q Wed Apr 24 01:33:05 2013
@@ -0,0 +1,25 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert overwrite table tbl1 select * from src where key < 20;
+insert overwrite table tbl2 select * from src where key < 10;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.join=true;
+
+-- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key;
+
+-- Since tbl1 is the bigger table, tbl1 Right Outer Join tbl2 cannot be performed
+explain
+select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q Wed Apr 24 01:33:05 2013
@@ -1,6 +1,7 @@
-- small 1 part, 2 bucket & big 2 part, 4 bucket
-CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
load data local inpath '../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
load data local inpath '../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
@@ -20,6 +21,7 @@ set hive.auto.convert.sortmerge.join=tru
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -28,3 +30,7 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q Wed Apr 24 01:33:05 2013
@@ -17,7 +17,7 @@ set hive.optimize.bucketmapjoin.sortedme
set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.auto.convert.sortmerge.join=true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
-- One of the subqueries contains a union, so it should not be converted to a sort-merge join.
explain
select count(*) from
Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q Wed Apr 24 01:33:05 2013
@@ -0,0 +1,81 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert overwrite table tbl1 select * from src where key < 10;
+insert overwrite table tbl2 select * from src where key < 10;
+
+CREATE TABLE dest1(k1 int, k2 int);
+CREATE TABLE dest2(k1 string, k2 string);
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.join=true;
+
+-- An SMB join followed by a multi-insert
+explain
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+select * from dest1 order by k1, k2;
+select * from dest2 order by k1, k2;
+
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=200;
+
+-- An SMB join followed by a multi-insert
+explain
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+select * from dest1 order by k1, k2;
+select * from dest2 order by k1, k2;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+-- An SMB join followed by a multi-insert
+explain
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+from (
+ SELECT a.key key1, a.value value1, b.key key2, b.value value2
+ FROM tbl1 a JOIN tbl2 b
+ ON a.key = b.key ) subq
+INSERT OVERWRITE TABLE dest1 select key1, key2
+INSERT OVERWRITE TABLE dest2 select value1, value2;
+
+select * from dest1 order by k1, k2;
+select * from dest2 order by k1, k2;
Added: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q Wed Apr 24 01:33:05 2013
@@ -0,0 +1,29 @@
+set hive.enforce.bucketing = true;
+set hive.enforce.sorting = true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert overwrite table tbl1 select * from src where key < 20;
+insert overwrite table tbl2 select * from src where key < 10;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.auto.convert.join=true;
+
+-- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key;
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key;
+
+insert overwrite table tbl2 select * from src where key < 200;
+
+-- Since tbl2 is the bigger table, tbl1 Right Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key;
+select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q Wed Apr 24 01:33:05 2013
@@ -16,9 +16,17 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
-- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.mapjoin.localtask.max.memory.usage = 0.0001;
+set hive.mapjoin.check.memory.rows = 2;
+
+-- The mapjoin should fail, resulting in the sort-merge join being used
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q Wed Apr 24 01:33:05 2013
@@ -16,7 +16,7 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -25,3 +25,7 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q Wed Apr 24 01:33:05 2013
@@ -18,7 +18,7 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -27,3 +27,7 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_5.q Wed Apr 24 01:33:05 2013
@@ -12,7 +12,7 @@ load data local inpath '../data/files/sr
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -21,3 +21,8 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.auto.convert.join=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_6.q Wed Apr 24 01:33:05 2013
@@ -18,6 +18,24 @@ set hive.optimize.bucketmapjoin.sortedme
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=200;
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
+-- An SMB join is followed by a regular join on a non-bucketed table on a different key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value;
+
+-- An SMB join is followed by a regular join on a non-bucketed table on the same key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.key = a.key;
+
+-- An SMB join is followed by a regular join on a bucketed table on the same key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl3 c on c.key = a.key;
+
+-- An SMB join is followed by a regular join on a bucketed table on a different key
+explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value;
+select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join tbl4 c on c.value = a.value;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
-- A SMB join is being followed by a regular join on a non-bucketed table on a different key
explain select count(*) FROM tbl1 a JOIN tbl2 b ON a.key = b.key join src c on c.value = a.value;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q Wed Apr 24 01:33:05 2013
@@ -21,7 +21,7 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -30,3 +30,7 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q Wed Apr 24 01:33:05 2013
@@ -21,7 +21,7 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
@@ -30,3 +30,11 @@ select count(*) FROM bucket_small a JOIN
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.mapjoin.localtask.max.memory.usage = 0.0001;
+set hive.mapjoin.check.memory.rows = 2;
+
+-- The mapjoin should fail, resulting in the sort-merge join being used
+explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
+select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
Modified: hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q Wed Apr 24 01:33:05 2013
@@ -15,7 +15,7 @@ set hive.auto.convert.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join=true;
-
+set hive.auto.convert.sortmerge.join.to.mapjoin=false;
-- The join is being performed as part of sub-query. It should be converted to a sort-merge join
explain
select count(*) from (
@@ -281,3 +281,256 @@ select count(*) from (
) subq2
join tbl2 b
on subq2.key = b.key) a;
+
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
+-- The join is being performed as part of a sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1;
+
+select count(*) from (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1;
+
+-- The join is being performed as part of a sub-query. It should be converted to a sort-merge join.
+-- Add an order by at the end to make the results deterministic.
+explain
+select key, count(*) from
+(
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+group by key
+order by key;
+
+select key, count(*) from
+(
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+) subq1
+group by key
+order by key;
+
+-- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join
+explain
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2;
+
+select count(*) from
+(
+ select key, count(*) from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1
+ group by key
+) subq2;
+
+-- A join is being performed across different sub-queries, where a join is being performed in each of them.
+-- Each sub-query should be converted to a sort-merge join.
+explain
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
+
+select src1.key, src1.cnt1, src2.cnt1 from
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq1 group by key
+) src1
+join
+(
+ select key, count(*) as cnt1 from
+ (
+ select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key
+ ) subq2 group by key
+) src2
+on src1.key = src2.key
+order by src1.key, src1.cnt1, src2.cnt1;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should
+-- be converted to a sort-merge join, although there is more than one level of sub-query
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key;
+
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join tbl2 b
+ on subq2.key = b.key;
+
+-- Both the tables are nested sub-queries, i.e. more than one level of sub-query.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key;
+
+select count(*) from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+ join
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq3
+ where key < 6
+ ) subq4
+ on subq2.key = subq4.key;
+
+-- The subquery itself is being joined. Since the sub-query only contains selects and filters and the
+-- join key is not modified, it should be converted to a sort-merge join. Note that the sub-query
+-- modifies one column, but that column is not part of the join key.
+explain
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key;
+
+select count(*) from
+ (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
+ join
+ (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
+ on subq1.key = subq2.key;
+
+-- The left table is a sub-query and the right table is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key;
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join tbl2 a on subq1.key = a.key;
+
+-- The right table is a sub-query and the left table is not.
+-- It should be converted to a sort-merge join.
+explain
+select count(*) from tbl1 a
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq1
+ on a.key = subq1.key;
+
+select count(*) from tbl1 a
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq1
+ on a.key = subq1.key;
+
+-- There are more than 2 inputs to the join, all of them being sub-queries.
+-- It should be converted to a sort-merge join
+explain
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on (subq1.key = subq2.key)
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key);
+
+select count(*) from
+ (select a.key as key, a.value as value from tbl1 a where key < 6) subq1
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq2
+ on subq1.key = subq2.key
+ join
+ (select a.key as key, a.value as value from tbl2 a where key < 6) subq3
+ on (subq1.key = subq3.key);
+
+-- The join is being performed on a nested sub-query, and an aggregation is performed after that.
+-- The join should be converted to a sort-merge join
+explain
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a;
+
+select count(*) from (
+ select subq2.key as key, subq2.value as value1, b.value as value2 from
+ (
+ select * from
+ (
+ select a.key as key, a.value as value from tbl1 a where key < 8
+ ) subq1
+ where key < 6
+ ) subq2
+join tbl2 b
+on subq2.key = b.key) a;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_1.q Wed Apr 24 01:33:05 2013
@@ -6,6 +6,8 @@ set hive.exec.reducers.max = 1;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q Wed Apr 24 01:33:05 2013
@@ -6,6 +6,8 @@ set hive.exec.reducers.max = 1;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_4.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_5.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, key2 INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key, key2) SORTED BY (key ASC, key2 DESC) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q?rev=1471224&r1=1471223&r2=1471224&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_8.q Wed Apr 24 01:33:05 2013
@@ -9,6 +9,8 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
Added: hive/trunk/ql/src/test/results/clientnegative/auto_sortmerge_join_1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/auto_sortmerge_join_1.q.out?rev=1471224&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/auto_sortmerge_join_1.q.out (added)
+++ hive/trunk/ql/src/test/results/clientnegative/auto_sortmerge_join_1.q.out Wed Apr 24 01:33:05 2013
@@ -0,0 +1,184 @@
+PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: insert overwrite table tbl1 select * from src where key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert overwrite table tbl1 select * from src where key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table tbl2 select * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert overwrite table tbl2 select * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
+explain
+select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF (TOK_TABNAME tbl1) a) (TOK_TABREF (TOK_TABNAME tbl2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage , consists of Stage-5, Stage-1
+ Stage-5 has a backup stage: Stage-1
+ Stage-3 depends on stages: Stage-5
+ Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-4
+ Conditional Operator
+
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+FAILED: SemanticException [Error 10057]: MAPJOIN cannot be performed with OUTER JOIN