You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/06/15 22:48:47 UTC
svn commit: r1350792 [1/2] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/if/
ql/src/gen/thrift/gen-cpp/
ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/
ql/src/gen/thrift/gen-php/queryplan/ ql/src/gen/thrift/...
Author: namit
Date: Fri Jun 15 20:48:46 2012
New Revision: 1350792
URL: http://svn.apache.org/viewvc?rev=1350792&view=rev
Log:
HIVE-3106 Add option to make multi inserts more atomic
(Kevin Wilfong via namit)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DependencyCollectionTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DependencyCollectionWork.java
hive/trunk/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q
hive/trunk/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/conf/hive-default.xml.template
hive/trunk/ql/if/queryplan.thrift
hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp
hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h
hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java
hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php
hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py
hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Fri Jun 15 20:48:46 2012
@@ -586,6 +586,11 @@ public class HiveConf extends Configurat
// beginning and end of Driver.run, these will be run in the order specified
HIVE_DRIVER_RUN_HOOKS("hive.exec.driver.run.hooks", ""),
HIVE_DDL_OUTPUT_FORMAT("hive.ddl.output.format", null),
+
+ // If this is set, all move tasks at the end of a multi-insert query will only begin once all
+ // outputs are ready
+ HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES(
+ "hive.multi.insert.move.tasks.share.dependencies", false),
;
public final String varname;
Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Fri Jun 15 20:48:46 2012
@@ -1306,5 +1306,22 @@
</description>
</property>
+<property>
+ <name>hive.multi.insert.move.tasks.share.dependencies</name>
+ <value>false</value>
+ <description>
+ If this is set all move tasks for tables/partitions (not directories) at the end of a
+ multi-insert query will only begin once the dependencies for all these move tasks have been
+ met.
+ Advantages: If concurrency is enabled, the locks will only be released once the query has
+ finished, so with this config enabled, the time when the table/partition is
+ generated will be much closer to when the lock on it is released.
+ Disadvantages: If concurrency is not enabled and this config is disabled, the tables/partitions
+ which are produced by this query and finish earlier will be available for querying
+ much earlier; enabling this config gives up that benefit. Since the locks are only
+ released once the query finishes, this does not apply if concurrency is enabled.
+ </description>
+</property>
+
</configuration>
Modified: hive/trunk/ql/if/queryplan.thrift
URL: http://svn.apache.org/viewvc/hive/trunk/ql/if/queryplan.thrift?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/if/queryplan.thrift (original)
+++ hive/trunk/ql/if/queryplan.thrift Fri Jun 15 20:48:46 2012
@@ -90,6 +90,7 @@ enum StageType {
MAPREDLOCAL,
MOVE,
STATS,
+ DEPENDENCY_COLLECTION,
}
struct Stage {
Modified: hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp (original)
+++ hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.cpp Fri Jun 15 20:48:46 2012
@@ -91,7 +91,8 @@ int _kStageTypeValues[] = {
StageType::FUNC,
StageType::MAPREDLOCAL,
StageType::MOVE,
- StageType::STATS
+ StageType::STATS,
+ StageType::DEPENDENCY_COLLECTION
};
const char* _kStageTypeNames[] = {
"CONDITIONAL",
@@ -103,9 +104,10 @@ const char* _kStageTypeNames[] = {
"FUNC",
"MAPREDLOCAL",
"MOVE",
- "STATS"
+ "STATS",
+ "DEPENDENCY_COLLECTION"
};
-const std::map<int, const char*> _StageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(10, _kStageTypeValues, _kStageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
+const std::map<int, const char*> _StageType_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(11, _kStageTypeValues, _kStageTypeNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL));
const char* Adjacency::ascii_fingerprint = "BC4F8C394677A1003AA9F56ED26D8204";
const uint8_t Adjacency::binary_fingerprint[16] = {0xBC,0x4F,0x8C,0x39,0x46,0x77,0xA1,0x00,0x3A,0xA9,0xF5,0x6E,0xD2,0x6D,0x82,0x04};
Modified: hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h (original)
+++ hive/trunk/ql/src/gen/thrift/gen-cpp/queryplan_types.h Fri Jun 15 20:48:46 2012
@@ -79,7 +79,8 @@ struct StageType {
FUNC = 6,
MAPREDLOCAL = 7,
MOVE = 8,
- STATS = 9
+ STATS = 9,
+ DEPENDENCY_COLLECTION = 10
};
};
Modified: hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java (original)
+++ hive/trunk/ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/StageType.java Fri Jun 15 20:48:46 2012
@@ -20,7 +20,8 @@ public enum StageType implements org.apa
FUNC(6),
MAPREDLOCAL(7),
MOVE(8),
- STATS(9);
+ STATS(9),
+ DEPENDENCY_COLLECTION(10);
private final int value;
@@ -61,6 +62,8 @@ public enum StageType implements org.apa
return MOVE;
case 9:
return STATS;
+ case 10:
+ return DEPENDENCY_COLLECTION;
default:
return null;
}
Modified: hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php (original)
+++ hive/trunk/ql/src/gen/thrift/gen-php/queryplan/queryplan_types.php Fri Jun 15 20:48:46 2012
@@ -125,6 +125,7 @@ $GLOBALS['E_StageType'] = array(
'MAPREDLOCAL' => 7,
'MOVE' => 8,
'STATS' => 9,
+ 'DEPENDENCY_COLLECTION' => 10,
);
final class StageType {
@@ -138,6 +139,7 @@ final class StageType {
const MAPREDLOCAL = 7;
const MOVE = 8;
const STATS = 9;
+ const DEPENDENCY_COLLECTION = 10;
static public $__names = array(
0 => 'CONDITIONAL',
1 => 'COPY',
@@ -149,6 +151,7 @@ final class StageType {
7 => 'MAPREDLOCAL',
8 => 'MOVE',
9 => 'STATS',
+ 10 => 'DEPENDENCY_COLLECTION',
);
}
Modified: hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py (original)
+++ hive/trunk/ql/src/gen/thrift/gen-py/queryplan/ttypes.py Fri Jun 15 20:48:46 2012
@@ -132,6 +132,7 @@ class StageType:
MAPREDLOCAL = 7
MOVE = 8
STATS = 9
+ DEPENDENCY_COLLECTION = 10
_VALUES_TO_NAMES = {
0: "CONDITIONAL",
@@ -144,6 +145,7 @@ class StageType:
7: "MAPREDLOCAL",
8: "MOVE",
9: "STATS",
+ 10: "DEPENDENCY_COLLECTION",
}
_NAMES_TO_VALUES = {
@@ -157,6 +159,7 @@ class StageType:
"MAPREDLOCAL": 7,
"MOVE": 8,
"STATS": 9,
+ "DEPENDENCY_COLLECTION": 10,
}
Modified: hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb (original)
+++ hive/trunk/ql/src/gen/thrift/gen-rb/queryplan_types.rb Fri Jun 15 20:48:46 2012
@@ -61,8 +61,9 @@ module StageType
MAPREDLOCAL = 7
MOVE = 8
STATS = 9
- VALUE_MAP = {0 => "CONDITIONAL", 1 => "COPY", 2 => "DDL", 3 => "MAPRED", 4 => "EXPLAIN", 5 => "FETCH", 6 => "FUNC", 7 => "MAPREDLOCAL", 8 => "MOVE", 9 => "STATS"}
- VALID_VALUES = Set.new([CONDITIONAL, COPY, DDL, MAPRED, EXPLAIN, FETCH, FUNC, MAPREDLOCAL, MOVE, STATS]).freeze
+ DEPENDENCY_COLLECTION = 10
+ VALUE_MAP = {0 => "CONDITIONAL", 1 => "COPY", 2 => "DDL", 3 => "MAPRED", 4 => "EXPLAIN", 5 => "FETCH", 6 => "FUNC", 7 => "MAPREDLOCAL", 8 => "MOVE", 9 => "STATS", 10 => "DEPENDENCY_COLLECTION"}
+ VALID_VALUES = Set.new([CONDITIONAL, COPY, DDL, MAPRED, EXPLAIN, FETCH, FUNC, MAPREDLOCAL, MOVE, STATS, DEPENDENCY_COLLECTION]).freeze
end
class Adjacency
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DependencyCollectionTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DependencyCollectionTask.java?rev=1350792&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DependencyCollectionTask.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DependencyCollectionTask.java Fri Jun 15 20:48:46 2012
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
+import org.apache.hadoop.hive.ql.plan.api.StageType;
+
+/**
+ * DependencyCollectionTask.
+ *
+ * Exists for the sole purpose of reducing the number of dependency edges in the task graph.
+ **/
+public class DependencyCollectionTask extends Task<DependencyCollectionWork>
+ implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ public DependencyCollectionTask() {
+ super();
+ }
+
+ @Override
+ public int execute(DriverContext driverContext) {
+ return 0;
+ }
+
+ @Override
+ public StageType getType() {
+ return StageType.DEPENDENCY_COLLECTION;
+ }
+
+ @Override
+ public String getName() {
+ return "DEPENDENCY_COLLECTION";
+ }
+
+ @Override
+ protected void localizeMRTmpFilesImpl(Context ctx) {
+ // a dependency collection task performs no execution of its own and so
+ // does not use any map-reduce tmp files
+ // we don't expect to enter this code path at all
+ throw new RuntimeException ("Unexpected call");
+ }
+}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java Fri Jun 15 20:48:46 2012
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.io.rcfi
import org.apache.hadoop.hive.ql.plan.ConditionalWork;
import org.apache.hadoop.hive.ql.plan.CopyWork;
import org.apache.hadoop.hive.ql.plan.DDLWork;
+import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
import org.apache.hadoop.hive.ql.plan.ExplainWork;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.FunctionWork;
@@ -78,7 +79,8 @@ public final class TaskFactory {
StatsTask.class));
taskvec.add(new taskTuple<MergeWork>(MergeWork.class,
BlockMergeTask.class));
-
+ taskvec.add(new taskTuple<DependencyCollectionWork>(DependencyCollectionWork.class,
+ DependencyCollectionTask.class));
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java Fri Jun 15 20:48:46 2012
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.ErrorMs
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
@@ -110,7 +111,7 @@ public class GenMRFileSink1 implements N
// Has the user enabled merging of files for map-only jobs or for all jobs
if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
- List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();
+ List<Task<MoveWork>> mvTasks = ctx.getMvTask();
// In case of unions or map-joins, it is possible that the file has
// already been seen.
@@ -429,12 +430,42 @@ public class GenMRFileSink1 implements N
private void LinkMoveTask(GenMRProcContext ctx, FileSinkOperator newOutput,
ConditionalTask cndTsk) {
- List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();
- Task<? extends Serializable> mvTask = findMoveTask(mvTasks, newOutput);
+ List<Task<MoveWork>> mvTasks = ctx.getMvTask();
+ Task<MoveWork> mvTask = findMoveTask(mvTasks, newOutput);
+
+ for (Task<? extends Serializable> tsk : cndTsk.getListTasks()) {
+ addDependentMoveTasks(ctx, mvTask, tsk);
+ }
+ }
+
+ /**
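+ * Does nothing if mvTask is null. If HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES is set, adds
+ * the dependencyTaskForMultiInsert in ctx as a dependent of parentTask, and adds mvTask as a
+ * dependent of that task if it is a load table, or of parentTask otherwise. If the config is not
+ * set, simply adds mvTask as a dependent of parentTask.
+ * @param ctx the processing context supplying the conf and the shared dependency task
+ * @param mvTask the move task to link into the task graph; may be null
+ * @param parentTask the task that mvTask and/or the dependency task will depend on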
+ */
+ private void addDependentMoveTasks(GenMRProcContext ctx, Task<MoveWork> mvTask,
+ Task<? extends Serializable> parentTask) {
if (mvTask != null) {
- for (Task<? extends Serializable> tsk : cndTsk.getListTasks()) {
- tsk.addDependentTask(mvTask);
+ if (ctx.getConf().getBoolVar(
+ HiveConf.ConfVars.HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES)) {
+
+ DependencyCollectionTask dependencyTask = ctx.getDependencyTaskForMultiInsert();
+ parentTask.addDependentTask(dependencyTask);
+ if (mvTask.getWork().getLoadTableWork() != null) {
+ // Moving tables/partitions depend on the dependencyTask
+ dependencyTask.addDependentTask(mvTask);
+ } else {
+ // Moving files depends on the parentTask (we still want the dependencyTask to depend
+ // on the parentTask)
+ parentTask.addDependentTask(mvTask);
+ }
+ } else {
+ parentTask.addDependentTask(mvTask);
}
}
}
@@ -547,11 +578,11 @@ public class GenMRFileSink1 implements N
return cndTsk;
}
- private Task<? extends Serializable> findMoveTask(
- List<Task<? extends Serializable>> mvTasks, FileSinkOperator fsOp) {
+ private Task<MoveWork> findMoveTask(
+ List<Task<MoveWork>> mvTasks, FileSinkOperator fsOp) {
// find the move task
- for (Task<? extends Serializable> mvTsk : mvTasks) {
- MoveWork mvWork = (MoveWork) mvTsk.getWork();
+ for (Task<MoveWork> mvTsk : mvTasks) {
+ MoveWork mvWork = mvTsk.getWork();
String srcDir = null;
if (mvWork.getLoadFileWork() != null) {
srcDir = mvWork.getLoadFileWork().getSourceDir();
@@ -614,7 +645,7 @@ public class GenMRFileSink1 implements N
fsOp.getConf().setDirName(tmpDir);
}
- Task<? extends Serializable> mvTask = null;
+ Task<MoveWork> mvTask = null;
if (!chDir) {
mvTask = findMoveTask(ctx.getMvTask(), fsOp);
@@ -629,7 +660,8 @@ public class GenMRFileSink1 implements N
// Set the move task to be dependent on the current task
if (mvTask != null) {
- currTask.addDependentTask(mvTask);
+
+ addDependentMoveTasks(ctx, mvTask, currTask);
}
// In case of multi-table insert, the path to alias mapping is needed for
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java Fri Jun 15 20:48:46 2012
@@ -27,16 +27,19 @@ import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;
/**
@@ -236,7 +239,7 @@ public class GenMRProcContext implements
private List<FileSinkOperator> seenFileSinkOps;
private ParseContext parseCtx;
- private List<Task<? extends Serializable>> mvTask;
+ private List<Task<MoveWork>> mvTask;
private List<Task<? extends Serializable>> rootTasks;
private LinkedHashMap<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx;
@@ -246,6 +249,7 @@ public class GenMRProcContext implements
private AbstractMapJoinOperator<? extends MapJoinDesc> currMapJoinOp;
private String currAliasId;
private List<Operator<? extends Serializable>> rootOps;
+ private DependencyCollectionTask dependencyTaskForMultiInsert;
/**
* Set of read entities. This list is generated by the walker and is passed to
@@ -285,7 +289,7 @@ public class GenMRProcContext implements
HiveConf conf,
HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap,
List<Operator<? extends Serializable>> seenOps, ParseContext parseCtx,
- List<Task<? extends Serializable>> mvTask,
+ List<Task<MoveWork>> mvTask,
List<Task<? extends Serializable>> rootTasks,
LinkedHashMap<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx,
Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
@@ -307,6 +311,7 @@ public class GenMRProcContext implements
rootOps.addAll(parseCtx.getTopOps().values());
unionTaskMap = new HashMap<UnionOperator, GenMRUnionCtx>();
mapJoinTaskMap = new HashMap<AbstractMapJoinOperator<? extends MapJoinDesc>, GenMRMapJoinCtx>();
+ dependencyTaskForMultiInsert = null;
}
/**
@@ -388,7 +393,7 @@ public class GenMRProcContext implements
/**
* @return the final move task
*/
- public List<Task<? extends Serializable>> getMvTask() {
+ public List<Task<MoveWork>> getMvTask() {
return mvTask;
}
@@ -396,7 +401,7 @@ public class GenMRProcContext implements
* @param mvTask
* the final move task
*/
- public void setMvTask(List<Task<? extends Serializable>> mvTask) {
+ public void setMvTask(List<Task<MoveWork>> mvTask) {
this.mvTask = mvTask;
}
@@ -544,4 +549,20 @@ public class GenMRProcContext implements
public void setConf(HiveConf conf) {
this.conf = conf;
}
+
+ /**
+ * Returns dependencyTaskForMultiInsert initializing it if necessary.
+ *
+ * dependencyTaskForMultiInsert serves as a mutual dependency for the final move tasks in a
+ * multi-insert query.
+ *
+ * @return the shared DependencyCollectionTask, created on the first call
+ */
+ public DependencyCollectionTask getDependencyTaskForMultiInsert() {
+ if (dependencyTaskForMultiInsert == null) {
+ dependencyTaskForMultiInsert =
+ (DependencyCollectionTask) TaskFactory.get(new DependencyCollectionWork(), conf);
+ }
+ return dependencyTaskForMultiInsert;
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1350792&r1=1350791&r2=1350792&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Jun 15 20:48:46 2012
@@ -27,9 +27,9 @@ import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
-import java.util.Map.Entry;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@@ -42,6 +42,7 @@ import org.apache.hadoop.hive.common.Fil
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -97,6 +98,7 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1;
import org.apache.hadoop.hive.ql.optimizer.GenMROperator;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
+import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3;
@@ -106,7 +108,6 @@ import org.apache.hadoop.hive.ql.optimiz
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
-import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -127,6 +128,7 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
import org.apache.hadoop.hive.ql.plan.ForwardDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
@@ -149,14 +151,13 @@ import org.apache.hadoop.hive.ql.plan.Ta
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.UDTFDesc;
import org.apache.hadoop.hive.ql.plan.UnionDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
@@ -164,16 +165,15 @@ import org.apache.hadoop.hive.serde2.Ser
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.hive.metastore.TableType;
/**
* Implementation of the semantic analyzer.
@@ -6960,7 +6960,7 @@ public class SemanticAnalyzer extends Ba
@SuppressWarnings("nls")
private void genMapRedTasks(QB qb) throws SemanticException {
FetchWork fetch = null;
- List<Task<? extends Serializable>> mvTask = new ArrayList<Task<? extends Serializable>>();
+ List<Task<MoveWork>> mvTask = new ArrayList<Task<MoveWork>>();
FetchTask fetchTask = null;
QBParseInfo qbParseInfo = qb.getParseInfo();
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DependencyCollectionWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DependencyCollectionWork.java?rev=1350792&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DependencyCollectionWork.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DependencyCollectionWork.java Fri Jun 15 20:48:46 2012
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+
+/**
+ * DependencyCollectionWork.
+ * Work descriptor for DependencyCollectionTask; it declares no fields beyond serialVersionUID.
+ */
+@Explain(displayName = "Dependency Collection")
+public class DependencyCollectionWork implements Serializable {
+ private static final long serialVersionUID = 1L;
+
+ public DependencyCollectionWork() {
+
+ }
+}
Added: hive/trunk/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q?rev=1350792&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/multi_insert_move_tasks_share_dependencies.q Fri Jun 15 20:48:46 2012
@@ -0,0 +1,413 @@
+set hive.multi.insert.move.tasks.share.dependencies=true;
+
+create table src_multi1 like src;
+create table src_multi2 like src;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+explain
+from (select * from src union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from (select * from src union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from (select * from src union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from (select * from src union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+explain
+from (select * from src union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from (select * from src union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from (select * from src union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from (select * from src union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+explain
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/4' select * where key = 4;
+
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/4' select * where key = 4;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/4' select * where key = 4;
+
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/4' select * where key = 4;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+
+explain
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/4' select * where key = 4;
+
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/4' select * where key = 4;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/4' select * where key = 4;
+
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/4' select * where key = 4;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+from src
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/1' select * where key < 10 group by key, value cluster by key
+insert overwrite local directory '${system:test.tmp.dir}/hive_test/multiins_local/2' select * where key > 10 and key < 20 group by key, value cluster by value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+!ls ${system:test.tmp.dir}/hive_test/multiins_local;
+!rm -fr ${system:test.tmp.dir}/hive_test/multiins_local;