You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/11/22 02:46:29 UTC
[20/35] hive git commit: HIVE-15178 : ORC stripe merge may produce many MR jobs and no merge if split size is small (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
HIVE-15178 : ORC stripe merge may produce many MR jobs and no merge if split size is small (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8a1dcd7e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8a1dcd7e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8a1dcd7e
Branch: refs/heads/hive-14535
Commit: 8a1dcd7e6c1befeb6f0fc38695cf24072570b2a5
Parents: 6efa869
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Nov 18 11:06:39 2016 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Nov 18 11:06:49 2016 -0800
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/mr/ExecDriver.java | 42 +++++++++++---------
.../hadoop/hive/ql/io/merge/MergeFileTask.java | 4 ++
2 files changed, 27 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8a1dcd7e/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
index 9b07e21..34b683c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
@@ -260,25 +260,7 @@ public class ExecDriver extends Task<MapredWork> implements Serializable, Hadoop
throw new RuntimeException(e.getMessage(), e);
}
- if (mWork.getNumMapTasks() != null) {
- job.setNumMapTasks(mWork.getNumMapTasks().intValue());
- }
-
- if (mWork.getMaxSplitSize() != null) {
- HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue());
- }
-
- if (mWork.getMinSplitSize() != null) {
- HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue());
- }
-
- if (mWork.getMinSplitSizePerNode() != null) {
- HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, mWork.getMinSplitSizePerNode().longValue());
- }
-
- if (mWork.getMinSplitSizePerRack() != null) {
- HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, mWork.getMinSplitSizePerRack().longValue());
- }
+ propagateSplitSettings(job, mWork);
job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
job.setReducerClass(ExecReducer.class);
@@ -486,6 +468,28 @@ public class ExecDriver extends Task<MapredWork> implements Serializable, Hadoop
return (returnVal);
}
+ public static void propagateSplitSettings(JobConf job, MapWork work) { // Copies every split-related setting that is non-null on work into job; static, and also called from MergeFileTask in this same commit.
+ if (work.getNumMapTasks() != null) { // skipped when the getter returns null, so job keeps its current map-task count
+ job.setNumMapTasks(work.getNumMapTasks().intValue());
+ }
+
+ if (work.getMaxSplitSize() != null) { // written to job under HiveConf.ConfVars.MAPREDMAXSPLITSIZE
+ HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, work.getMaxSplitSize().longValue());
+ }
+
+ if (work.getMinSplitSize() != null) { // written to job under HiveConf.ConfVars.MAPREDMINSPLITSIZE
+ HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue());
+ }
+
+ if (work.getMinSplitSizePerNode() != null) { // written to job under HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE
+ HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, work.getMinSplitSizePerNode().longValue());
+ }
+
+ if (work.getMinSplitSizePerRack() != null) { // written to job under HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK
+ HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, work.getMinSplitSizePerRack().longValue());
+ }
+ } // end propagateSplitSettings: settings whose getter returned null were not written to job
+
private void handleSampling(Context context, MapWork mWork, JobConf job)
throws Exception {
assert mWork.getAliasToWork().keySet().size() == 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/8a1dcd7e/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java
index 67a6dc7..b3b6431 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.io.merge;
+import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
+
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -108,6 +110,8 @@ public class MergeFileTask extends Task<MergeFileWork> implements Serializable,
fs.mkdirs(tempOutPath);
}
+ ExecDriver.propagateSplitSettings(job, work);
+
// set job name
boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));