You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/11/22 02:46:29 UTC

[20/35] hive git commit: HIVE-15178 : ORC stripe merge may produce many MR jobs and no merge if split size is small (Sergey Shelukhin, reviewed by Prasanth Jayachandran)

HIVE-15178 : ORC stripe merge may produce many MR jobs and no merge if split size is small (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8a1dcd7e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8a1dcd7e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8a1dcd7e

Branch: refs/heads/hive-14535
Commit: 8a1dcd7e6c1befeb6f0fc38695cf24072570b2a5
Parents: 6efa869
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Nov 18 11:06:39 2016 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Nov 18 11:06:49 2016 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/mr/ExecDriver.java      | 42 +++++++++++---------
 .../hadoop/hive/ql/io/merge/MergeFileTask.java  |  4 ++
 2 files changed, 27 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8a1dcd7e/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
index 9b07e21..34b683c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
@@ -260,25 +260,7 @@ public class ExecDriver extends Task<MapredWork> implements Serializable, Hadoop
       throw new RuntimeException(e.getMessage(), e);
     }
 
-    if (mWork.getNumMapTasks() != null) {
-      job.setNumMapTasks(mWork.getNumMapTasks().intValue());
-    }
-
-    if (mWork.getMaxSplitSize() != null) {
-      HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue());
-    }
-
-    if (mWork.getMinSplitSize() != null) {
-      HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue());
-    }
-
-    if (mWork.getMinSplitSizePerNode() != null) {
-      HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, mWork.getMinSplitSizePerNode().longValue());
-    }
-
-    if (mWork.getMinSplitSizePerRack() != null) {
-      HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, mWork.getMinSplitSizePerRack().longValue());
-    }
+    propagateSplitSettings(job, mWork);
 
     job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
     job.setReducerClass(ExecReducer.class);
@@ -486,6 +468,28 @@ public class ExecDriver extends Task<MapredWork> implements Serializable, Hadoop
     return (returnVal);
   }
 
+  public static void propagateSplitSettings(JobConf job, MapWork work) {
+    if (work.getNumMapTasks() != null) {
+      job.setNumMapTasks(work.getNumMapTasks().intValue());
+    }
+
+    if (work.getMaxSplitSize() != null) {
+      HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, work.getMaxSplitSize().longValue());
+    }
+
+    if (work.getMinSplitSize() != null) {
+      HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue());
+    }
+
+    if (work.getMinSplitSizePerNode() != null) {
+      HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, work.getMinSplitSizePerNode().longValue());
+    }
+
+    if (work.getMinSplitSizePerRack() != null) {
+      HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, work.getMinSplitSizePerRack().longValue());
+    }
+  }
+
   private void handleSampling(Context context, MapWork mWork, JobConf job)
       throws Exception {
     assert mWork.getAliasToWork().keySet().size() == 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/8a1dcd7e/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java
index 67a6dc7..b3b6431 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileTask.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.io.merge;
 
+import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
+
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -108,6 +110,8 @@ public class MergeFileTask extends Task<MergeFileWork> implements Serializable,
         fs.mkdirs(tempOutPath);
       }
 
+      ExecDriver.propagateSplitSettings(job, work);
+
       // set job name
       boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));