You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/18 20:05:34 UTC
svn commit: r1646512 - in
/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql:
exec/Utilities.java io/CombineHiveInputFormat.java
Author: xuefu
Date: Thu Dec 18 19:05:33 2014
New Revision: 1646512
URL: http://svn.apache.org/r1646512
Log:
HIVE-9127: Improve CombineHiveInputFormat.getSplits performance (Brock via Xuefu)
Modified:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1646512&r1=1646511&r2=1646512&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Thu Dec 18 19:05:33 2014
@@ -388,7 +388,6 @@ public final class Utilities {
in = new InflaterInputStream(in);
} else {
LOG.info("Open file to read in plan: " + localPath);
-// in = new FileInputStream(localPath.toUri().getPath());
in = localPath.getFileSystem(conf).open(localPath);
}
@@ -427,8 +426,9 @@ public final class Utilities {
LOG.info("No plan file found: "+path);
return null;
} catch (Exception e) {
- LOG.error("Failed to load plan: "+path, e);
- throw new RuntimeException(e);
+ String msg = "Failed to load plan: " + path + ": " + e;
+ LOG.error(msg, e);
+ throw new RuntimeException(msg, e);
} finally {
if (in != null) {
try {
@@ -710,11 +710,11 @@ public final class Utilities {
// Cache the plan in this process
gWorkMap.put(planPath, w);
-
return planPath;
} catch (Exception e) {
- e.printStackTrace();
- throw new RuntimeException(e);
+ String msg = "Error caching " + name + ": " + e;
+ LOG.error(msg, e);
+ throw new RuntimeException(msg, e);
}
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1646512&r1=1646511&r2=1646512&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Thu Dec 18 19:05:33 2014
@@ -82,8 +82,9 @@ public class CombineHiveInputFormat<K ex
*/
public static class CombineHiveInputSplit extends InputSplitShim {
- String inputFormatClassName;
- CombineFileSplit inputSplitShim;
+ private String inputFormatClassName;
+ private CombineFileSplit inputSplitShim;
+ private Map<String, PartitionDesc> pathToPartitionInfo;
public CombineHiveInputSplit() throws IOException {
this(ShimLoader.getHadoopShims().getCombineFileInputFormat()
@@ -93,20 +94,25 @@ public class CombineHiveInputFormat<K ex
public CombineHiveInputSplit(CombineFileSplit inputSplitShim) throws IOException {
this(inputSplitShim.getJob(), inputSplitShim);
}
-
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim)
throws IOException {
+ this(job, inputSplitShim, null);
+ }
+ public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
+ Map<String, PartitionDesc> pathToPartitionInfo) throws IOException {
this.inputSplitShim = inputSplitShim;
+ this.pathToPartitionInfo = pathToPartitionInfo;
if (job != null) {
- Map<String, PartitionDesc> pathToPartitionInfo = Utilities
- .getMapWork(job).getPathToPartitionInfo();
+ if (this.pathToPartitionInfo == null) {
+ this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
+ }
// extract all the inputFormatClass names for each chunk in the
// CombinedSplit.
Path[] ipaths = inputSplitShim.getPaths();
if (ipaths.length > 0) {
PartitionDesc part = HiveFileFormatUtils
- .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+ .getPartitionDescFromPathRecursively(this.pathToPartitionInfo,
ipaths[0], IOPrepareCache.get().getPartitionDescMap());
inputFormatClassName = part.getInputFileFormatClass().getName();
}
@@ -215,8 +221,9 @@ public class CombineHiveInputFormat<K ex
inputSplitShim.write(out);
if (inputFormatClassName == null) {
- Map<String, PartitionDesc> pathToPartitionInfo = Utilities
- .getMapWork(getJob()).getPathToPartitionInfo();
+ if (pathToPartitionInfo == null) {
+ pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
+ }
// extract all the inputFormatClass names for each chunk in the
// CombinedSplit.
@@ -268,8 +275,8 @@ public class CombineHiveInputFormat<K ex
/**
* Create Hive splits based on CombineFileSplit.
*/
- private InputSplit[] getCombineSplits(JobConf job,
- int numSplits) throws IOException {
+ private InputSplit[] getCombineSplits(JobConf job, int numSplits, Map<String, PartitionDesc> pathToPartitionInfo)
+ throws IOException {
PerfLogger perfLogger = PerfLogger.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
init(job);
@@ -438,7 +445,7 @@ public class CombineHiveInputFormat<K ex
}
for (CombineFileSplit is : iss) {
- CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
+ CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is, pathToPartitionInfo);
result.add(csplit);
}
@@ -505,7 +512,8 @@ public class CombineHiveInputFormat<K ex
if (combinablePaths.size() > 0) {
FileInputFormat.setInputPaths(job, combinablePaths.toArray
(new Path[combinablePaths.size()]));
- InputSplit[] splits = getCombineSplits(job, numSplits);
+ Map<String, PartitionDesc> pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
+ InputSplit[] splits = getCombineSplits(job, numSplits, pathToPartitionInfo);
for (InputSplit split : splits) {
result.add(split);
}