You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/02/26 22:50:14 UTC
svn commit: r916830 - in /hadoop/hive/branches/branch-0.5: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
Author: namit
Date: Fri Feb 26 21:50:13 2010
New Revision: 916830
URL: http://svn.apache.org/viewvc?rev=916830&view=rev
Log:
HIVE-1200. Fix CombineHiveInputFormat
(Zheng Shao via namit)
Modified:
hadoop/hive/branches/branch-0.5/CHANGES.txt
hadoop/hive/branches/branch-0.5/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
Modified: hadoop/hive/branches/branch-0.5/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.5/CHANGES.txt?rev=916830&r1=916829&r2=916830&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.5/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.5/CHANGES.txt Fri Feb 26 21:50:13 2010
@@ -24,6 +24,9 @@
HIVE-1188. NPE when running TestJdbcDriver/TestHiveServer
(Carl Steinbach via Ning Zhang)
+ HIVE-1200. Fix CombineHiveInputFormat
+ (Zheng Shao via namit)
+
Release 0.5.0
INCOMPATIBLE CHANGES
Modified: hadoop/hive/branches/branch-0.5/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.5/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=916830&r1=916829&r2=916830&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.5/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hadoop/hive/branches/branch-0.5/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Fri Feb 26 21:50:13 2010
@@ -88,30 +88,14 @@
public CombineHiveInputSplit(JobConf job, InputSplitShim inputSplitShim) throws IOException {
this.inputSplitShim = inputSplitShim;
if (job != null) {
- Map<String, partitionDesc> pathToPartitionInfo =
+ Map<String, partitionDesc> pathToPartitionInfo =
Utilities.getMapRedWork(job).getPathToPartitionInfo();
// extract all the inputFormatClass names for each chunk in the CombinedSplit.
Path[] ipaths = inputSplitShim.getPaths();
for (int i = 0; i < ipaths.length; i++) {
- partitionDesc part = null;
- try {
- part = getPartitionDescFromPath(pathToPartitionInfo, ipaths[i].getParent());
- } catch (IOException e) {
- // The file path may be present in case of sampling - so ignore that
- part = null;
- }
-
- if (part == null) {
- try {
- part = getPartitionDescFromPath(pathToPartitionInfo, ipaths[i]);
- } catch (IOException e) {
- LOG.warn("CombineHiveInputSplit unable to find table description for " +
- ipaths[i].getParent());
- continue;
- }
- }
-
+ partitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, ipaths[i]);
+
// create a new InputFormat instance if this is the first time to see this class
if (i == 0)
inputFormatClassName = part.getInputFileFormatClass().getName();
@@ -124,7 +108,7 @@
public InputSplitShim getInputSplitShim() {
return inputSplitShim;
}
-
+
/**
* Returns the inputFormat class name for the i-th chunk
*/
@@ -135,55 +119,55 @@
public void setInputFormatClassName(String inputFormatClassName) {
this.inputFormatClassName = inputFormatClassName;
}
-
+
public JobConf getJob() {
return inputSplitShim.getJob();
}
-
+
public long getLength() {
return inputSplitShim.getLength();
}
-
- /** Returns an array containing the startoffsets of the files in the split*/
+
+ /** Returns an array containing the startoffsets of the files in the split*/
public long[] getStartOffsets() {
return inputSplitShim.getStartOffsets();
}
-
- /** Returns an array containing the lengths of the files in the split*/
+
+ /** Returns an array containing the lengths of the files in the split*/
public long[] getLengths() {
return inputSplitShim.getLengths();
}
-
+
/** Returns the start offset of the i<sup>th</sup> Path */
public long getOffset(int i) {
return inputSplitShim.getOffset(i);
}
-
+
/** Returns the length of the i<sup>th</sup> Path */
public long getLength(int i) {
return inputSplitShim.getLength(i);
}
-
+
/** Returns the number of Paths in the split */
public int getNumPaths() {
return inputSplitShim.getNumPaths();
}
-
+
/** Returns the i<sup>th</sup> Path */
public Path getPath(int i) {
return inputSplitShim.getPath(i);
}
-
+
/** Returns all the Paths in the split */
public Path[] getPaths() {
return inputSplitShim.getPaths();
}
-
+
/** Returns all the Paths where this input-split resides */
public String[] getLocations() throws IOException {
return inputSplitShim.getLocations();
}
-
+
/**
* Prints this object as a string.
*/
@@ -210,20 +194,12 @@
inputSplitShim.write(out);
if (inputFormatClassName == null) {
- Map<String, partitionDesc> pathToPartitionInfo =
+ Map<String, partitionDesc> pathToPartitionInfo =
Utilities.getMapRedWork(getJob()).getPathToPartitionInfo();
-
- // extract all the inputFormatClass names for each chunk in the CombinedSplit.
- partitionDesc part = null;
- try {
- part = getPartitionDescFromPath(pathToPartitionInfo, inputSplitShim.getPath(0).getParent());
- } catch (IOException e) {
- // The file path may be present in case of sampling - so ignore that
- part = null;
- }
- if (part == null)
- part = getPartitionDescFromPath(pathToPartitionInfo, inputSplitShim.getPath(0));
+ // extract all the inputFormatClass names for each chunk in the CombinedSplit.
+ partitionDesc part =
+ getPartitionDescFromPath(pathToPartitionInfo, inputSplitShim.getPath(0));
// create a new InputFormat instance if this is the first time to see this class
inputFormatClassName = part.getInputFileFormatClass().getName();
@@ -258,14 +234,14 @@
CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
result.add(csplit);
}
-
+
LOG.info("number of splits " + result.size());
return result.toArray(new CombineHiveInputSplit[result.size()]);
}
/**
- * Create a generic Hive RecordReader that can iterate over all chunks in
+ * Create a generic Hive RecordReader that can iterate over all chunks in
* a CombinedFileSplit
*/
public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
@@ -280,12 +256,12 @@
throw new IOException("cannot find class " + inputFormatClassName);
}
- initColumnsNeeded(job, inputFormatClass, hsplit.getPath(0).toString(),
+ initColumnsNeeded(job, inputFormatClass, hsplit.getPath(0).toString(),
hsplit.getPath(0).toUri().getPath());
- return
- ShimLoader.getHadoopShims().getCombineFileInputFormat().getRecordReader(job,
- ((CombineHiveInputSplit)split).getInputSplitShim(),
+ return
+ ShimLoader.getHadoopShims().getCombineFileInputFormat().getRecordReader(job,
+ ((CombineHiveInputSplit)split).getInputSplitShim(),
reporter, CombineHiveRecordReader.class);
}
@@ -293,16 +269,23 @@
Map<String, partitionDesc> pathToPartitionInfo, Path dir) throws IOException {
// The format of the keys in pathToPartitionInfo sometimes contains a port
// and sometimes doesn't, so we just compare paths.
+ URI dirUri = dir.toUri();
for (Map.Entry<String, partitionDesc> entry : pathToPartitionInfo.entrySet()) {
try {
- if (new URI(entry.getKey()).getPath().equals(dir.toUri().getPath())) {
+ // Take only the path part of the URI.
+ URI pathOfPartition = new URI(entry.getKey());
+ pathOfPartition = new URI(pathOfPartition.getPath());
+
+ if (!pathOfPartition.relativize(dirUri).equals(dirUri)) {
return entry.getValue();
}
}
- catch (URISyntaxException e2) {}
+ catch (URISyntaxException e2) {
+ LOG.info("getPartitionDescFromPath ", e2);
+ }
}
throw new IOException("cannot find dir = " + dir.toString()
- + " in partToPartitionInfo!");
+ + " in partToPartitionInfo: " + pathToPartitionInfo.keySet());
}
static class CombineFilter implements PathFilter {