You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by ga...@apache.org on 2012/03/12 20:24:44 UTC
svn commit: r1299828 - in /incubator/hcatalog/branches/branch-0.4:
CHANGES.txt
src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java
src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java
Author: gates
Date: Mon Mar 12 20:24:44 2012
New Revision: 1299828
URL: http://svn.apache.org/viewvc?rev=1299828&view=rev
Log:
HCATALOG-276 After merging in HCATALOG-237 related changes Pig scripts with more than one store fail, take 2
Modified:
incubator/hcatalog/branches/branch-0.4/CHANGES.txt
incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java
incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java
Modified: incubator/hcatalog/branches/branch-0.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/CHANGES.txt?rev=1299828&r1=1299827&r2=1299828&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/CHANGES.txt (original)
+++ incubator/hcatalog/branches/branch-0.4/CHANGES.txt Mon Mar 12 20:24:44 2012
@@ -86,7 +86,7 @@ Release 0.4.0 - Unreleased
HCAT-278 When outputSchema doesn't match table schema wrong columns are returned to the user (gates)
- HCAT-276 After merging in HCATALOG-237 related changes Pig scripts with more than one store fail (daijy via gates)
+ HCAT-276 After merging in HCATALOG-237 related changes Pig scripts with more than one store fail (daijy and mithun via gates)
HCAT-257 e2e harness not working properly after file location change (gates)
Modified: incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java?rev=1299828&r1=1299827&r2=1299828&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java (original)
+++ incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java Mon Mar 12 20:24:44 2012
@@ -102,12 +102,11 @@ class FileOutputCommitterContainer exten
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
if (!dynamicPartitioningUsed){
- OutputJobInfo outputJobInfo = HCatOutputFormat.getJobInfo(context);
//TODO fix this hack, something wrong with pig
//running multiple storers in a single job, the real output dir got overwritten or something
//the location in OutputJobInfo is still correct so we'll use that
//TestHCatStorer.testMultiPartColsInData() used to fail without this
- context.getConfiguration().set("mapred.output.dir",outputJobInfo.getLocation());
+ resetMapRedOutputDirFromJobInfo(context.getConfiguration());
getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context));
}
}
@@ -125,6 +124,9 @@ class FileOutputCommitterContainer exten
@Override
public void setupJob(JobContext context) throws IOException {
if(getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
+ // TODO: Hack! Pig messes up mapred.output.dir, when 2 Storers are used in the same Pig script.
+ // Workaround: Set mapred.output.dir from OutputJobInfo.
+ context.getConfiguration().set("mapred.output.dir", jobInfo.getLocation());
getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context));
}
// in dynamic usecase, called through FileRecordWriterContainer
@@ -140,13 +142,16 @@ class FileOutputCommitterContainer exten
@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
org.apache.hadoop.mapred.JobContext
- marpedJobContext = HCatMapRedUtil.createJobContext(jobContext);
+ mapRedJobContext = HCatMapRedUtil.createJobContext(jobContext);
if (dynamicPartitioningUsed){
discoverPartitions(jobContext);
}
if(getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
- getBaseOutputCommitter().abortJob(marpedJobContext, state);
+ // TODO: Hack! Pig messes up mapred.output.dir, when 2 Storers are used in the same Pig script.
+ // Workaround: Set mapred.output.dir from OutputJobInfo.
+ resetMapRedOutputDirFromJobInfo(mapRedJobContext.getConfiguration());
+ getBaseOutputCommitter().abortJob(mapRedJobContext, state);
}
else if (dynamicPartitioningUsed){
for(JobContext currContext : contextDiscoveredByPath.values()){
@@ -219,6 +224,9 @@ class FileOutputCommitterContainer exten
discoverPartitions(jobContext);
}
if(getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
+ // TODO: Hack! Pig messes up mapred.output.dir, when 2 Storers are used in the same Pig script.
+ // Workaround: Set mapred.output.dir from OutputJobInfo.
+ resetMapRedOutputDirFromJobInfo(jobContext.getConfiguration());
getBaseOutputCommitter().commitJob(HCatMapRedUtil.createJobContext(jobContext));
}
// create _SUCCESS FILE if so requested.
@@ -256,7 +264,10 @@ class FileOutputCommitterContainer exten
if( table.getPartitionKeys().size() == 0 ) {
//non partitioned table
if(getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
- getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context));
+ // TODO: Hack! Pig messes up mapred.output.dir, when 2 Storers are used in the same Pig script.
+ // Workaround: Set mapred.output.dir from OutputJobInfo.
+ resetMapRedOutputDirFromJobInfo(context.getConfiguration());
+ getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context));
}
else if (dynamicPartitioningUsed){
for(JobContext currContext : contextDiscoveredByPath.values()){
@@ -688,4 +699,15 @@ class FileOutputCommitterContainer exten
}
}
+ /**
+ * TODO: Clean up this Hack! Resetting mapred.output.dir from OutputJobInfo.
+ * This works around PIG-2578, where Pig messes up output-directory
+ * if multiple storers are used in the same pig-script.
+ * @param config The configuration whose mapred.output.dir is to be reset.
+ */
+ private void resetMapRedOutputDirFromJobInfo(Configuration config) {
+ String outputLocation = jobInfo.getLocation();
+ if (outputLocation != null)
+ config.set("mapred.output.dir", outputLocation);
+ }
}
Modified: incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java?rev=1299828&r1=1299827&r2=1299828&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java (original)
+++ incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java Mon Mar 12 20:24:44 2012
@@ -231,7 +231,6 @@ public class HCatOutputFormat extends HC
public RecordWriter<WritableComparable<?>, HCatRecord>
getRecordWriter(TaskAttemptContext context)
throws IOException, InterruptedException {
- getOutputFormat(context).getOutputCommitter(context).setupJob(context);
return getOutputFormat(context).getRecordWriter(context);
}