You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by ga...@apache.org on 2012/03/12 20:24:44 UTC

svn commit: r1299828 - in /incubator/hcatalog/branches/branch-0.4: CHANGES.txt src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java

Author: gates
Date: Mon Mar 12 20:24:44 2012
New Revision: 1299828

URL: http://svn.apache.org/viewvc?rev=1299828&view=rev
Log:
HCATALOG-276 After merging in HCATALOG-237 related changes Pig scripts with more than one store fail, take 2

Modified:
    incubator/hcatalog/branches/branch-0.4/CHANGES.txt
    incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java
    incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java

Modified: incubator/hcatalog/branches/branch-0.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/CHANGES.txt?rev=1299828&r1=1299827&r2=1299828&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/CHANGES.txt (original)
+++ incubator/hcatalog/branches/branch-0.4/CHANGES.txt Mon Mar 12 20:24:44 2012
@@ -86,7 +86,7 @@ Release 0.4.0 - Unreleased
 
   HCAT-278 When outputSchema doesn't match table schema wrong columns are returned to the user (gates)
 
-  HCAT-276 After merging in HCATALOG-237 related changes Pig scripts with more than one store fail (daijy via gates)
+  HCAT-276 After merging in HCATALOG-237 related changes Pig scripts with more than one store fail (daijy and mithun via gates)
 
   HCAT-257 e2e harness not working properly after file location change (gates)
 

Modified: incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java?rev=1299828&r1=1299827&r2=1299828&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java (original)
+++ incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java Mon Mar 12 20:24:44 2012
@@ -102,12 +102,11 @@ class FileOutputCommitterContainer exten
     @Override
     public void commitTask(TaskAttemptContext context) throws IOException {
         if (!dynamicPartitioningUsed){
-            OutputJobInfo outputJobInfo = HCatOutputFormat.getJobInfo(context);
             //TODO fix this hack, something wrong with pig
             //running multiple storers in a single job, the real output dir got overwritten or something
             //the location in OutputJobInfo is still correct so we'll use that
             //TestHCatStorer.testMultiPartColsInData() used to fail without this
-            context.getConfiguration().set("mapred.output.dir",outputJobInfo.getLocation());
+            resetMapRedOutputDirFromJobInfo(context.getConfiguration());
             getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context));
         }
     }
@@ -125,6 +124,9 @@ class FileOutputCommitterContainer exten
     @Override
     public void setupJob(JobContext context) throws IOException {
         if(getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
+            // TODO: Hack! Pig messes up mapred.output.dir, when 2 Storers are used in the same Pig script.
+            // Workaround: Set mapred.output.dir from OutputJobInfo.
+            context.getConfiguration().set("mapred.output.dir", jobInfo.getLocation());
             getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context));
         }
         // in dynamic usecase, called through FileRecordWriterContainer
@@ -140,13 +142,16 @@ class FileOutputCommitterContainer exten
     @Override
     public void abortJob(JobContext jobContext, State state) throws IOException {
         org.apache.hadoop.mapred.JobContext
-                marpedJobContext = HCatMapRedUtil.createJobContext(jobContext);
+                mapRedJobContext = HCatMapRedUtil.createJobContext(jobContext);
         if (dynamicPartitioningUsed){
             discoverPartitions(jobContext);
         }
 
         if(getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
-            getBaseOutputCommitter().abortJob(marpedJobContext, state);
+            // TODO: Hack! Pig messes up mapred.output.dir, when 2 Storers are used in the same Pig script.
+            // Workaround: Set mapred.output.dir from OutputJobInfo.
+            resetMapRedOutputDirFromJobInfo(mapRedJobContext.getConfiguration());
+            getBaseOutputCommitter().abortJob(mapRedJobContext, state);
         }
         else if (dynamicPartitioningUsed){
             for(JobContext currContext : contextDiscoveredByPath.values()){
@@ -219,6 +224,9 @@ class FileOutputCommitterContainer exten
             discoverPartitions(jobContext);
         }
         if(getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
+            // TODO: Hack! Pig messes up mapred.output.dir, when 2 Storers are used in the same Pig script.
+            // Workaround: Set mapred.output.dir from OutputJobInfo.
+            resetMapRedOutputDirFromJobInfo(jobContext.getConfiguration());
             getBaseOutputCommitter().commitJob(HCatMapRedUtil.createJobContext(jobContext));
         }
         // create _SUCCESS FILE if so requested.
@@ -256,7 +264,10 @@ class FileOutputCommitterContainer exten
         if( table.getPartitionKeys().size() == 0 ) {
             //non partitioned table
             if(getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
-                getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context));
+               // TODO: Hack! Pig messes up mapred.output.dir, when 2 Storers are used in the same Pig script.
+               // Workaround: Set mapred.output.dir from OutputJobInfo.
+               resetMapRedOutputDirFromJobInfo(context.getConfiguration());
+               getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context));
             }
             else if (dynamicPartitioningUsed){
                 for(JobContext currContext : contextDiscoveredByPath.values()){
@@ -688,4 +699,15 @@ class FileOutputCommitterContainer exten
         }
     }
 
+  /**
+   * TODO: Clean up this Hack! Resetting mapred.output.dir from OutputJobInfo.
+   * This works around PIG-2578, where Pig messes up output-directory
+   * if multiple storers are used in the same pig-script.
+   * @param config The configuration whose mapred.output.dir is to be reset.
+   */
+  private void resetMapRedOutputDirFromJobInfo(Configuration config) {
+    String outputLocation = jobInfo.getLocation();
+    if (outputLocation != null)
+      config.set("mapred.output.dir", outputLocation);
+  }
 }

Modified: incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java?rev=1299828&r1=1299827&r2=1299828&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java (original)
+++ incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java Mon Mar 12 20:24:44 2012
@@ -231,7 +231,6 @@ public class HCatOutputFormat extends HC
     public RecordWriter<WritableComparable<?>, HCatRecord>
         getRecordWriter(TaskAttemptContext context)
         throws IOException, InterruptedException {
-      getOutputFormat(context).getOutputCommitter(context).setupJob(context);
       return getOutputFormat(context).getRecordWriter(context);
     }