You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dd...@apache.org on 2009/03/19 14:57:50 UTC

svn commit: r756000 - in /hadoop/core/branches/branch-0.20: ./ CHANGES.txt src/mapred/org/apache/hadoop/mapred/JobHistory.java src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java

Author: ddas
Date: Thu Mar 19 13:57:50 2009
New Revision: 756000

URL: http://svn.apache.org/viewvc?rev=756000&view=rev
Log:
Merge -r 755997:755998 from trunk onto 0.20 branch. Fixes HADOOP-5328.

Modified:
    hadoop/core/branches/branch-0.20/   (props changed)
    hadoop/core/branches/branch-0.20/CHANGES.txt   (contents, props changed)
    hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java
    hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java

Propchange: hadoop/core/branches/branch-0.20/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Mar 19 13:57:50 2009
@@ -1,2 +1,2 @@
 /hadoop/core/branches/branch-0.19:713112
-/hadoop/core/trunk:727001,727117,727191,727212,727217,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,736426,738328,738697,740077,740157,741703,741762,743745,743816,743892,744894,745180,746010,746206,746227,746233,746274,746338,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755960,755986
+/hadoop/core/trunk:727001,727117,727191,727212,727217,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,736426,738328,738697,740077,740157,741703,741762,743745,743816,743892,744894,745180,746010,746206,746227,746233,746274,746338,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755960,755986,755998

Modified: hadoop/core/branches/branch-0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/CHANGES.txt?rev=756000&r1=755999&r2=756000&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.20/CHANGES.txt Thu Mar 19 13:57:50 2009
@@ -773,6 +773,9 @@
     HADOOP-5534. Fixed a deadlock in Fair scheduler's servlet.
     (Rahul Kumar Singh via yhemanth)
 
+    HADOOP-5328. Fixes a problem in the renaming of job history files during job
+    recovery. (Amar Kamat via ddas)
+
 Release 0.19.2 - Unreleased
 
   BUG FIXES

Propchange: hadoop/core/branches/branch-0.20/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Mar 19 13:57:50 2009
@@ -1,3 +1,3 @@
 /hadoop/core/branches/branch-0.18/CHANGES.txt:727226
 /hadoop/core/branches/branch-0.19/CHANGES.txt:713112
-/hadoop/core/trunk/CHANGES.txt:727001,727117,727191,727212,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,735082,736426,738602,738697,739416,740077,740157,741703,741762,743296,743745,743816,743892,744894,745180,745268,746010,746193,746206,746227,746233,746274,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752514,752555,752590,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755986
+/hadoop/core/trunk/CHANGES.txt:727001,727117,727191,727212,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,735082,736426,738602,738697,739416,740077,740157,741703,741762,743296,743745,743816,743892,744894,745180,745268,746010,746193,746206,746227,746233,746274,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752514,752555,752590,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755986,755998

Modified: hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java?rev=756000&r1=755999&r2=756000&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java (original)
+++ hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java Thu Mar 19 13:57:50 2009
@@ -640,13 +640,12 @@
       if (LOG_DIR == null) {
         return null;
       }
-      
-      jobName = escapeRegexChars( jobName );
 
       // Make the pattern matching the job's history file
       final Pattern historyFilePattern = 
         Pattern.compile(jobtrackerHostname + "_" + DIGITS + "_" 
-                        + id.toString() + "_" + user + "_" + jobName + "+");
+                        + id.toString() + "_" + user + "_" 
+                        + escapeRegexChars(jobName) + "+");
       // a path filter that matches 4 parts of the filenames namely
       //  - jt-hostname
       //  - job-id
@@ -671,6 +670,8 @@
       if (statuses.length == 0) {
         filename = 
           encodeJobHistoryFileName(getNewJobHistoryFileName(jobConf, id));
+        LOG.info("Nothing to recover! Generating a new filename " + filename 
+                 + " for job " + id);
       } else {
         // return filename considering that fact the name can be a 
         // secondary filename like filename.recover
@@ -681,6 +682,8 @@
           filename = filename.substring(0, newLength);
         }
         filename = encodeJobHistoryFileName(filename);
+        LOG.info("Recovered job history filename for job " + id + " is " 
+                 + filename);
       }
       return filename;
     }
@@ -698,6 +701,7 @@
       Path logPath = JobHistory.JobInfo.getJobHistoryLogLocation(fileName);
       if (logPath != null) {
         FileSystem fs = logPath.getFileSystem(conf);
+        LOG.info("Deleting job history file " + logPath.getName());
         fs.delete(logPath, false);
       }
       // do the same for the user file too
@@ -725,23 +729,57 @@
     public synchronized static Path recoverJobHistoryFile(JobConf conf, 
                                                           Path logFilePath) 
     throws IOException {
+      Path ret;
       FileSystem fs = logFilePath.getFileSystem(conf);
-      String tmpFilename = getSecondaryJobHistoryFile(logFilePath.getName());
+      String logFileName = logFilePath.getName();
+      String tmpFilename = getSecondaryJobHistoryFile(logFileName);
       Path logDir = logFilePath.getParent();
       Path tmpFilePath = new Path(logDir, tmpFilename);
       if (fs.exists(logFilePath)) {
+        LOG.info(logFileName + " exists!");
         if (fs.exists(tmpFilePath)) {
+          LOG.info("Deleting " + tmpFilename 
+                   + "  and using " + logFileName + " for recovery.");
           fs.delete(tmpFilePath, false);
         }
-        return tmpFilePath;
+        ret = tmpFilePath;
       } else {
+        LOG.info(logFileName + " doesnt exist! Using " 
+                 + tmpFilename + " for recovery.");
         if (fs.exists(tmpFilePath)) {
+          LOG.info("Renaming " + tmpFilename + " to " + logFileName);
           fs.rename(tmpFilePath, logFilePath);
-          return tmpFilePath;
+          ret = tmpFilePath;
+        } else {
+          ret = logFilePath;
+        }
+      }
+
+      // do the same for the user files too
+      logFilePath = getJobHistoryLogLocationForUser(logFileName, conf);
+      if (logFilePath != null) {
+        fs = logFilePath.getFileSystem(conf);
+        logDir = logFilePath.getParent();
+        tmpFilePath = new Path(logDir, tmpFilename);
+        if (fs.exists(logFilePath)) {
+          LOG.info(logFileName + " exists!");
+          if (fs.exists(tmpFilePath)) {
+            LOG.info("Deleting " + tmpFilename + "  and making " + logFileName 
+                     + " as the master history file for user.");
+            fs.delete(tmpFilePath, false);
+          }
         } else {
-          return logFilePath;
+          LOG.info(logFileName + " doesnt exist! Using " 
+                   + tmpFilename + " as the master history file for user.");
+          if (fs.exists(tmpFilePath)) {
+            LOG.info("Renaming " + tmpFilename + " to " + logFileName 
+                     + " in user directory");
+            fs.rename(tmpFilePath, logFilePath);
+          }
         }
       }
+      
+      return ret;
     }
 
     /** Finalize the recovery and make one file in the end. 
@@ -765,6 +803,7 @@
         // rename the tmp file to the master file. Note that this should be 
         // done only when the file is closed and handles are released.
         if(fs.exists(tmpLogPath)) {
+          LOG.info("Renaming " + tmpLogFileName + " to " + masterLogFileName);
           fs.rename(tmpLogPath, masterLogPath);
         }
       }
@@ -779,6 +818,8 @@
       if (masterLogPath != null) {
         FileSystem fs = masterLogPath.getFileSystem(conf);
         if (fs.exists(tmpLogPath)) {
+          LOG.info("Renaming " + tmpLogFileName + " to " + masterLogFileName
+                   + " in user directory");
           fs.rename(tmpLogPath, masterLogPath);
         }
       }
@@ -829,6 +870,7 @@
             fs = new Path(LOG_DIR).getFileSystem(jobConf);
             
             logFile = recoverJobHistoryFile(jobConf, logFile);
+            logFileName = logFile.getName();
             
             int defaultBufferSize = 
               fs.getConf().getInt("io.file.buffer.size", 4096);
@@ -842,13 +884,15 @@
             writers.add(writer);
           }
           if (userLogFile != null) {
+            // Get the actual filename as recoverJobHistoryFile() might return
+            // a different filename
             userLogDir = userLogFile.getParent().toString();
+            userLogFile = new Path(userLogDir, logFileName);
+            
             // create output stream for logging 
             // in hadoop.job.history.user.location
             fs = userLogFile.getFileSystem(jobConf);
  
-            userLogFile = recoverJobHistoryFile(jobConf, userLogFile);
-            
             out = fs.create(userLogFile, true, 4096);
             writer = new PrintWriter(out);
             writers.add(writer);

Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java?rev=756000&r1=755999&r2=756000&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java (original)
+++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java Thu Mar 19 13:57:50 2009
@@ -65,7 +65,7 @@
       jobs[i] = new JobConf(conf);
       Path newOutputDir = outputDir.suffix(String.valueOf(numJobsSubmitted++));
       UtilsForTests.configureWaitingJobConf(jobs[i], inDir, newOutputDir, 
-          numMaps[i], numReds[i], "jt-restart-test-job", mapSignalFile, 
+          numMaps[i], numReds[i], "jt restart test job", mapSignalFile, 
           reduceSignalFile);
       jobs[i].setJobPriority(priorities[i]);
     }