You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dd...@apache.org on 2009/03/19 14:57:50 UTC
svn commit: r756000 - in /hadoop/core/branches/branch-0.20: ./ CHANGES.txt
src/mapred/org/apache/hadoop/mapred/JobHistory.java
src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java
Author: ddas
Date: Thu Mar 19 13:57:50 2009
New Revision: 756000
URL: http://svn.apache.org/viewvc?rev=756000&view=rev
Log:
Merge -r 755997:755998 from trunk onto 0.20 branch. Fixes HADOOP-5328.
Modified:
hadoop/core/branches/branch-0.20/ (props changed)
hadoop/core/branches/branch-0.20/CHANGES.txt (contents, props changed)
hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java
hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java
Propchange: hadoop/core/branches/branch-0.20/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Mar 19 13:57:50 2009
@@ -1,2 +1,2 @@
/hadoop/core/branches/branch-0.19:713112
-/hadoop/core/trunk:727001,727117,727191,727212,727217,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,736426,738328,738697,740077,740157,741703,741762,743745,743816,743892,744894,745180,746010,746206,746227,746233,746274,746338,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755960,755986
+/hadoop/core/trunk:727001,727117,727191,727212,727217,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,736426,738328,738697,740077,740157,741703,741762,743745,743816,743892,744894,745180,746010,746206,746227,746233,746274,746338,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755960,755986,755998
Modified: hadoop/core/branches/branch-0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/CHANGES.txt?rev=756000&r1=755999&r2=756000&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.20/CHANGES.txt Thu Mar 19 13:57:50 2009
@@ -773,6 +773,9 @@
HADOOP-5534. Fixed a deadlock in Fair scheduler's servlet.
(Rahul Kumar Singh via yhemanth)
+ HADOOP-5328. Fixes a problem in the renaming of job history files during job
+ recovery. (Amar Kamat via ddas)
+
Release 0.19.2 - Unreleased
BUG FIXES
Propchange: hadoop/core/branches/branch-0.20/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Mar 19 13:57:50 2009
@@ -1,3 +1,3 @@
/hadoop/core/branches/branch-0.18/CHANGES.txt:727226
/hadoop/core/branches/branch-0.19/CHANGES.txt:713112
-/hadoop/core/trunk/CHANGES.txt:727001,727117,727191,727212,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,735082,736426,738602,738697,739416,740077,740157,741703,741762,743296,743745,743816,743892,744894,745180,745268,746010,746193,746206,746227,746233,746274,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752514,752555,752590,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755986
+/hadoop/core/trunk/CHANGES.txt:727001,727117,727191,727212,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,735082,736426,738602,738697,739416,740077,740157,741703,741762,743296,743745,743816,743892,744894,745180,745268,746010,746193,746206,746227,746233,746274,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752514,752555,752590,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755986,755998
Modified: hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java?rev=756000&r1=755999&r2=756000&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java (original)
+++ hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobHistory.java Thu Mar 19 13:57:50 2009
@@ -640,13 +640,12 @@
if (LOG_DIR == null) {
return null;
}
-
- jobName = escapeRegexChars( jobName );
// Make the pattern matching the job's history file
final Pattern historyFilePattern =
Pattern.compile(jobtrackerHostname + "_" + DIGITS + "_"
- + id.toString() + "_" + user + "_" + jobName + "+");
+ + id.toString() + "_" + user + "_"
+ + escapeRegexChars(jobName) + "+");
// a path filter that matches 4 parts of the filenames namely
// - jt-hostname
// - job-id
@@ -671,6 +670,8 @@
if (statuses.length == 0) {
filename =
encodeJobHistoryFileName(getNewJobHistoryFileName(jobConf, id));
+ LOG.info("Nothing to recover! Generating a new filename " + filename
+ + " for job " + id);
} else {
// return filename considering that fact the name can be a
// secondary filename like filename.recover
@@ -681,6 +682,8 @@
filename = filename.substring(0, newLength);
}
filename = encodeJobHistoryFileName(filename);
+ LOG.info("Recovered job history filename for job " + id + " is "
+ + filename);
}
return filename;
}
@@ -698,6 +701,7 @@
Path logPath = JobHistory.JobInfo.getJobHistoryLogLocation(fileName);
if (logPath != null) {
FileSystem fs = logPath.getFileSystem(conf);
+ LOG.info("Deleting job history file " + logPath.getName());
fs.delete(logPath, false);
}
// do the same for the user file too
@@ -725,23 +729,57 @@
public synchronized static Path recoverJobHistoryFile(JobConf conf,
Path logFilePath)
throws IOException {
+ Path ret;
FileSystem fs = logFilePath.getFileSystem(conf);
- String tmpFilename = getSecondaryJobHistoryFile(logFilePath.getName());
+ String logFileName = logFilePath.getName();
+ String tmpFilename = getSecondaryJobHistoryFile(logFileName);
Path logDir = logFilePath.getParent();
Path tmpFilePath = new Path(logDir, tmpFilename);
if (fs.exists(logFilePath)) {
+ LOG.info(logFileName + " exists!");
if (fs.exists(tmpFilePath)) {
+ LOG.info("Deleting " + tmpFilename
+ + " and using " + logFileName + " for recovery.");
fs.delete(tmpFilePath, false);
}
- return tmpFilePath;
+ ret = tmpFilePath;
} else {
+ LOG.info(logFileName + " doesnt exist! Using "
+ + tmpFilename + " for recovery.");
if (fs.exists(tmpFilePath)) {
+ LOG.info("Renaming " + tmpFilename + " to " + logFileName);
fs.rename(tmpFilePath, logFilePath);
- return tmpFilePath;
+ ret = tmpFilePath;
+ } else {
+ ret = logFilePath;
+ }
+ }
+
+ // do the same for the user files too
+ logFilePath = getJobHistoryLogLocationForUser(logFileName, conf);
+ if (logFilePath != null) {
+ fs = logFilePath.getFileSystem(conf);
+ logDir = logFilePath.getParent();
+ tmpFilePath = new Path(logDir, tmpFilename);
+ if (fs.exists(logFilePath)) {
+ LOG.info(logFileName + " exists!");
+ if (fs.exists(tmpFilePath)) {
+ LOG.info("Deleting " + tmpFilename + " and making " + logFileName
+ + " as the master history file for user.");
+ fs.delete(tmpFilePath, false);
+ }
} else {
- return logFilePath;
+ LOG.info(logFileName + " doesnt exist! Using "
+ + tmpFilename + " as the master history file for user.");
+ if (fs.exists(tmpFilePath)) {
+ LOG.info("Renaming " + tmpFilename + " to " + logFileName
+ + " in user directory");
+ fs.rename(tmpFilePath, logFilePath);
+ }
}
}
+
+ return ret;
}
/** Finalize the recovery and make one file in the end.
@@ -765,6 +803,7 @@
// rename the tmp file to the master file. Note that this should be
// done only when the file is closed and handles are released.
if(fs.exists(tmpLogPath)) {
+ LOG.info("Renaming " + tmpLogFileName + " to " + masterLogFileName);
fs.rename(tmpLogPath, masterLogPath);
}
}
@@ -779,6 +818,8 @@
if (masterLogPath != null) {
FileSystem fs = masterLogPath.getFileSystem(conf);
if (fs.exists(tmpLogPath)) {
+ LOG.info("Renaming " + tmpLogFileName + " to " + masterLogFileName
+ + " in user directory");
fs.rename(tmpLogPath, masterLogPath);
}
}
@@ -829,6 +870,7 @@
fs = new Path(LOG_DIR).getFileSystem(jobConf);
logFile = recoverJobHistoryFile(jobConf, logFile);
+ logFileName = logFile.getName();
int defaultBufferSize =
fs.getConf().getInt("io.file.buffer.size", 4096);
@@ -842,13 +884,15 @@
writers.add(writer);
}
if (userLogFile != null) {
+ // Get the actual filename as recoverJobHistoryFile() might return
+ // a different filename
userLogDir = userLogFile.getParent().toString();
+ userLogFile = new Path(userLogDir, logFileName);
+
// create output stream for logging
// in hadoop.job.history.user.location
fs = userLogFile.getFileSystem(jobConf);
- userLogFile = recoverJobHistoryFile(jobConf, userLogFile);
-
out = fs.create(userLogFile, true, 4096);
writer = new PrintWriter(out);
writers.add(writer);
Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java?rev=756000&r1=755999&r2=756000&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java (original)
+++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestart.java Thu Mar 19 13:57:50 2009
@@ -65,7 +65,7 @@
jobs[i] = new JobConf(conf);
Path newOutputDir = outputDir.suffix(String.valueOf(numJobsSubmitted++));
UtilsForTests.configureWaitingJobConf(jobs[i], inDir, newOutputDir,
- numMaps[i], numReds[i], "jt-restart-test-job", mapSignalFile,
+ numMaps[i], numReds[i], "jt restart test job", mapSignalFile,
reduceSignalFile);
jobs[i].setJobPriority(priorities[i]);
}