You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ni...@apache.org on 2008/03/03 17:44:46 UTC
svn commit: r633166 - in /hadoop/core/trunk: CHANGES.txt
src/contrib/hod/hodlib/RingMaster/idleJobTracker.py
Author: nigel
Date: Mon Mar 3 08:44:44 2008
New Revision: 633166
URL: http://svn.apache.org/viewvc?rev=633166&view=rev
Log:
HADOOP-2847. Ensure idle cluster cleanup works even if the JobTracker becomes unresponsive to RPC calls. Contributed by Hemanth Yamijala.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=633166&r1=633165&r2=633166&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Mon Mar 3 08:44:44 2008
@@ -216,6 +216,9 @@
HADOOP-2923. Add SequenceFileAsBinaryInputFormat, which was
missed in the commit for HADOOP-2603. (cdouglas via omalley)
+ HADOOP-2847. Ensure idle cluster cleanup works even if the JobTracker
+ becomes unresponsive to RPC calls. (Hemanth Yamijala via nigel)
+
Release 0.16.0 - 2008-02-07
INCOMPATIBLE CHANGES
Modified: hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py?rev=633166&r1=633165&r2=633166&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py (original)
+++ hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py Mon Mar 3 08:44:44 2008
@@ -31,6 +31,13 @@
def getStatus(self):
return self.__status
+class HadoopClientException(Exception):
+ """This class represents an exception that is raised when we fail in
+ running the job client."""
+
+ def __init__(self, errorCode):
+ self.errorCode = errorCode
+
class JobTrackerMonitor:
"""This class monitors the JobTracker of an allocated cluster
periodically to detect whether it is idle. If it is found
@@ -134,7 +141,17 @@
def __isIdle(self):
"""This method checks if the JobTracker is idle beyond a certain limit."""
- if self.__getJobCount() == 0:
+ jobCount = 0
+ err = False
+
+ try:
+ jobCount = self.__getJobCount()
+ except HadoopClientException, hce:
+ self.__log.debug('HadoopClientException handled in getting job count. \
+ Error code: %s' % hce.errorCode)
+ err = True
+
+ if (jobCount==0) or err:
if self.__firstIdleTime == 0:
#detecting idleness for the first time
self.__firstIdleTime = time.time()
@@ -145,6 +162,7 @@
else:
# reset idleness time
self.__firstIdleTime = 0
+
return False
def __getJobCount(self):
@@ -164,6 +182,11 @@
match = self.__jobCountRegExp.match(line)
if match:
jobs = int(match.group(1))
+ elif jtStatusCommand.exit_code() == 1:
+ # For now, JobClient exits with code 1 for any exception it raises. If Hadoop later
+ # provides more granular exit codes, we can check specifically for the codes that
+ # correspond to network errors etc.
+ raise HadoopClientException(jtStatusCommand.exit_code())
return jobs
def __isCompatibleHadoopVersion(self, expectedVersion):