You are viewing a plain text version of this content. The canonical (HTML) version is available from the mailing-list archive.
Posted to common-commits@hadoop.apache.org by dd...@apache.org on 2008/06/12 12:32:27 UTC
svn commit: r667035 - in /hadoop/core/branches/branch-0.18/src/contrib/hod:
CHANGES.txt hodlib/Hod/hadoop.py hodlib/Hod/hod.py
hodlib/NodePools/torque.py hodlib/Schedulers/torque.py testing/testHod.py
testing/testRingmasterRPCs.py
Author: ddas
Date: Thu Jun 12 03:32:26 2008
New Revision: 667035
URL: http://svn.apache.org/viewvc?rev=667035&view=rev
Log:
Merge -r 667032:667033 from trunk onto 0.18 branch. Fixes HADOOP-3523.
Modified:
hadoop/core/branches/branch-0.18/src/contrib/hod/CHANGES.txt
hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hadoop.py
hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hod.py
hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/NodePools/torque.py
hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Schedulers/torque.py
hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testHod.py
hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testRingmasterRPCs.py
Modified: hadoop/core/branches/branch-0.18/src/contrib/hod/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/contrib/hod/CHANGES.txt?rev=667035&r1=667034&r2=667035&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/contrib/hod/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.18/src/contrib/hod/CHANGES.txt Thu Jun 12 03:32:26 2008
@@ -1,7 +1,6 @@
HOD Change Log
-
-Trunk (unreleased changes)
+Release 0.18.0 - Unreleased
INCOMPATIBLE CHANGES
@@ -29,10 +28,13 @@
BUG FIXES
- HADOOP-2961: Avoids unnecessary checks for some configuration parameters
+ HADOOP-2961. Avoids unnecessary checks for some configuration parameters
related to service configuration. (Vinod Kumar Vavilapalli via ddas)
-Release 0.17.0 - Unreleased
+ HADOOP-3523. Fixes auto-deallocation of cluster if job id is not found in
+ Torque's job list (Hemanth Yamijala via ddas)
+
+Release 0.17.0 - 2008-05-18
INCOMPATIBLE CHANGES
Modified: hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hadoop.py
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hadoop.py?rev=667035&r1=667034&r2=667035&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hadoop.py (original)
+++ hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hadoop.py Thu Jun 12 03:32:26 2008
@@ -431,7 +431,9 @@
"""Returns True if the JobId that represents this cluster
is in the Completed or exiting state."""
jobInfo = self.__nodePool.getJobInfo(jobId)
- state = jobInfo['job_state']
+ state = None
+ if jobInfo is not None and jobInfo.has_key('job_state'):
+ state = jobInfo['job_state']
return ((state == 'C') or (state == 'E'))
def cleanup(self):
Modified: hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hod.py
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hod.py?rev=667035&r1=667034&r2=667035&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hod.py (original)
+++ hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Hod/hod.py Thu Jun 12 03:32:26 2008
@@ -307,7 +307,7 @@
self.__remove_cluster(clusterDir)
self.__clusterState.clear()
else:
- self.__log.critical("Found a previously allocated cluster at cluster directory '%s'. Deallocate the cluster first." % (clusterDir))
+ self.__log.critical("Found a previously allocated cluster at cluster directory '%s'. HOD cannot determine if this cluster can be automatically deallocated. Deallocate the cluster if it is unused." % (clusterDir))
self.__opCode = 12
return
Modified: hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/NodePools/torque.py
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/NodePools/torque.py?rev=667035&r1=667034&r2=667035&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/NodePools/torque.py (original)
+++ hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/NodePools/torque.py Thu Jun 12 03:32:26 2008
@@ -270,7 +270,8 @@
def getJobInfo(self, jobId=None):
#torque error code when credentials fail, a temporary condition sometimes.
- credFailureErrorCode = 171
+ credFailureErrorCode = 171
+ jobNonExistentErrorCode = 153
credFailureRetries = 10
i = 0
self.__jobInfo = None
@@ -283,6 +284,12 @@
if exitCode == 0:
self.__jobInfo = qstatInfo
break
+ elif exitCode == jobNonExistentErrorCode:
+ # This really means that the job completed
+ # However, setting only job_state for now, not
+ # any other attributes, as none seem required.
+ self.__jobInfo = { 'job_state' : 'C' }
+ break
else:
if exitCode == credFailureErrorCode:
time.sleep(1)
Modified: hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Schedulers/torque.py
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Schedulers/torque.py?rev=667035&r1=667034&r2=667035&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Schedulers/torque.py (original)
+++ hadoop/core/branches/branch-0.18/src/contrib/hod/hodlib/Schedulers/torque.py Thu Jun 12 03:32:26 2008
@@ -93,7 +93,7 @@
exitCode = qstatProcess.exit_code()
if exitCode > 0:
- self.__log.error('qstat error: %s' % qstatProcess.exit_status_string())
+ self.__log.warn('qstat error: %s' % qstatProcess.exit_status_string())
else:
qstatInfo = {}
for line in qstatProcess.output():
Modified: hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testHod.py
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testHod.py?rev=667035&r1=667034&r2=667035&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testHod.py (original)
+++ hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testHod.py Thu Jun 12 03:32:26 2008
@@ -185,7 +185,10 @@
userState = { clusterDir : jobid }
self.__setupClusterState(userState, False)
self.client._op_allocate(['allocate', clusterDir, '3'])
- self.assertTrue(self.log.hasMessage("Found a previously allocated cluster at cluster directory '%s'. Deallocate the cluster first." % (clusterDir), 'critical'))
+ self.assertTrue(self.log.hasMessage("Found a previously allocated cluster at "\
+ "cluster directory '%s'. HOD cannot determine if this cluster "\
+ "can be automatically deallocated. Deallocate the cluster if it "\
+ "is unused." % (clusterDir), 'critical'))
os.rmdir(clusterDir)
def __setupClusterState(self, clusterStateMap, verifyDirIsAbsent=True):
Modified: hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testRingmasterRPCs.py
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testRingmasterRPCs.py?rev=667035&r1=667034&r2=667035&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testRingmasterRPCs.py (original)
+++ hadoop/core/branches/branch-0.18/src/contrib/hod/testing/testRingmasterRPCs.py Thu Jun 12 03:32:26 2008
@@ -68,7 +68,8 @@
'batch-home': '/home/y/'
},
'ringmaster': {
- 'max-connect' : 2
+ 'max-connect' : 2,
+ 'max-master-failures' : 5
},
'hodring': {
},