You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by mu...@apache.org on 2008/06/04 20:06:41 UTC

svn commit: r663331 - in /hadoop/core/trunk/src/contrib/hod: ./ bin/ hodlib/GridServices/ hodlib/Hod/ hodlib/HodRing/ hodlib/RingMaster/ hodlib/ServiceRegistry/ testing/

Author: mukund
Date: Wed Jun  4 11:06:40 2008
New Revision: 663331

URL: http://svn.apache.org/viewvc?rev=663331&view=rev
Log:
HADOOP-3464. Implemented a mechanism to transfer HOD errors that occur on compute nodes to the submit node running the HOD client, so users have good feedback on why an allocation failed. (Vinod Kumar Vavilapalli via mukund)

Modified:
    hadoop/core/trunk/src/contrib/hod/CHANGES.txt
    hadoop/core/trunk/src/contrib/hod/bin/hodring
    hadoop/core/trunk/src/contrib/hod/bin/ringmaster
    hadoop/core/trunk/src/contrib/hod/hodlib/GridServices/service.py
    hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hadoop.py
    hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hod.py
    hadoop/core/trunk/src/contrib/hod/hodlib/HodRing/hodRing.py
    hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py
    hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/ringMaster.py
    hadoop/core/trunk/src/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py
    hadoop/core/trunk/src/contrib/hod/testing/testHod.py

Modified: hadoop/core/trunk/src/contrib/hod/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/CHANGES.txt?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/CHANGES.txt (original)
+++ hadoop/core/trunk/src/contrib/hod/CHANGES.txt Wed Jun  4 11:06:40 2008
@@ -15,6 +15,9 @@
     HADOOP-3151. Improves error messages when reporting failures due to 
     incorrect parameters passed to HOD. (Vinod Kumar Vavilapalli via ddas)
 
+    HADOOP-3464. Implemented a mechanism to transfer HOD errors that occur on
+    compute nodes to the submit node running the HOD client, so users have good
+    feedback on why an allocation failed. (Vinod Kumar Vavilapalli via mukund)
  
   BUG FIXES
 

Modified: hadoop/core/trunk/src/contrib/hod/bin/hodring
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/bin/hodring?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/bin/hodring (original)
+++ hadoop/core/trunk/src/contrib/hod/bin/hodring Wed Jun  4 11:06:40 2008
@@ -46,8 +46,11 @@
 
 from hodlib.HodRing.hodRing import HodRing
 from hodlib.Common.setup import *
-from hodlib.Common.util import filter_warnings,get_exception_string, get_exception_error_string, getMapredSystemDirectory
+from hodlib.Common.util import filter_warnings, get_exception_string, \
+                get_exception_error_string, getMapredSystemDirectory, \
+                to_http_url, local_fqdn
 from hodlib.Common.logger import getLogger, ensureLogDir
+from hodlib.Common.xmlrpc import hodXRClient
 
 filter_warnings()
 
@@ -255,10 +258,30 @@
       log.critical("exec failed")
       os._exit(1)
 
-  except Exception:
+  except Exception, e:
     if service:
       if service.log:
         log = service.log
     else:
       log = getLogger(hodRingOptions['hodring'], 'hodring')
     log.error("Error in bin/hodring %s. \nStack trace:\n%s" %(get_exception_error_string(),get_exception_string()))
+    
+    log.info("now trying informing to ringmaster")
+    log.info(hodRingOptions['hodring']['ringmaster-xrs-addr'])
+    log.info(hodRingOptions.normalizeValue('hodring', 'ringmaster-xrs-addr'))
+    log.info(to_http_url(hodRingOptions.normalizeValue( \
+            'hodring', 'ringmaster-xrs-addr')))
+    # Report errors to the Ringmaster if possible
+    try:
+      ringXRAddress = to_http_url(hodRingOptions.normalizeValue( \
+                                     'hodring', 'ringmaster-xrs-addr'))
+      log.debug("Creating ringmaster XML-RPC client.")
+      ringClient = hodXRClient(ringXRAddress)    
+      if ringClient is not None:
+        addr = local_fqdn() + "_" + str(os.getpid())
+        ringClient.setHodRingErrors(addr, str(e))
+        log.info("Reported errors to ringmaster at %s" % ringXRAddress)
+    except Exception, e:
+      log.error("Failed to report errors to ringmaster at %s" % ringXRAddress)
+      log.error("Reason : %s" % get_exception_string())
+    # End of reporting errors to the client

Modified: hadoop/core/trunk/src/contrib/hod/bin/ringmaster
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/bin/ringmaster?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/bin/ringmaster (original)
+++ hadoop/core/trunk/src/contrib/hod/bin/ringmaster Wed Jun  4 11:06:40 2008
@@ -46,9 +46,11 @@
 from hodlib.RingMaster.ringMaster import main
 from hodlib.Common.setup import *
 from hodlib.Common.descGenerator import *
-from hodlib.Common.util import local_fqdn, filter_warnings
+from hodlib.Common.util import local_fqdn, filter_warnings, to_http_url, \
+                        get_exception_string, get_exception_error_string
 from hodlib.Common.logger import getLogger, ensureLogDir
-from hodlib.Common.util import get_exception_string, get_exception_error_string
+from hodlib.Common.xmlrpc import hodXRClient
+import logging
 
 filter_warnings()
 
@@ -275,9 +277,16 @@
   confDef = definition()
   confDef.add_defs(defList, defOrder)
   ringMasterOptions = options(confDef, "./%s [OPTIONS]" % myName, VERSION)
-  log = None
+  log = logging.getLogger()
 
   try:
+
+    # Set up logging before anything else.
+    ensureLogDir(ringMasterOptions.normalizeValue('ringmaster', 'log-dir'))
+    log = getLogger(ringMasterOptions['ringmaster'],'ringmaster')
+    # End of setting up logging
+
+    # Verify and process options
     statusMsgs = []
     # Conditional validation
     if not ringMasterOptions['ringmaster'].has_key('hadoop-tar-ball') or \
@@ -291,21 +300,42 @@
                                                   'gridservice-mapred', 'pkgs'))
 
     if len(statusMsgs) != 0:
-      raise Exception("%s" % statusMsgs)
+      # format status messages into a single string
+      errStr = ''
+      for msg in statusMsgs:
+        errStr = "%s%s\n" % (errStr, msg)
+      raise Exception("%s" % errStr)
     # End of conditional validation
 
     (status, statusMsgs) = ringMasterOptions.verify()
     if not status:
-      raise Exception("%s" % statusMsgs)
+      # format status messages into a single string
+      errStr = ''
+      for msg in statusMsgs:
+        errStr = "%s%s\n" % (errStr, msg)
+      raise Exception("%s" % errStr)
+
     ringMasterOptions.replace_escape_seqs()
     ringMasterOptions['ringmaster']['base-dir'] = rootDirectory 
+    # End of option processing
 
-    ensureLogDir(ringMasterOptions['ringmaster']['log-dir'])
-    log = getLogger(ringMasterOptions['ringmaster'],'ringmaster')
     ret = main(ringMasterOptions,log)
     sys.exit(ret)
   except Exception, e:
-    if log:
-      log.error("bin/ringmaster failed to start.%s. Stack trace follows:\n%s" % (get_exception_error_string(),get_exception_string()))
+    log.error("bin/ringmaster failed to start.%s. Stack trace follows:\n%s" % (get_exception_error_string(),get_exception_string()))
+
+    # Report errors to the client if possible
+    try:
+      serviceAddr = to_http_url(ringMasterOptions.normalizeValue( \
+                                     'ringmaster', 'svcrgy-addr'))
+      serviceClient = hodXRClient(serviceAddr)
+      if serviceClient is not None:
+        serviceClient.setRMError([str(e),get_exception_string()])
+        log.info("Reported errors to service registry at %s" % serviceAddr)
+    except Exception, e:
+      log.error("Failed to report errors to service registry.")
+      log.error("Reason : %s" % get_exception_string())
+    # End of reporting errors to the client
+
     # Ringmaster failing to start is a ringmaster error. Exit with the appropriate exit code.
     sys.exit(6)

Modified: hadoop/core/trunk/src/contrib/hod/hodlib/GridServices/service.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/hodlib/GridServices/service.py?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/hodlib/GridServices/service.py (original)
+++ hadoop/core/trunk/src/contrib/hod/hodlib/GridServices/service.py Wed Jun  4 11:06:40 2008
@@ -65,6 +65,7 @@
     self.masterInitialized = False
     self.masterAddress = 'none'
     self.requiredNode = requiredNode
+    self.failedMsg = None
 
   def getRequiredNode(self):
     return self.requiredNode
@@ -149,6 +150,12 @@
 
   def isExternal(self):
     return self.serviceDesc.isExternal()
+
+  def setMasterFailed(self, err):
+    self.failedMsg = err
+
+  def getMasterFailed(self):
+    return self.failedMsg
   
 class NodeRequest:
   """ A class to define 
@@ -191,15 +198,16 @@
       if log: log.debug("Trying to see if port %s is available"% n)
       try:
         s.bind((h, n))
+        if log: log.debug("Yes, port %s is available" % n)
+        avail = True
       except socket.error,e:
         if log: log.debug("Could not bind to the port %s. Reason %s" % (n,e))
         retry -= 1
         pass
-      else:
-        if log: log.debug("Yes, port %s is available" % n)
-        avail = True
-      finally:
-        s.close()
+      # The earlier code that used to be here had syntax errors. The code path
+      # couldn't be followed at any time, so the error remained uncaught.
+      # This time I stumbled upon the error
+      s.close()
 
       if avail:
         ServiceUtil.localPortUsed[n] = True
@@ -229,15 +237,13 @@
       if log: log.debug("Trying to see if port %s is available"% n)
       try:
         s.bind((h, n))
+        if log: log.debug("Yes, port %s is available" % n)
+        avail = True
       except socket.error,e:
         if log: log.debug("Could not bind to the port %s. Reason %s" % (n,e))
         retry -= 1
         pass
-      else:
-        if log: log.debug("Yes, port %s is available" % n)
-        avail = True
-      finally:
-        s.close()
+      s.close()
 
       if avail:
         ServiceUtil.localPortUsed[n] = True

Modified: hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hadoop.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hadoop.py?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hadoop.py (original)
+++ hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hadoop.py Wed Jun  4 11:06:40 2008
@@ -260,7 +260,6 @@
           if (count % 10 == 0):
             if not self.__check_job_status():
               break
-
     return ringmasterXRS
  
   def __init_hadoop_service(self, serviceName, xmlrpcClient):
@@ -296,6 +295,11 @@
       self.__log.critical("Failed to retrieve '%s' service address." % 
                           serviceName)
       status = False
+    elif serviceAddress.startswith("Error: "):
+      errs = serviceAddress[len("Error: "):]
+      self.__log.critical("Cluster could not be allocated because of the following errors.\n%s" % \
+                             errs)
+      status = False
     else:
       try:
         self.__svcrgyClient.registerService(self.__cfg['hodring']['userid'], 
@@ -556,7 +560,7 @@
             else:
               status = 6
             if status != 0:
-              self.__log.info("Cleaning up cluster id %s, as cluster could not be allocated." % self.jobId)
+              self.__log.debug("Cleaning up cluster id %s, as cluster could not be allocated." % self.jobId)
               if ringClient is None:
                 self.delete_job(self.jobId)
               else:
@@ -590,6 +594,12 @@
         self.__log.critical("Scheduler failure, allocation failed.\n\n")        
         status = 4
     
+    if status == 5 or status == 6:
+      ringMasterErrors = self.__svcrgyClient.getRMError()
+      if ringMasterErrors:
+        self.__log.critical("Cluster could not be allocated because of the following errors on the ringmaster host.\n%s" % \
+                               (ringMasterErrors[0]))
+        self.__log.debug("Stack trace on ringmaster: %s" % ringMasterErrors[1])
     return status
 
   def __isRingMasterAlive(self, rmAddr):

Modified: hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hod.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hod.py?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hod.py (original)
+++ hadoop/core/trunk/src/contrib/hod/hodlib/Hod/hod.py Wed Jun  4 11:06:40 2008
@@ -186,7 +186,7 @@
   def __setup_service_registry(self):
     cfg = self.__cfg['hod'].copy()
     cfg['debug'] = 0
-    self.__registry = svcrgy(cfg)
+    self.__registry = svcrgy(cfg, self.__log)
     self.__registry.start()
     self.__log.debug(self.__registry.getXMLRPCAddr())
     self.__cfg['hod']['xrs-address'] = self.__registry.getXMLRPCAddr()

Modified: hadoop/core/trunk/src/contrib/hod/hodlib/HodRing/hodRing.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/hodlib/HodRing/hodRing.py?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/hodlib/HodRing/hodRing.py (original)
+++ hadoop/core/trunk/src/contrib/hod/hodlib/HodRing/hodRing.py Wed Jun  4 11:06:40 2008
@@ -234,6 +234,7 @@
     self._createHadoopSiteXml()
     self._createHadoopLogDir()
     self.__hadoopThread = None
+    self.stdErrContents = "" # store list of contents for returning to user
 
   def _createWorkDirs(self):
     for dir in self.workdirs:
@@ -443,6 +444,18 @@
     if status == False:
       self.log.error('hadoop error: %s' % (
                        self.__hadoopThread.exit_status_string()))
+      # read the contents of redirected stderr to print information back to user
+      if os.path.exists(self.err):
+        f = None
+        try:
+          f = open(self.err)
+          lines = f.readlines()
+          # format
+          for line in lines:
+            self.stdErrContents = "%s%s" % (self.stdErrContents, line)
+        finally:
+          if f is not None:
+            f.close()
       self.log.error('See %s.out and/or %s.err for details. They are ' % \
                      (self.name, self.name) + \
                      'located at subdirectories under either ' + \
@@ -607,9 +620,12 @@
 
         self.log.debug('This is the packcage dir %s ' % (pkgdir))
         if not cmd.run(pkgdir):
-          raise ValueError, "Can't launch command: %s" % pkgdir
+          addnInfo = ""
+          if cmd.stdErrContents is not "":
+            addnInfo = " Information from stderr of the command:\n%s" % (cmd.stdErrContents)
+          raise Exception("Could not launch the %s using %s/bin/hadoop.%s" % (desc.getName(), pkgdir, addnInfo))
       except Exception, e:
-        print get_exception_string()
+        self.log.debug("Exception running hadoop command: %s\n%s" % (get_exception_error_string(), get_exception_string()))
         self.__running[id] = cmd
         raise Exception(e)
 

Modified: hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py (original)
+++ hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/idleJobTracker.py Wed Jun  4 11:06:40 2008
@@ -85,17 +85,14 @@
   def getJobTrackerURL(self):
     """This method periodically checks the service info provider for the JT URL"""
     self.__jobTrackerURL = self.__serviceInfoProvider.getServiceAddr('mapred')
-    while not self.__stopFlag and \
-          (self.__jobTrackerURL is None or \
-            self.__jobTrackerURL == 'not found'):
+    while not self.__stopFlag and not self.__isValidJobTrackerURL():
       time.sleep(10)
       if not self.__stopFlag:
         self.__jobTrackerURL = self.__serviceInfoProvider.getServiceAddr('mapred')
       else:
         break
 
-    if (self.__jobTrackerURL != None) and \
-          (self.__jobTrackerURL != 'not found'):
+    if self.__isValidJobTrackerURL():
       self.__log.debug('Got URL %s. Starting monitoring' % self.__jobTrackerURL)
       self.__jtMonitorThread.start()
 
@@ -129,6 +126,12 @@
       self.__log.debug('Exception while getting job statuses. %s' % get_exception_string())
     return jobStatusList
 
+  def __isValidJobTrackerURL(self):
+    """This method checks that the stored job tracker URL is not one of the special case strings
+       returned by the getServiceAddr API"""
+    return ((self.__jobTrackerURL != None) and (self.__jobTrackerURL != 'not found') \
+              and (not self.__jobTrackerURL.startswith('Error')))
+
   def __extractJobStatus(self, line):
     """This method parses an output line from the job status command and creates
        the JobStatus object if there is a match"""

Modified: hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/ringMaster.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/ringMaster.py?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/ringMaster.py (original)
+++ hadoop/core/trunk/src/contrib/hod/hodlib/RingMaster/ringMaster.py Wed Jun  4 11:06:40 2008
@@ -421,7 +421,7 @@
     hodring to update any parameters
     its changed for the commands it was
     running"""
-    self.log.debug('Comment: adding master params')
+    self.log.debug('Comment: adding master params from %s' % addr)
     self.log.debug(pformat(vals))
     lock = self.masterParamLock
     lock.acquire()
@@ -431,7 +431,6 @@
           if (v.getMasterAddress() == addr):
             v.setMasterParams(vals)
             v.setMasterInitialized()
-
     except:
       self.log.debug(get_exception_string())
       pass
@@ -439,6 +438,27 @@
             
     return addr
 
+  def setHodRingErrors(self, addr, errors):
+    """This method is called by the hodrings to update errors 
+      it encountered while starting up"""
+    self.log.critical("Hodring at %s failed with following errors:\n%s" % (addr, errors))
+    lock = self.masterParamLock
+    lock.acquire()
+    try:
+      for v in self.serviceDict.itervalues():
+        if v.isMasterLaunched():
+          if (v.getMasterAddress() == addr):
+            # strip the PID part.
+            idx = addr.rfind('_')
+            if idx is not -1:
+              addr = addr[:idx]
+            v.setMasterFailed("Hodring at %s failed with following errors:\n%s" % (addr, errors))
+    except:
+      self.log.debug(get_exception_string())
+      pass
+    lock.release()
+    return True
+
   def getKeys(self):
     lock= self.masterParamLock
     lock.acquire()
@@ -458,7 +478,10 @@
       pass
     else:
       self.log.debug("getServiceAddr service: %s" % service)
-      if (service.isMasterInitialized()):
+      err = service.getMasterFailed()
+      if err is not None:
+        addr = "Error: " + err
+      elif (service.isMasterInitialized()):
         addr = service.getMasterAddrs()[0]
       else:
         addr = 'not found'
@@ -563,7 +586,10 @@
       # Determine hadoop Version
       hadoopVers = hadoopVersion(self.__getHadoopDir(), \
                                 self.cfg['hodring']['java-home'], self.log)
-      
+     
+      if (hadoopVers['major']==None) or (hadoopVers['minor']==None):
+        raise Exception('Could not retrive the version of Hadoop.'
+                        + ' Check the Hadoop installation or the value of the hodring.java-home variable.')
       if hdfsDesc.isExternal():
         hdfs = HdfsExternal(hdfsDesc, workDirs, version=int(hadoopVers['minor']))
         hdfs.setMasterParams( self.cfg['gridservice-hdfs'] )
@@ -888,9 +914,11 @@
   def __findExitCode(self):
     """Determine the exit code based on the status of the cluster or jobs run on them"""
     xmlrpcServer = ringMasterServer.instance.logMasterSources
-    if xmlrpcServer.getServiceAddr('hdfs') == 'not found':
+    if xmlrpcServer.getServiceAddr('hdfs') == 'not found' or \
+        xmlrpcServer.getServiceAddr('hdfs').startswith("Error: "):
       self.__exitCode = 7
-    elif xmlrpcServer.getServiceAddr('mapred') == 'not found':
+    elif xmlrpcServer.getServiceAddr('mapred') == 'not found' or \
+        xmlrpcServer.getServiceAddr('mapred').startswith("Error: "):
       self.__exitCode = 8
     else:
       clusterStatus = get_cluster_status(xmlrpcServer.getServiceAddr('hdfs'),

Modified: hadoop/core/trunk/src/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py (original)
+++ hadoop/core/trunk/src/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py Wed Jun  4 11:06:40 2008
@@ -19,9 +19,10 @@
 from hodlib.Common.threads import loop
 from hodlib.Common.tcp import tcpSocket
 from hodlib.Common.util import get_exception_string
+import logging
 
 class svcrgy(hodBaseService):
-    def __init__(self, config):
+    def __init__(self, config, log=None):
         hodBaseService.__init__(self, 'serviceRegistry', config)
         
         self.__serviceDict = {}
@@ -30,6 +31,10 @@
         self.__locked = {}
         
         self.__serviceDictLock = threading.Lock()
+        self.RMErrorMsgs = None # Ringmaster error messages
+        self.log = log
+        if self.log is None:
+          self.log = logging.getLogger()
     
     def __get_job_key(self, userid, job):
         return "%s-%s" % (userid, job)
@@ -40,7 +45,20 @@
     def _xr_method_getServiceInfo(self, userid=None, job=None, name=None, 
                                   type=None):
         return self.getServiceInfo(userid, job, name, type)
-        
+
+    def _xr_method_setRMError(self, args):
+        self.log.debug("setRMError called with %s" % args)
+        self.RMErrorMsgs = args
+        return True
+
+    def _xr_method_getRMError(self):
+        self.log.debug("getRMError called")
+        if self.RMErrorMsgs is not None:
+          return self.RMErrorMsgs
+        else:
+          self.log.debug("no Ringmaster error messages")
+          return False
+
     def registerService(self, userid, job, host, name, type, dict):
         """Method thats called upon by
         the ringmaster to register to the

Modified: hadoop/core/trunk/src/contrib/hod/testing/testHod.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/testing/testHod.py?rev=663331&r1=663330&r2=663331&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/testing/testHod.py (original)
+++ hadoop/core/trunk/src/contrib/hod/testing/testHod.py Wed Jun  4 11:06:40 2008
@@ -131,7 +131,7 @@
   def testInfoNonExistentDirectory(self):
     clusterDir = '/tmp/hod/testInfoNonExistentDirectory'
     self.client._op_info(['info', clusterDir])
-    self.assertTrue(self.log.hasMessage("'%s' is not a valid cluster directory." % (clusterDir), 'critical'))
+    self.assertTrue(self.log.hasMessage("Invalid hod.clusterdir(--hod.clusterdir or -d). %s : No such directory" % (clusterDir), 'critical'))
 
   # Test that deallocation works on a deleted cluster directory
   # by clearing the job, and removing the state
@@ -174,7 +174,7 @@
     self.client._op_deallocate(['deallocate', clusterDir])
     # there should be no call..
     self.assertFalse(self.cluster.wasOperationPerformed('delete_job', None))
-    self.assertTrue(self.log.hasMessage("'%s' is not a valid cluster directory." % (clusterDir), 'critical'))
+    self.assertTrue(self.log.hasMessage("Invalid hod.clusterdir(--hod.clusterdir or -d). %s : No such directory" % (clusterDir), 'critical'))
 
   # Test that allocation on an previously deleted directory fails.    
   def testAllocateOnDeletedDirectory(self):