You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 06:57:49 UTC
svn commit: r1131688 - in /incubator/mesos/trunk/frameworks/torque:
start_pbs_mom.py test_date_sleep_date_5node.qsub
test_date_sleep_date_5node_10sec.qsub test_date_sleep_date_5node_60sec.qsub
torquesched.py
Author: benh
Date: Sun Jun 5 04:57:49 2011
New Revision: 1131688
URL: http://svn.apache.org/viewvc?rev=1131688&view=rev
Log:
The Torque framework now kills tasks correctly and also writes its own event_log showing how many nodes (i.e., tasks, each of which runs on a distinct node) it currently has.
Added:
incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub
- copied, changed from r1131687, incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub
incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub
- copied, changed from r1131687, incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub
Removed:
incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub
Modified:
incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py
incubator/mesos/trunk/frameworks/torque/torquesched.py
Modified: incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py?rev=1131688&r1=1131687&r2=1131688&view=diff
==============================================================================
--- incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py (original)
+++ incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py Sun Jun 5 04:57:49 2011
@@ -70,8 +70,10 @@ class MyExecutor(nexus.Executor):
print "running pbs_mom on compute node"
Popen("pbs_mom", shell=True)
- def killTask(self, driver, tid):
- sys.exit(1)
+ #def killTask(self, driver, tid):
+ #send a message back to the scheduler to tell it this task is dead
+ #msg = nexus.TaskStatus(tid, nexus.TASK_KILLED, "")
+ #driver.sendStatusUpdate(msg);
def shutdown(self, driver):
print "shutdown"
Copied: incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub (from r1131687, incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub)
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub?p2=incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub&p1=incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub&r1=1131687&r2=1131688&rev=1131688&view=diff
==============================================================================
--- incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub (original)
+++ incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub Sun Jun 5 04:57:49 2011
@@ -10,8 +10,8 @@
#print the time and date
date
-#wait 120 seconds
-sleep 5
+#wait 10 seconds
+sleep 10
#print the time and date again
date
Copied: incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub (from r1131687, incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub)
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub?p2=incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub&p1=incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub&r1=1131687&r2=1131688&rev=1131688&view=diff
==============================================================================
--- incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub (original)
+++ incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub Sun Jun 5 04:57:49 2011
@@ -10,8 +10,8 @@
#print the time and date
date
-#wait 120 seconds
-sleep 5
+#wait 60 seconds
+sleep 60
#print the time and date again
date
Modified: incubator/mesos/trunk/frameworks/torque/torquesched.py
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/frameworks/torque/torquesched.py?rev=1131688&r1=1131687&r2=1131688&view=diff
==============================================================================
--- incubator/mesos/trunk/frameworks/torque/torquesched.py (original)
+++ incubator/mesos/trunk/frameworks/torque/torquesched.py Sun Jun 5 04:57:49 2011
@@ -81,6 +81,11 @@ class MyScheduler(nexus.Scheduler):
self.servers[self.id] = offer.host
self.regComputeNode(offer.host)
self.numToRegister -= 1
+ #HUGE HACK HERE. THIS IS BAD!
+ if self.numToRegister == 0:# and len(torquelib.getActiveJobs()) == 1:
+ #submit job that will fail because it is asking for too many resources
+ time.sleep(8)
+ Popen("echo date | qsub -l nodes=1", shell=True)
self.id += 1
driverlog.info("writing logfile")
eventlog.info(len(self.servers))
@@ -134,6 +139,7 @@ class MyScheduler(nexus.Scheduler):
monitorlog.info("We still have to kill %d of the %d compute nodes which master is tracking" % (toKill, len(self.servers)))
monitorlog.info("unregistering node " + str(hostname))
self.unregComputeNode(hostname)
+ eventlog.info(len(sched.servers))
self.servers.pop(tid)
toKill = toKill - 1
monitorlog.info("killing corresponding task with tid %d" % tid)