You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 06:57:49 UTC

svn commit: r1131688 - in /incubator/mesos/trunk/frameworks/torque: start_pbs_mom.py test_date_sleep_date_5node.qsub test_date_sleep_date_5node_10sec.qsub test_date_sleep_date_5node_60sec.qsub torquesched.py

Author: benh
Date: Sun Jun  5 04:57:49 2011
New Revision: 1131688

URL: http://svn.apache.org/viewvc?rev=1131688&view=rev
Log:
The Torque framework now kills tasks correctly. It also writes its own event_log, which shows how many nodes (i.e. tasks, each of which runs on a distinct node) it currently has.

Added:
    incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub
      - copied, changed from r1131687, incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub
    incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub
      - copied, changed from r1131687, incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub
Removed:
    incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub
Modified:
    incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py
    incubator/mesos/trunk/frameworks/torque/torquesched.py

Modified: incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py?rev=1131688&r1=1131687&r2=1131688&view=diff
==============================================================================
--- incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py (original)
+++ incubator/mesos/trunk/frameworks/torque/start_pbs_mom.py Sun Jun  5 04:57:49 2011
@@ -70,8 +70,10 @@ class MyExecutor(nexus.Executor):
     print "running pbs_mom on compute node"
     Popen("pbs_mom", shell=True)
 
-  def killTask(self, driver, tid):
-    sys.exit(1)
+  #def killTask(self, driver, tid):
+    #send a message back to the scheduler to tell it this task is dead
+    #msg = nexus.TaskStatus(tid, nexus.TASK_KILLED, "")
+    #driver.sendStatusUpdate(msg);
 
   def shutdown(self, driver):
     print "shutdown"

Copied: incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub (from r1131687, incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub)
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub?p2=incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub&p1=incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub&r1=1131687&r2=1131688&rev=1131688&view=diff
==============================================================================
--- incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub (original)
+++ incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_10sec.qsub Sun Jun  5 04:57:49 2011
@@ -10,8 +10,8 @@
 #print the time and date
 date
 
-#wait 120 seconds
-sleep 5 
+#wait 10 seconds
+sleep 10 
 
 #print the time and date again
 date

Copied: incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub (from r1131687, incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub)
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub?p2=incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub&p1=incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub&r1=1131687&r2=1131688&rev=1131688&view=diff
==============================================================================
--- incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node.qsub (original)
+++ incubator/mesos/trunk/frameworks/torque/test_date_sleep_date_5node_60sec.qsub Sun Jun  5 04:57:49 2011
@@ -10,8 +10,8 @@
 #print the time and date
 date
 
-#wait 120 seconds
-sleep 5 
+#wait 60 seconds
+sleep 60 
 
 #print the time and date again
 date

Modified: incubator/mesos/trunk/frameworks/torque/torquesched.py
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/frameworks/torque/torquesched.py?rev=1131688&r1=1131687&r2=1131688&view=diff
==============================================================================
--- incubator/mesos/trunk/frameworks/torque/torquesched.py (original)
+++ incubator/mesos/trunk/frameworks/torque/torquesched.py Sun Jun  5 04:57:49 2011
@@ -81,6 +81,11 @@ class MyScheduler(nexus.Scheduler):
       self.servers[self.id] = offer.host
       self.regComputeNode(offer.host)
       self.numToRegister -= 1
+      #HUGE HACK HERE. THIS IS BAD!
+      if self.numToRegister == 0:# and len(torquelib.getActiveJobs()) == 1:
+        #submit job that will fail because it is asking for too many resources
+        time.sleep(8)
+        Popen("echo date | qsub -l nodes=1", shell=True)
       self.id += 1
       driverlog.info("writing logfile")
       eventlog.info(len(self.servers))
@@ -134,6 +139,7 @@ class MyScheduler(nexus.Scheduler):
         monitorlog.info("We still have to kill %d of the %d compute nodes which master is tracking" % (toKill, len(self.servers)))
         monitorlog.info("unregistering node " + str(hostname))
         self.unregComputeNode(hostname)
+        eventlog.info(len(sched.servers))
         self.servers.pop(tid)
         toKill = toKill - 1
         monitorlog.info("killing corresponding task with tid %d" % tid)