You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by cw...@apache.org on 2017/01/16 20:05:49 UTC

svn commit: r1779094 - /uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java

Author: cwiklik
Date: Mon Jan 16 20:05:49 2017
New Revision: 1779094

URL: http://svn.apache.org/viewvc?rev=1779094&view=rev
Log:
UIMA-5157 fixed cause for agent not stopping on admin stop request

Modified:
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java

Modified: uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java?rev=1779094&r1=1779093&r2=1779094&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java Mon Jan 16 20:05:49 2017
@@ -51,6 +51,7 @@ import org.apache.uima.ducc.agent.config
 import org.apache.uima.ducc.agent.event.AgentEventListener;
 import org.apache.uima.ducc.agent.event.ProcessLifecycleObserver;
 import org.apache.uima.ducc.agent.launcher.CGroupsManager;
+import org.apache.uima.ducc.agent.launcher.DuccCommandExecutor;
 import org.apache.uima.ducc.agent.launcher.Launcher;
 import org.apache.uima.ducc.agent.launcher.ManagedProcess;
 import org.apache.uima.ducc.agent.launcher.ManagedProcess.StopPriority;
@@ -309,7 +310,7 @@ public class NodeAgent extends AbstractD
                 // memory and cpu subsystem
                 String cgroupsSubsystems = System.getProperty("ducc.agent.launcher.cgroups.subsystems");
                 if (cgroupsSubsystems == null) {
-                  cgroupsSubsystems = "memory,cpu";
+                  cgroupsSubsystems = "memory,cpu,cpuacct";
                 }
         		long maxTimeToWaitForProcessToStop = 60000; // default 1 minute
         		if (configurationFactory.processStopTimeout != null) {
@@ -1403,7 +1404,7 @@ public class NodeAgent extends AbstractD
  	  }
   }
   
-  private boolean sendSIGTERM(ManagedProcess process) {
+  private boolean runnable(ManagedProcess process) {
 	  return ( process.getDuccProcess().getProcessState().equals(ProcessState.Initializing) ||
 			   process.getDuccProcess().getProcessState().equals(ProcessState.Starting) ||
 			   process.getDuccProcess().getProcessState().equals(ProcessState.Running) );
@@ -1421,7 +1422,7 @@ public class NodeAgent extends AbstractD
 	  try {
 	      for (ManagedProcess deployedProcess : deployedProcesses) {
             String pid = deployedProcess.getDuccProcess().getPID();
-            if (pid == null || pid.trim().length() == 0 || !sendSIGTERM(deployedProcess) ) {
+            if (pid == null || pid.trim().length() == 0 || !runnable(deployedProcess) ) {
             	continue;
             }
             logger.info(methodName, null, "....Stopping Process - DuccId:" + deployedProcess.getDuccProcess().getDuccId()
@@ -1438,6 +1439,48 @@ public class NodeAgent extends AbstractD
 	  }
 	  return wait;
   }
+      
+      /**
+       * Hard-kills child processes that survived SIGTERM: via cgroupsManager.cleanup() when
+       * cgroups are in use, otherwise by launching a forced kill command for each deployed
+       * process that has a PID and is still runnable. Failures are logged, not rethrown.
+       */
+      private void killChildProcesses() {
+        String methodName = "killChildProcesses";
+        try {
+          if (useCgroups) {
+            logger.info(methodName, null, "CgroupsManager.cleanup() before ");
+            // SIGTERM may not be enough: use cgroups to find any process still standing and hard kill it
+            cgroupsManager.cleanup();
+            logger.info(methodName, null, "CgroupsManager.cleanup() after ");
+            return;
+          }
+          for (ManagedProcess deployedProcess : deployedProcesses) {
+            String pid = deployedProcess.getDuccProcess().getPID();
+            if (pid == null || pid.trim().length() == 0 || !runnable(deployedProcess)) {
+              continue;
+            }
+            logger.info(methodName, null, "....Stopping Process - DuccId:" + deployedProcess.getDuccProcess().getDuccId()
+                + " PID:" + pid + " Sending SIGKILL Process State:" + deployedProcess.getDuccProcess().getProcessState().toString());
+            ICommandLine cmdLine;
+            if (Utils.isWindows()) {
+              cmdLine = new NonJavaCommandLine("taskkill");
+              // /F forces termination (the kill -9 equivalent); plain taskkill only requests a close
+              cmdLine.addArgument("/F");
+              cmdLine.addArgument("/PID");
+            } else {
+              cmdLine = new NonJavaCommandLine("/bin/kill");
+              cmdLine.addArgument("-9");
+            }
+            cmdLine.addArgument(pid);
+            deployedProcess.setStopping();
+            deployedProcess.setStopPriority(StopPriority.DONT_WAIT);
+            launcher.launchProcess(this, getIdentity(), deployedProcess.getDuccProcess(), cmdLine, this, deployedProcess);
+          }
+        } catch (Exception e) {
+          logger.warn(methodName, null, e);
+        }
+      }
   /**
    * Kills a given process
    * 
@@ -1793,13 +1836,23 @@ public class NodeAgent extends AbstractD
       return;
     }
     stopping = true;
+
+    // Send an empty process map as the final inventory 
+    HashMap<DuccId, IDuccProcess> emptyMap = 
+    		new HashMap<DuccId, IDuccProcess>();
+    DuccEvent duccEvent = new NodeInventoryUpdateDuccEvent(emptyMap,getLastORSequence(), getIdentity());
+    inventoryDispatcher.dispatch(duccEvent);
+    logger.info("stop", null, "Agent published final inventory");
+    
+    configurationFactory.stopRoutes();
+    
     logger.info("stop", null, "Agent stopping managed processes");
     // Dispatch SIGTERM to all child processes
     boolean wait = stopChildProcesses();
     
     // Stop publishing inventory. Once the route is down the agent forces last publication
     // sending an empty process map.
-    configurationFactory.stopInventoryRoute();
+    //configurationFactory.stopInventoryRoute();
 
     if ( wait && deployedProcesses.size() > 0 ) {
         logger.info("stop", null, "Agent Sent SIGTERM to ALL Child Processes - Number of Deployed Processes:"+deployedProcesses.size());
@@ -1818,17 +1871,9 @@ public class NodeAgent extends AbstractD
       }
     }
 
-    // Send an empty process map as the final inventory 
-    HashMap<DuccId, IDuccProcess> emptyMap = 
-    		new HashMap<DuccId, IDuccProcess>();
-    DuccEvent duccEvent = new NodeInventoryUpdateDuccEvent(emptyMap,getLastORSequence(), getIdentity());
-    inventoryDispatcher.dispatch(duccEvent);
-    logger.info("stop", null, "Agent published final inventory");
+    // send kill -9 to any child process still running
+    killChildProcesses();
 
-    // Since SIGTERM may not be enough to take down a process, use cgroups to find
-    // any process still standing and do hard kill 
-    cgroupsManager.cleanup();
-   
     // Self destruct thread in case we loose AMQ broker and AMQ listener gets into retry
     // mode trying to recover a connection
     Thread t = new Thread( new Runnable() {
@@ -1845,7 +1890,9 @@ public class NodeAgent extends AbstractD
     	}
     });
     t.start();
-    
+    t.join(10000);
+    logger.info("stop", null, "Reaper thread finished - calling super.stop()");
+
     super.stop();
   }