You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2016/09/06 21:01:26 UTC

svn commit: r1759510 - in /uima/uima-ducc/trunk: src/main/admin/ src/main/resources/ uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/ uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/ uima-ducc-common/src/main/java/org/apache/uima...

Author: degenaro
Date: Tue Sep  6 21:01:26 2016
New Revision: 1759510

URL: http://svn.apache.org/viewvc?rev=1759510&view=rev
Log:
UIMA-5060 DUCC Orchestrator (OR) "warm" restart issues

- eliminate (hidden) "hot" start keyword and transform "warm" start into recover all
- job monitor should be prepared for briefly regressive information from WS when polling for work item status after OR "warm" start
- start_ducc should give help information for OR specific --warm and --cold options

Added:
    uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java   (with props)
Modified:
    uima/uima-ducc/trunk/src/main/admin/ducc.py
    uima/uima-ducc/trunk/src/main/admin/start_ducc
    uima/uima-ducc/trunk/src/main/resources/default.ducc.properties
    uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex
    uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java

Modified: uima/uima-ducc/trunk/src/main/admin/ducc.py
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/ducc.py?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/ducc.py (original)
+++ uima/uima-ducc/trunk/src/main/admin/ducc.py Tue Sep  6 21:01:26 2016
@@ -294,7 +294,7 @@ class Ducc(DuccUtil):
         print '   -o <mem-in-GB> rm memory override for use on small machines'
         print '   -k causes the entire DUCC system to shutdown'
         print '   --nodup If specified, do not start a process if it appears to be already started.'
-        print '   --or_parms [cold|warm|hot]'
+        print '   --or_parms [cold|warm]'
         print '   --simtest If specified, use unblocked broker for sim tests.'
         print '   arguments - any additional arguments to pass to the component.'
         sys.exit(1)

Modified: uima/uima-ducc/trunk/src/main/admin/start_ducc
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/start_ducc?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/start_ducc (original)
+++ uima/uima-ducc/trunk/src/main/admin/start_ducc Tue Sep  6 21:01:26 2016
@@ -198,6 +198,16 @@ class StartDucc(DuccUtil):
         print "    --nothreading"
         print "        Disable multithreaded operation if it would otherwise be used"
         print ""
+        print "    Choose none or one of the following two options, which is only effective when the orchestrator (or) component is started."
+        print "    When specified here it supersedes that specified for ducc.orchestrator.start.type in ducc.properties."
+        print "    When not specified here or in ducc.properties, the default is --warm."
+        print ""
+        print "    --warm"
+        print "        Do NOT force active Jobs, Services, and Reservations to Completed state."
+        print ""
+        print "    --cold"
+        print "        Force active Jobs, Services, and Reservations to Completed state."
+        print ""
         print "Examples:"
         print "   Start all DUCC processes, using custom nodelists:"
         print "       start_ducc -n foo.nodes -n bar.nodes"
@@ -235,7 +245,7 @@ class StartDucc(DuccUtil):
         self.pids.load_if_exists(self.pid_file)
         
         try:
-            opts, args = getopt.getopt(argv, 'c:mn:sh?v', ['component=', 'components=', 'help', 'nodelist=', 'cold', 'warm', 'hot', 'nothreading'])
+            opts, args = getopt.getopt(argv, 'c:mn:sh?v', ['component=', 'components=', 'help', 'nodelist=', 'cold', 'warm', 'nothreading'])
         except:
             self.invalid('Invalid arguments', ' '.join(argv))
                        
@@ -246,7 +256,7 @@ class StartDucc(DuccUtil):
                 nodefiles.append(a)
             elif o in ( '--nothreading' ):
                 self.disable_threading()
-            elif o in ( '--cold', '--warm', '--hot' ):
+            elif o in ( '--cold', '--warm' ):
                 or_parms = o[2:]         # (strip the leading --)
             elif ( o == '-v'):
                 print self.version()

Modified: uima/uima-ducc/trunk/src/main/resources/default.ducc.properties
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/resources/default.ducc.properties?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/resources/default.ducc.properties (original)
+++ uima/uima-ducc/trunk/src/main/resources/default.ducc.properties Tue Sep  6 21:01:26 2016
@@ -527,18 +527,14 @@ ducc.sm.default.linger=300000
 ducc.orchestrator.configuration.class=org.apache.uima.ducc.orchestrator.config.OrchestratorConfiguration
 
 # This indicates the level of recovery to be taken on restarting a
-# system. There are two levels of startup:
+# system. There are two levels of startup:
 #     [cold] All reservations are canceled, all currently running
 #            jobs (if any) are terminated. All services are terminated. The
 #            system starts with no jobs, reservations, or services active.
-#     [warm] All unmanaged reservations are restored. All currently
-#            running jobs (if any) are terminated. All services are started or
-#            restarted as indicated by their state when the system went down.
-#            The system starts with no jobs active, but unmanaged reservations
-#            and services are preserved.
+#     [warm] All active work is continued.
 #------------------------------------------------------------------------------
-#	cold, // Recover: All is lost			         		    JD host: employ new
-#	warm, // Recover: Unmanaged Reservations only (default) 	JD host: employ new
+#	cold, // Recover: All active are forced to Completed	    JD host: employ new
+#   warm, // Recover: All Jobs+Services+Reservations (default)  JD host: employ current
 ducc.orchestrator.start.type=warm
 #------------------------------------------------------------------------------
 

Modified: uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java Tue Sep  6 21:01:26 2016
@@ -79,6 +79,8 @@ public abstract class DuccMonitor {
 	private DuccContext context = null;
 	protected IDuccCallback messageProcessor = null;
 
+	private volatile MonitorInfo previousMonitorInfo = null;
+	
 	private String delayedRunning = null;
 	
 	private SynchronizedSimpleDateFormat sdf = new SynchronizedSimpleDateFormat(
@@ -369,6 +371,16 @@ public abstract class DuccMonitor {
 			}
 
             if ( monitorInfo != null ) {
+            	// It is possible after an OR "warm" start that
+            	// work item processing status information
+            	// may be missing or incorrect for a short time.
+            	// Therefore, we ensure that newly arrived
+            	// information is not a regression from the
+            	// last good one received, if any.
+            	if(monitorInfo.isRegression(previousMonitorInfo)) {
+            		continue;
+            	}
+            	previousMonitorInfo = monitorInfo;
             	displayRemotePids(monitorInfo);
 				int stateCount = monitorInfo.stateSequence.size();
 				debug("states:" + stateCount);

Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java Tue Sep  6 21:01:26 2016
@@ -63,7 +63,7 @@ public class CommonConfiguration {
 	@Value("#{ systemProperties['ducc.orchestrator.checkpoint'] }")
 	public String orchestratorCheckpoint;
 	
-	//	fetch the orchestrator start type (cold/warm/hot)
+	//	fetch the orchestrator start type (cold/warm)
 	@Value("#{ systemProperties['ducc.orchestrator.start.type'] }")
 	public String orchestratorStartType;
 	

Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java Tue Sep  6 21:01:26 2016
@@ -34,4 +34,49 @@ public class MonitorInfo {
 	public List<String> remotePids = new ArrayList<String>();
 	public List<String> errorLogs = new ArrayList<String>();
 	public List<String> nodes = new ArrayList<String>();
+	
+	private int compareStringNumbers(String sn1, String sn2) {
+		int retVal = 0;
+		try {
+			Integer n1 = Integer.parseInt(sn1);
+			Integer n2 = Integer.parseInt(sn2);
+			retVal = n1.compareTo(n2);
+		}
+		catch(Exception e) {
+			// oh well...
+		}
+		return retVal;
+	}
+	
+	public int compareWith(MonitorInfo that) {
+		int retVal = 0;
+		try {
+			if(retVal == 0) {
+				retVal = compareStringNumbers(this.total, that.total);
+			}
+			if(retVal == 0) {
+				retVal = compareStringNumbers(this.done, that.done);
+			}
+			if(retVal == 0) {
+				retVal = compareStringNumbers(this.error, that.error);
+			}
+			if(retVal == 0) {
+				retVal = compareStringNumbers(this.retry, that.retry);
+			}
+		}
+		catch(Exception e) {
+			// oh well...
+		}
+		return retVal;
+	}
+	
+	public boolean isRegression(MonitorInfo that) {
+		boolean retVal = false;
+		if(that != null) {
+			if(compareWith(that) < 0) {
+				retVal = true;
+			}
+		}
+		return retVal;
+	}
 }

Added: uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java?rev=1759510&view=auto
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java (added)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java Tue Sep  6 21:01:26 2016
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+package org.apache.uima.ducc.common.test;
+
+import static org.junit.Assert.*;
+
+import org.apache.uima.ducc.common.json.MonitorInfo;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MonitorInfoTest {
+
+	@Before
+	public void setUp() throws Exception {
+	}
+
+	@After
+	public void tearDown() throws Exception {
+	}
+
+	@Test
+	public void test0() {
+		MonitorInfo m0 = new MonitorInfo();
+		assertFalse(m0.isRegression(null));
+	}
+	
+	@Test
+	public void test1() {
+		MonitorInfo m0 = new MonitorInfo();
+		MonitorInfo m1 = new MonitorInfo();
+		assertFalse(m1.isRegression(m0));
+	}
+	
+	@Test
+	public void test2() {
+		MonitorInfo m0 = new MonitorInfo();
+		MonitorInfo m1 = new MonitorInfo();
+		m0.total = "0";
+		m1.total = "1";
+		assertFalse(m1.isRegression(m0));
+		m0.total = "1";
+		m1.total = "0";
+		assertTrue(m1.isRegression(m0));
+	}
+	
+	@Test
+	public void test3() {
+		MonitorInfo m0 = new MonitorInfo();
+		MonitorInfo m1 = new MonitorInfo();
+		m0.done = "0";
+		m1.done = "1";
+		assertFalse(m1.isRegression(m0));
+		m0.done = "1";
+		m1.done = "0";
+		assertTrue(m1.isRegression(m0));
+	}
+	
+	@Test
+	public void test4() {
+		MonitorInfo m0 = new MonitorInfo();
+		MonitorInfo m1 = new MonitorInfo();
+		m0.error = "0";
+		m1.error = "1";
+		assertFalse(m1.isRegression(m0));
+		m0.error = "1";
+		m1.error = "0";
+		assertTrue(m1.isRegression(m0));
+	}
+	
+	@Test
+	public void test5() {
+		MonitorInfo m0 = new MonitorInfo();
+		MonitorInfo m1 = new MonitorInfo();
+		m0.retry = "0";
+		m1.retry = "1";
+		assertFalse(m1.isRegression(m0));
+		m0.retry = "1";
+		m1.retry = "0";
+		assertTrue(m1.isRegression(m0));
+	}
+}

Propchange: uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex Tue Sep  6 21:01:26 2016
@@ -43,8 +43,8 @@
         on previously run jobs.  Prior to the database, this data was saved in the
         {\em DUCC\_HOME/history directory}.
       \item Checkpoint.  On every state change, the Orchestrator saves the state of 
-        all running and allocated work in the system.  This is used to recover reservations
-        when DUCC is started, and to allow hot-start of the Orchestrator without losing work.
+        all running and allocated work in the system.  This is used to recover active allocations
+        when DUCC is started, enabling warm-start of the Orchestrator without losing work.
         Prior to the database, this data was saved in the file {\em DUCC\_HOME/state/orchestrator.ckpt}.
     \end{enumerate}
     

Modified: uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim (original)
+++ uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim Tue Sep  6 21:01:26 2016
@@ -314,7 +314,7 @@ class StartSim(DuccUtil):
         try:
             opts, args = getopt.getopt(argv, 'c:i:n:vh?', ['component=', 'help', 'agent', 'memory=', 
                                                           'instance=', 'addr=', 'pseudoname=', 'node-config=', 
-                                                          'version', 'hot', 'warm', 'cold',
+                                                          'version', 'warm', 'cold',
                                                            'nothreading'])
         except:
             self.invalid('Invalid arguments', ' '.join(argv))
@@ -343,7 +343,7 @@ class StartSim(DuccUtil):
             elif o in ( '-v', '--version' ):
                 print self.version()
                 os.exit(0)
-            elif o in ( '--hot', '--warm', '--cold' ):
+            elif o in ( '--warm', '--cold' ):
                 or_parms = o[2:]
             elif o in ( '-h', '--help' ):
                 self.usage(None)

Modified: uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java Tue Sep  6 21:01:26 2016
@@ -166,9 +166,6 @@ implements Orchestrator {
 			else if(startTypeProperty.equals("warm")) {
 				startType = StartType.warm;
 			}
-			else if(startTypeProperty.equals("hot")) {
-				startType = StartType.hot;
-			}
 			else {
 				logger.warn(methodName, null, "ducc.orchestrator.start.type value in ducc.properties not recognized: "+property);
 			}
@@ -225,9 +222,6 @@ implements Orchestrator {
 				else if(flag.equals(StartType.warm.toString())) {
 					startType = StartType.warm;
 				}
-				else if(flag.equals(StartType.hot.toString())) {
-					startType = StartType.hot;
-				}
 				else {
 					logger.warn(methodName, null, "unrecognized arg: "+arg);
 				}
@@ -292,10 +286,6 @@ implements Orchestrator {
 							saveState = true;
 							break;
 						case warm:
-							force(job, new Rationale("system warm start"));
-							saveState = true;
-							break;
-						case hot:
 							break;
 						}
 						break;
@@ -307,12 +297,6 @@ implements Orchestrator {
 							saveState = true;
 							break;
 						case warm:
-							if(reservation.isJdReservation()) {
-								cancel(reservation);
-								saveState = true;
-							}
-							break;
-						case hot:
 							break;
 						}
 						break;
@@ -327,10 +311,9 @@ implements Orchestrator {
 			}
 			switch(startType) {
 			case cold:
-			case warm:
 				jdScheduler = JdScheduler.getInstance();
 				break;
-			case hot:
+			case warm:
 				jdScheduler = JdScheduler.getInstance();
 				jdScheduler.restore();
 				break;

Modified: uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java Tue Sep  6 21:01:26 2016
@@ -26,7 +26,6 @@ public interface OrchestratorConstants e
 	
 		public enum StartType {
 			cold, // Recover: All is lost					JD host: employ new
-			warm, // Recover: Reservations only (default) 	JD host: employ new
-			hot , // Recover: Reservations and Jobs, 		JD host: employ current
+			warm, // Recover: All is recovered		 		JD host: employ current
 		}
 }