You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2016/09/06 21:01:26 UTC
svn commit: r1759510 - in /uima/uima-ducc/trunk: src/main/admin/
src/main/resources/ uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/
uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/
uima-ducc-common/src/main/java/org/apache/uima...
Author: degenaro
Date: Tue Sep 6 21:01:26 2016
New Revision: 1759510
URL: http://svn.apache.org/viewvc?rev=1759510&view=rev
Log:
UIMA-5060 DUCC Orchestrator (OR) "warm" restart issues
- eliminate (hidden) "hot" start keyword and transform "warm" start into recover all
- job monitor should be prepared for briefly regressive information from WS when polling for work item status after OR "warm" start
- start_ducc should give help information for OR specific --warm and --cold options
Added:
uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java (with props)
Modified:
uima/uima-ducc/trunk/src/main/admin/ducc.py
uima/uima-ducc/trunk/src/main/admin/start_ducc
uima/uima-ducc/trunk/src/main/resources/default.ducc.properties
uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java
uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java
uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java
uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex
uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim
uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java
uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java
Modified: uima/uima-ducc/trunk/src/main/admin/ducc.py
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/ducc.py?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/ducc.py (original)
+++ uima/uima-ducc/trunk/src/main/admin/ducc.py Tue Sep 6 21:01:26 2016
@@ -294,7 +294,7 @@ class Ducc(DuccUtil):
print ' -o <mem-in-GB> rm memory override for use on small machines'
print ' -k causes the entire DUCC system to shutdown'
print ' --nodup If specified, do not start a process if it appears to be already started.'
- print ' --or_parms [cold|warm|hot]'
+ print ' --or_parms [cold|warm]'
print ' --simtest If specified, use unblocked broker for sim tests.'
print ' arguments - any additional arguments to pass to the component.'
sys.exit(1)
Modified: uima/uima-ducc/trunk/src/main/admin/start_ducc
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/start_ducc?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/start_ducc (original)
+++ uima/uima-ducc/trunk/src/main/admin/start_ducc Tue Sep 6 21:01:26 2016
@@ -198,6 +198,16 @@ class StartDucc(DuccUtil):
print " --nothreading"
print " Disable multithreaded operation if it would otherwise be used"
print ""
+ print " Choose none or one of the following two options, which is only effective when the orchestrator (or) component is started."
+ print " When specified here it supersedes that specified for ducc.orchestrator.start.type in ducc.properties."
+ print " When not specified here or in ducc.properties, the default is --warm."
+ print ""
+ print " --warm"
+ print " Do NOT force active Jobs, Services, and Reservations to Completed state."
+ print ""
+ print " --cold"
+ print " Force active Jobs, Services, and Reservations to Completed state."
+ print ""
print "Examples:"
print " Start all DUCC processes, using custom nodelists:"
print " start_ducc -n foo.nodes -n bar.nodes"
@@ -235,7 +245,7 @@ class StartDucc(DuccUtil):
self.pids.load_if_exists(self.pid_file)
try:
- opts, args = getopt.getopt(argv, 'c:mn:sh?v', ['component=', 'components=', 'help', 'nodelist=', 'cold', 'warm', 'hot', 'nothreading'])
+ opts, args = getopt.getopt(argv, 'c:mn:sh?v', ['component=', 'components=', 'help', 'nodelist=', 'cold', 'warm', 'nothreading'])
except:
self.invalid('Invalid arguments', ' '.join(argv))
@@ -246,7 +256,7 @@ class StartDucc(DuccUtil):
nodefiles.append(a)
elif o in ( '--nothreading' ):
self.disable_threading()
- elif o in ( '--cold', '--warm', '--hot' ):
+ elif o in ( '--cold', '--warm' ):
or_parms = o[2:] # (strip the leading --)
elif ( o == '-v'):
print self.version()
Modified: uima/uima-ducc/trunk/src/main/resources/default.ducc.properties
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/resources/default.ducc.properties?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/resources/default.ducc.properties (original)
+++ uima/uima-ducc/trunk/src/main/resources/default.ducc.properties Tue Sep 6 21:01:26 2016
@@ -527,18 +527,14 @@ ducc.sm.default.linger=300000
ducc.orchestrator.configuration.class=org.apache.uima.ducc.orchestrator.config.OrchestratorConfiguration
# This indicates the level of recovery to be taken on restarting a
-# system. There are two levels of startup:
+# system. There are two levels of startup:
# [cold] All reservations are canceled, all currently running
# jobs (if any) are terminated. All services are terminated. The
# system starts with no jobs, reservations, or services active.
-# [warm] All unmanaged reservations are restored. All currently
-# running jobs (if any) are terminated. All services are started or
-# restarted as indicated by their state when the system went down.
-# The system starts with no jobs active, but unmanaged reservations
-# and services are preserved.
+# [warm] All active work is continued.
#------------------------------------------------------------------------------
-# cold, // Recover: All is lost JD host: employ new
-# warm, // Recover: Unmanaged Reservations only (default) JD host: employ new
+# cold, // Recover: All active are forced to Completed JD host: employ new
+# warm, // Recover: All Jobs+Services+Reservations (default) JD host: employ current
ducc.orchestrator.start.type=warm
#------------------------------------------------------------------------------
Modified: uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccMonitor.java Tue Sep 6 21:01:26 2016
@@ -79,6 +79,8 @@ public abstract class DuccMonitor {
private DuccContext context = null;
protected IDuccCallback messageProcessor = null;
+ private volatile MonitorInfo previousMonitorInfo = null;
+
private String delayedRunning = null;
private SynchronizedSimpleDateFormat sdf = new SynchronizedSimpleDateFormat(
@@ -369,6 +371,16 @@ public abstract class DuccMonitor {
}
if ( monitorInfo != null ) {
+ // It is possible after OR "warm" start that
+ // work item processing status information
+ // may be missing or incorrect for a short time.
+ // Therefore, we assure that newly arrived
+ // information is not a regression from the
+ // last good one received, if any.
+ if(monitorInfo.isRegression(previousMonitorInfo)) {
+ continue;
+ }
+ previousMonitorInfo = monitorInfo;
displayRemotePids(monitorInfo);
int stateCount = monitorInfo.stateSequence.size();
debug("states:" + stateCount);
Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/config/CommonConfiguration.java Tue Sep 6 21:01:26 2016
@@ -63,7 +63,7 @@ public class CommonConfiguration {
@Value("#{ systemProperties['ducc.orchestrator.checkpoint'] }")
public String orchestratorCheckpoint;
- // fetch the orchestrator start type (cold/warm/hot)
+ // fetch the orchestrator start type (cold/warm)
@Value("#{ systemProperties['ducc.orchestrator.start.type'] }")
public String orchestratorStartType;
Modified: uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/json/MonitorInfo.java Tue Sep 6 21:01:26 2016
@@ -34,4 +34,49 @@ public class MonitorInfo {
public List<String> remotePids = new ArrayList<String>();
public List<String> errorLogs = new ArrayList<String>();
public List<String> nodes = new ArrayList<String>();
+
+ private int compareStringNumbers(String sn1, String sn2) { // compare two strings as base-10 ints
+ int retVal = 0; // 0 ("equal") doubles as the fallback when parsing fails
+ try {
+ Integer n1 = Integer.parseInt(sn1);
+ Integer n2 = Integer.parseInt(sn2);
+ retVal = n1.compareTo(n2); // negative, zero or positive, per Integer.compareTo
+ }
+ catch(Exception e) {
+ // null or non-numeric input: deliberately ignored, retVal stays 0
+ }
+ return retVal;
+ }
+
+ public int compareWith(MonitorInfo that) { // ordered comparison: total, then done, error, retry
+ int retVal = 0;
+ try {
+ if(retVal == 0) { // always true here; kept parallel with the checks below
+ retVal = compareStringNumbers(this.total, that.total);
+ }
+ if(retVal == 0) { // totals tie (or could not be parsed): decide on done
+ retVal = compareStringNumbers(this.done, that.done);
+ }
+ if(retVal == 0) { // still tied: decide on error
+ retVal = compareStringNumbers(this.error, that.error);
+ }
+ if(retVal == 0) { // still tied: decide on retry
+ retVal = compareStringNumbers(this.retry, that.retry);
+ }
+ }
+ catch(Exception e) {
+ // e.g. that == null (NullPointerException above): keep whatever retVal holds, normally 0
+ }
+ return retVal; // <0, 0 or >0 from the first counter that differs
+ }
+
+ public boolean isRegression(MonitorInfo that) { // true when this compares lower than that
+ boolean retVal = false;
+ if(that != null) { // without a previous info there is nothing to regress from
+ if(compareWith(that) < 0) { // first differing counter is smaller in this
+ retVal = true;
+ }
+ }
+ return retVal;
+ }
}
Added: uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java?rev=1759510&view=auto
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java (added)
+++ uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java Tue Sep 6 21:01:26 2016
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+*/
+package org.apache.uima.ducc.common.test;
+
+import static org.junit.Assert.*;
+
+import org.apache.uima.ducc.common.json.MonitorInfo;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MonitorInfoTest { // unit tests for MonitorInfo.isRegression
+
+ @Before
+ public void setUp() throws Exception { // no fixture needed
+ }
+
+ @After
+ public void tearDown() throws Exception { // nothing to clean up
+ }
+
+ @Test
+ public void test0() { // a null previous info is never a regression
+ MonitorInfo m0 = new MonitorInfo();
+ assertFalse(m0.isRegression(null));
+ }
+
+ @Test
+ public void test1() { // two freshly constructed infos: no regression
+ MonitorInfo m0 = new MonitorInfo();
+ MonitorInfo m1 = new MonitorInfo();
+ assertFalse(m1.isRegression(m0));
+ }
+
+ @Test
+ public void test2() { // total: an increase is fine, a decrease is a regression
+ MonitorInfo m0 = new MonitorInfo();
+ MonitorInfo m1 = new MonitorInfo();
+ m0.total = "0";
+ m1.total = "1";
+ assertFalse(m1.isRegression(m0));
+ m0.total = "1";
+ m1.total = "0";
+ assertTrue(m1.isRegression(m0));
+ }
+
+ @Test
+ public void test3() { // done: an increase is fine, a decrease is a regression
+ MonitorInfo m0 = new MonitorInfo();
+ MonitorInfo m1 = new MonitorInfo();
+ m0.done = "0";
+ m1.done = "1";
+ assertFalse(m1.isRegression(m0));
+ m0.done = "1";
+ m1.done = "0";
+ assertTrue(m1.isRegression(m0));
+ }
+
+ @Test
+ public void test4() { // error: an increase is fine, a decrease is a regression
+ MonitorInfo m0 = new MonitorInfo();
+ MonitorInfo m1 = new MonitorInfo();
+ m0.error = "0";
+ m1.error = "1";
+ assertFalse(m1.isRegression(m0));
+ m0.error = "1";
+ m1.error = "0";
+ assertTrue(m1.isRegression(m0));
+ }
+
+ @Test
+ public void test5() { // retry: an increase is fine, a decrease is a regression
+ MonitorInfo m0 = new MonitorInfo();
+ MonitorInfo m1 = new MonitorInfo();
+ m0.retry = "0";
+ m1.retry = "1";
+ assertFalse(m1.isRegression(m0));
+ m0.retry = "1";
+ m1.retry = "0";
+ assertTrue(m1.isRegression(m0));
+ }
+}
Propchange: uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: uima/uima-ducc/trunk/uima-ducc-common/src/test/java/org/apache/uima/ducc/common/test/MonitorInfoTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex (original)
+++ uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-database.tex Tue Sep 6 21:01:26 2016
@@ -43,8 +43,8 @@
on previously run jobs. Prior to the database, this data was saved in the
{\em DUCC\_HOME/history directory}.
\item Checkpoint. On every state change, the Orchestrator saves the state of
- all running and allocated work in the system. This is used to recover reservations
- when DUCC is started, and to allow hot-start of the Orchestrator without losing work.
+ all running and allocated work in the system. This is used to recover active allocations
+ when DUCC is started, enabling warm-start of the Orchestrator without losing work.
Prior to the database, this data was saved in the file {\em DUCC\_HOME/state/orchestrator.ckpt}.
\end{enumerate}
Modified: uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim (original)
+++ uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim Tue Sep 6 21:01:26 2016
@@ -314,7 +314,7 @@ class StartSim(DuccUtil):
try:
opts, args = getopt.getopt(argv, 'c:i:n:vh?', ['component=', 'help', 'agent', 'memory=',
'instance=', 'addr=', 'pseudoname=', 'node-config=',
- 'version', 'hot', 'warm', 'cold',
+ 'version', 'warm', 'cold',
'nothreading'])
except:
self.invalid('Invalid arguments', ' '.join(argv))
@@ -343,7 +343,7 @@ class StartSim(DuccUtil):
elif o in ( '-v', '--version' ):
print self.version()
os.exit(0)
- elif o in ( '--hot', '--warm', '--cold' ):
+ elif o in ( '--warm', '--cold' ):
or_parms = o[2:]
elif o in ( '-h', '--help' ):
self.usage(None)
Modified: uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java Tue Sep 6 21:01:26 2016
@@ -166,9 +166,6 @@ implements Orchestrator {
else if(startTypeProperty.equals("warm")) {
startType = StartType.warm;
}
- else if(startTypeProperty.equals("hot")) {
- startType = StartType.hot;
- }
else {
logger.warn(methodName, null, "ducc.orchestrator.start.type value in ducc.properties not recognized: "+property);
}
@@ -225,9 +222,6 @@ implements Orchestrator {
else if(flag.equals(StartType.warm.toString())) {
startType = StartType.warm;
}
- else if(flag.equals(StartType.hot.toString())) {
- startType = StartType.hot;
- }
else {
logger.warn(methodName, null, "unrecognized arg: "+arg);
}
@@ -292,10 +286,6 @@ implements Orchestrator {
saveState = true;
break;
case warm:
- force(job, new Rationale("system warm start"));
- saveState = true;
- break;
- case hot:
break;
}
break;
@@ -307,12 +297,6 @@ implements Orchestrator {
saveState = true;
break;
case warm:
- if(reservation.isJdReservation()) {
- cancel(reservation);
- saveState = true;
- }
- break;
- case hot:
break;
}
break;
@@ -327,10 +311,9 @@ implements Orchestrator {
}
switch(startType) {
case cold:
- case warm:
jdScheduler = JdScheduler.getInstance();
break;
- case hot:
+ case warm:
jdScheduler = JdScheduler.getInstance();
jdScheduler.restore();
break;
Modified: uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java?rev=1759510&r1=1759509&r2=1759510&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorConstants.java Tue Sep 6 21:01:26 2016
@@ -26,7 +26,6 @@ public interface OrchestratorConstants e
public enum StartType {
cold, // Recover: All is lost JD host: employ new
- warm, // Recover: Reservations only (default) JD host: employ new
- hot , // Recover: Reservations and Jobs, JD host: employ current
+ warm, // Recover: All is recovered JD host: employ current
}
}