You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by de...@apache.org on 2018/04/23 19:07:33 UTC

svn commit: r1829921 [1/6] - in /uima/uima-ducc/trunk: ./ src/main/admin/ src/main/config/ src/main/resources/ src/main/scripts/ src/main/test/ uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/ uima-ducc-agent/src/main/java/org/apache/uima/ducc...

Author: degenaro
Date: Mon Apr 23 19:07:31 2018
New Revision: 1829921

URL: http://svn.apache.org/viewvc?rev=1829921&view=rev
Log:
UIMA-5742 Reliable DUCC - merge with branch

Added:
    uima/uima-ducc/trunk/src/main/admin/db_start
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/src/main/admin/db_start
    uima/uima-ducc/trunk/src/main/admin/db_stop
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/src/main/admin/db_stop
    uima/uima-ducc/trunk/src/main/admin/ducc_head_mode.py
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/src/main/admin/ducc_head_mode.py
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/head/
      - copied from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/head/
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/head/ADuccHead.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/head/ADuccHead.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/head/IDuccHead.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/head/IDuccHead.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/InetHelper.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/InetHelper.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbOrchestratorProperties.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbOrchestratorProperties.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/IDbOrchestratorProperties.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/IDbOrchestratorProperties.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/IOrchestratorProperties.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/IOrchestratorProperties.java
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/reliable.tex
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-duccdocs/src/site/tex/duccbook/part4/reliable.tex
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/reliable-ducc.tex
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-duccdocs/src/site/tex/duccbook/reliable-ducc.tex
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/DuccHead.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/DuccHead.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/
      - copied from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/AOrchestratorCheckpoint.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/AOrchestratorCheckpoint.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/IOrchestratorCheckpoint.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/IOrchestratorCheckpoint.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/OrchestratorCheckpoint.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/OrchestratorCheckpoint.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/OrchestratorCheckpointDb.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/OrchestratorCheckpointDb.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/OrchestratorCheckpointFile.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ckpt/OrchestratorCheckpointFile.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/database/
      - copied from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/database/
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/database/OrDbDuccWorks.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/database/OrDbDuccWorks.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/database/OrDbOrchestratorProperties.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/database/OrDbOrchestratorProperties.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/
      - copied from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/AOrchestratorState.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/AOrchestratorState.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/DuccWorkIdFactory.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/DuccWorkIdFactory.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/IOrchestratorState.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/IOrchestratorState.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorState.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorState.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorStateDb.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorStateDb.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorStateDbConversion.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorStateDbConversion.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorStateFile.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorStateFile.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorStateJson.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/state/OrchestratorStateJson.java
    uima/uima-ducc/trunk/uima-ducc-pm/src/main/java/org/apache/uima/ducc/pm/DuccHead.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-pm/src/main/java/org/apache/uima/ducc/pm/DuccHead.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/DuccHead.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/DuccHead.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/persistence/
      - copied from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/persistence/
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/persistence/access/
      - copied from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/persistence/access/
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/persistence/access/IPersistenceAccess.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/persistence/access/IPersistenceAccess.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/persistence/access/PersistenceAccess.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/persistence/access/PersistenceAccess.java
    uima/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/DuccHead.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/DuccHead.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/DuccHead.java
      - copied unchanged from r1829908, uima/uima-ducc/branches/reliable-ducc/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/DuccHead.java
Removed:
    uima/uima-ducc/trunk/src/main/admin/move_ducc
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbVerify.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorCheckpoint.java
Modified:
    uima/uima-ducc/trunk/   (props changed)
    uima/uima-ducc/trunk/src/main/admin/check_ducc
    uima/uima-ducc/trunk/src/main/admin/ducc.py
    uima/uima-ducc/trunk/src/main/admin/ducc_post_install
    uima/uima-ducc/trunk/src/main/admin/ducc_util.py
    uima/uima-ducc/trunk/src/main/admin/start_ducc
    uima/uima-ducc/trunk/src/main/admin/stop_ducc
    uima/uima-ducc/trunk/src/main/config/cassandra-env.sh
    uima/uima-ducc/trunk/src/main/resources/default.ducc.properties
    uima/uima-ducc/trunk/src/main/resources/log4j.xml
    uima/uima-ducc/trunk/src/main/scripts/properties.py
    uima/uima-ducc/trunk/src/main/test/service.py
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/config/AgentConfiguration.java
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/deploy/DuccWorkHelper.java
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/event/AgentEventListener.java
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsTest.java
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/Launcher.java
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/DefaultNodeInventoryProcessor.java
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/DefaultNodeMetricsProcessor.java
    uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/LinuxNodeMetricsProcessor.java
    uima/uima-ducc/trunk/uima-ducc-cli/src/main/java/org/apache/uima/ducc/cli/DuccUiUtilities.java
    uima/uima-ducc/trunk/uima-ducc-cli/src/test/java/org/apache/uima/ducc/cli/test/ServiceTester.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/IDuccEnv.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/NodeIdentity.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/boot/DuccDaemonRuntimeProperties.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/component/AbstractDuccComponent.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/main/DuccService.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/services/IStateServices.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/services/NullStateServices.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/persistence/services/StateServices.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccPropertiesHelper.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/DuccPropertiesResolver.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/IDuccLoggerComponents.java
    uima/uima-ducc/trunk/uima-ducc-common/src/main/java/org/apache/uima/ducc/common/utils/id/DuccIdFactory.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbDuccWorks.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbLoader.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbManager.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/DbUtil.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/HistoryManagerDb.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/RmStatePersistence.java
    uima/uima-ducc/trunk/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/StateServicesDb.java
    uima/uima-ducc/trunk/uima-ducc-duccdocs/pom.xml
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part1/terminology.tex
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/admin-commands.tex
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/ducc-aguide.tex
    uima/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/sm.tex
    uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/start_sim
    uima/uima-ducc/trunk/uima-ducc-examples/src/main/scripts/stop_sim
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorCommonArea.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorComponent.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorRecovery.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/OrchestratorState.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ProcessAccounting.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/ReservationFactory.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/StateJobAccounting.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/StateManager.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/WorkMapHelper.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/config/OrchestratorConfiguration.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/event/OrchestratorEventListener.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/factory/JobFactory.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/jd/scheduler/JdReservation.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/jd/scheduler/JdScheduler.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/HealthMonitor.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/MaintenanceThread.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/maintenance/NodeAccounting.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/main/java/org/apache/uima/ducc/orchestrator/system/events/log/SystemEventsLogger.java
    uima/uima-ducc/trunk/uima-ducc-orchestrator/src/test/java/org/apache/uima/ducc/orchestrator/jd/scheduler/test/TestSuite.java
    uima/uima-ducc/trunk/uima-ducc-pm/src/main/java/org/apache/uima/ducc/pm/ProcessManagerComponent.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/JobManagerConverter.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/NodeStability.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/ResourceManagerComponent.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/event/ResourceManagerEventListener.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Machine.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/NodePool.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Scheduler.java
    uima/uima-ducc/trunk/uima-ducc-rm/src/main/java/org/apache/uima/ducc/rm/scheduler/Share.java
    uima/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceHandler.java
    uima/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerComponent.java
    uima/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceManagerHelper.java
    uima/uima-ducc/trunk/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/configuration/jd/JobDriverStateExchanger.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/configuration/jp/DuccHttpClient.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/AbstractDuccEvent.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/DaemonDuccEvent.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/DuccEvent.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/JdEvent.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/common/DuccWorkReservation.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/common/DuccWorkUtil.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/rm/Resource.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/event/sm/IService.java
    uima/uima-ducc/trunk/uima-ducc-transport/src/main/java/org/apache/uima/ducc/transport/json/jp/JobProcessCollection.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/Distiller.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/DuccBoot.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/DuccData.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/DuccDataHelper.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/DuccMachinesData.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/MachineInfo.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/WebServerComponent.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/config/WebServerConfiguration.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/helper/BrokerHelper.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccAbstractHandler.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandler.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerClassic.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccHandlerJsonFormat.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccWebServer.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccWebServerHelper.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/DuccWebUtil.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/Helper.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/java/org/apache/uima/ducc/ws/server/nodeviz/NodeViz.java
    uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/$banner/c4-ducc-mon.jsp
    uima/uima-ducc/trunk/uima-ducc-web/src/main/webapp/root/js/ducc.js

Propchange: uima/uima-ducc/trunk/
------------------------------------------------------------------------------
    svn:mergeinfo = /uima/uima-ducc/branches/reliable-ducc:1825443-1829908

Modified: uima/uima-ducc/trunk/src/main/admin/check_ducc
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/check_ducc?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/check_ducc (original)
+++ uima/uima-ducc/trunk/src/main/admin/check_ducc Mon Apr 23 19:07:31 2018
@@ -99,6 +99,10 @@ class CheckDucc(DuccUtil):
 
                 signal = self.kill_signal
                 
+                if(self.is_reliable_backup()):
+                    if ( component == 'agent' ):
+                        continue
+                    
                 if ( component == 'orchestrator' ):
                     component = 'or'
                     
@@ -116,18 +120,21 @@ class CheckDucc(DuccUtil):
                     else:
                         messages.append((spacer, 'Killing (' +  signal + ')', process_id))
                         self.kill_process(node, proc, signal)
-                        self.pids.delete(pid)
+                        if ( component == 'agent' ):
+                            self.pids_agents.delete(pid)
+                        else:
+                            self.pids_daemons.delete(pid)
                         process_changes = True
 
                 else:
                     messages.append((spacer, 'Found', process_id))
                     full_name = component + '@' + node
                     if ( component == 'agent' ):
-                        self.pids.put(full_name, pid)
-
-                    if ( component in self.default_components ):
-                        self.pids.put(full_name, pid)
-                        self.pids.put(component, full_name)
+                        self.pids_agents.put(full_name, pid)
+                    else:
+                        if ( component in self.default_components ):
+                            self.pids_daemons.put(full_name, pid)
+                            self.pids_daemons.put(component, full_name)
         else:
             messages.append((spacer, 'no processes found.'))
 
@@ -172,6 +179,11 @@ class CheckDucc(DuccUtil):
         print ""
         print "      check_ducc -n ../resources/ducc.nodes"
         print ""
+        print "   For reliable DUCC agents will not be killed from backup head node. "
+        print ""
+        print "   Broker will not be killed when ducc.broker.automanage = false. "
+        print "   Database will not be killed when ducc.database.automanage = false. "
+        print ""
         print "Options:"
         print "    -n --nodelist nodefile"
         print "        Check for agents on the nodes in nodefile.  This option may be specified multiple time"
@@ -203,7 +215,7 @@ class CheckDucc(DuccUtil):
         print "       changes."
         print ""
         print "    -x localdate"
-        print "       Validate the local installation, called via ssh usually. The date is the dat on the calling machine."
+        print "       Validate the local installation, called via ssh usually. The date is the date on the calling machine."
         print ""
         print "    --nothreading"
         print "        Disable multithreaded operation if it would otherwise be used"
@@ -306,9 +318,12 @@ class CheckDucc(DuccUtil):
         self.verify_database() 
 
         # init the PID file
-        self.pids = Properties()
-        self.pids.load_if_exists(self.pid_file)
-
+        if(not self.is_reliable_backup()):
+        	self.pids_agents = Properties()
+        	self.pids_agents.load_if_exists(self.pid_file_agents)
+        self.pids_daemons = Properties()
+        self.pids_daemons.load_if_exists(self.pid_file_daemons)
+        
         # read the nodelists
         if ( len(nodefiles) == 0 ):
             nodefiles = self.default_nodefiles
@@ -348,11 +363,6 @@ class CheckDucc(DuccUtil):
             else:
                 print "NOTOK: Errors in class or node configuration."
 
-            if self.verify_head_failover_configuration():
-                print "OK: Failover configuration checked"
-            else:
-                print "NOTOK: Errors in failover configuration."
-
             return
 
         # checking starts here        
@@ -373,6 +383,11 @@ class CheckDucc(DuccUtil):
     
                     checked[node] = node
                     self.threadpool.invoke(self.check_node, node)
+            # check backup head node(s)
+            for node in self.get_head_node_list():
+                if(not node in checked):
+                    checked[node] = node
+                    self.threadpool.invoke(self.check_node, node)
         except:
             self.threadpool.quit()
             print sys.exc_info()[0], "Exiting."
@@ -381,17 +396,30 @@ class CheckDucc(DuccUtil):
         self.threadpool.quit()
 
         if ( self.kill_signal != None ):
-            print 'Stopping broker'
-            self.stop_broker()
-            print 'Stopping database'
-            self.db_stop()
-                
-        if ( len(self.pids) == 0):
-            if ( os.path.exists(self.pid_file) ):
-                os.remove(self.pid_file)
+            if(self.automanage_broker):
+                print 'Stopping broker'
+                self.stop_broker()
+            else:
+                print 'Not stopping broker - not automanaged'
+            if(self.automanage_database):
+                print 'Stopping database'
+                self.db_stop()
+            else:
+                print 'Not stopping database - not automanaged'
+            
+        if(not self.is_reliable_backup()):
+            if ( len(self.pids_agents) == 0):
+                if ( os.path.exists(self.pid_file_agents) ):
+                    os.remove(self.pid_file_agents)
+            elif (process_changes or redo_pids):
+                self.pids_agents.write(self.pid_file_agents)
+                    
+        if ( len(self.pids_daemons) == 0):
+            if ( os.path.exists(self.pid_file_daemons) ):
+                os.remove(self.pid_file_daemons)
         elif (process_changes or redo_pids):
-            self.pids.write(self.pid_file)
-    
+            self.pids_daemons.write(self.pid_file_daemons)
+            
 if __name__ == "__main__":
     checker = CheckDucc()
     checker.main(sys.argv[1:])

Modified: uima/uima-ducc/trunk/src/main/admin/ducc.py
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/ducc.py?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/ducc.py (original)
+++ uima/uima-ducc/trunk/src/main/admin/ducc.py Mon Apr 23 19:07:31 2018
@@ -71,9 +71,7 @@ class Ducc(DuccUtil):
             os.environ['LOCAL_JMX'] = 'no'
 
         os.chdir(self.DUCC_HOME + "/cassandra-server")
-        pidfile = self.DUCC_HOME + '/state/cassandra.pid'
-        logfile = self.DUCC_HOME + '/logs/cassandra.console'
-        CMD = "bin/cassandra -p " + pidfile + " > " + logfile + " 2>&1"
+        CMD = "bin/cassandra -p " + self.db_pidfile + " > " + self.db_logfile + " 2>&1"
         print '------- Running', CMD
 
         os.system(CMD);

Modified: uima/uima-ducc/trunk/src/main/admin/ducc_post_install
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/ducc_post_install?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/ducc_post_install (original)
+++ uima/uima-ducc/trunk/src/main/admin/ducc_post_install Mon Apr 23 19:07:31 2018
@@ -472,14 +472,14 @@ class PostInstall():
 
         statedir = self.DUCC_HOME + "/state"
         logdir = self.DUCC_HOME + "/logs"
-        logwsdir = self.DUCC_HOME + "/logs/webserver"
+        #logwsdir = self.DUCC_HOME + "/logs/webserver"
         historydir = self.DUCC_HOME + "/history"
         if ( not os.path.exists(statedir) ):
             os.mkdir(statedir)
         if ( not os.path.exists(logdir) ):
             os.mkdir(logdir)
-        if ( not os.path.exists(logwsdir) ):
-            os.mkdir(logwsdir)
+        #if ( not os.path.exists(logwsdir) ):
+        #    os.mkdir(logwsdir)
         if ( not os.path.exists(historydir) ):
             os.mkdir(historydir)
 

Modified: uima/uima-ducc/trunk/src/main/admin/ducc_util.py
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/ducc_util.py?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/ducc_util.py (original)
+++ uima/uima-ducc/trunk/src/main/admin/ducc_util.py Mon Apr 23 19:07:31 2018
@@ -65,6 +65,12 @@ import db_util as dbu
 global use_threading
 use_threading = True
 
+ducc_util_debug_flag = False
+
+def debug(label,data):
+    if(ducc_util_debug_flag):
+        print label, data
+
 # The "ducc" userid is the user that installed DUCC and created this file.
 # If the admin dir's permissions were 700 then could assume the current user is the ducc user
 def find_ducc_uid():
@@ -138,6 +144,13 @@ class ThreadPool:
 
 class DuccUtil(DuccBase):
 
+    def makedirs(self,dir):
+        try:
+            os.makedirs(dir)
+            print 'make: '+dir
+        except OSError:    # best effort: dir already exists or cannot be created
+            pass
+        
     def update_properties(self):
 
         if ( self.ducc_properties == None ):
@@ -149,7 +162,7 @@ class DuccUtil(DuccBase):
         self.ducc_uid          = find_ducc_uid()
         # self.broker_url      = self.ducc_properties.get('ducc.broker.url')
         self.broker_protocol   = self.ducc_properties.get('ducc.broker.protocol')
-        self.broker_host       = self.ducc_properties.get('ducc.broker.hostname')
+        self.broker_host       = self.localhost
         self.broker_port       = self.ducc_properties.get('ducc.broker.port')
         self.broker_jmx_port   = self.ducc_properties.get('ducc.broker.jmx.port')
         self.broker_decoration = self.ducc_properties.get('ducc.broker.url.decoration')
@@ -204,12 +217,10 @@ class DuccUtil(DuccBase):
                 
     # does the database process exist?  
     def db_process_alive(self):
-        pidfile = self.DUCC_HOME + '/state/cassandra.pid'
-
-        if ( not os.path.exists(pidfile) ):
+        if ( not os.path.exists(self.db_pidfile) ):
             return False
 
-        f = open(self.DUCC_HOME + '/state/cassandra.pid')
+        f = open(self.db_pidfile)
         pid = f.read();
         f.close()
         answer = []
@@ -239,8 +250,7 @@ class DuccUtil(DuccBase):
                 print 'No database location defined.'
             return False
 
-        pidfile = self.DUCC_HOME + '/state/cassandra.pid'
-        if ( not os.path.exists(pidfile) ):
+        if ( not os.path.exists(self.db_pidfile) ):
             if(verbose):
                 print 'Database pid file does not exist.  Checking DB connectivity.'
 
@@ -306,17 +316,37 @@ class DuccUtil(DuccBase):
         return False
 
     def db_stop(self):
-
-        if ( self.db_bypass == True) :
-            print '   (Bypass database stop because ducc.database.host =', self.db_disabled + ')'
-            return True
-
-        pidfile = self.DUCC_HOME + '/state/cassandra.pid'
-        if ( os.path.exists(pidfile) ):
-            # for cassandra, just send it a terminate signal.  a pidfile is written on startup
-            CMD = ['kill', '-TERM', '`cat ' + pidfile + '`']
-            CMD = ' '.join(CMD)
-            os.system(CMD)
+        try:
+            if ( self.db_bypass == True) :
+                print '   (Bypass database stop because ducc.database.host =', self.db_disabled + ')'
+                return True
+            dbnode = self.ducc_properties.get('ducc.database.host')
+            dbnode = dbnode.strip()
+            pidfile = os.path.join(self.DUCC_HOME,'state','database',dbnode,'cassandra.pid')
+            cmd = [ 'less', '-FX', pidfile ]
+            cmd = ' '.join(cmd)
+            # stop the database: read its pid file over ssh on the database node
+            stdout = self.ssh(dbnode, True, cmd)
+            result = stdout.read().strip()
+            tokens = result.split()
+            if(len(tokens) == 1):
+                pid = tokens[0].strip()
+                # a single token is taken to be the pid; send it signal 15 (TERM)
+                cmd = [ 'kill', '-15', pid ]
+                cmd = ' '.join(cmd)
+                #print cmd
+                stdout = self.ssh(dbnode, True, cmd)
+                result = stdout.read().strip()
+                #print result
+                print 'Database stopped.'
+                return True
+            else:
+                # empty or multi-token output is treated as "no usable pid"
+                print 'Database not running.'
+                return True
+        except Exception,e:
+            print e
+            return False
 
     def find_netstat(self):
         # don't you wish people would get together on where stuff lives?
@@ -354,7 +384,7 @@ class DuccUtil(DuccBase):
 
     def stop_broker(self):
 
-        broker_host = self.ducc_properties.get('ducc.broker.hostname')
+        broker_host = self.localhost
         broker_home = self.ducc_properties.get('ducc.broker.home')
         broker_name = self.ducc_properties.get('ducc.broker.name')
         broker_jmx  = self.ducc_properties.get('ducc.broker.jmx.port')
@@ -384,10 +414,23 @@ class DuccUtil(DuccBase):
         if ( showpid ) :
             print 'PID', ducc.pid
 
+    def get_hostname(self):
+        hostname = '?'
+        cmd = '/bin/hostname'
+        resp = self.popen(cmd)
+        lines = resp.readlines()
+        if(len(lines)== 1):
+            line = lines[0]
+            line = line.strip();
+            hostname = line.split('.')[0]
+        return hostname
+    
     def ssh_operational(self, node):
         is_operational = False
         req = node.split('.')[0]
         cmd = '/bin/hostname'
+        if(node == 'localhost'):
+            req = self.get_hostname()
         ssh_cmd = 'ssh -q -o BatchMode=yes -o ConnectTimeout=10'+' '+node+" "+cmd
         resp = self.popen(ssh_cmd)
         lines = resp.readlines()
@@ -528,40 +571,127 @@ class DuccUtil(DuccBase):
             print 'NOTOK', CMD, 'returns', int(rc), '.  Must return rc 0.  Startup cannot continue.'
             return False
         return True
-
-    # inspect ducc.head.failover
-    def verify_head_failover(self, head):
-        key = "ducc.head.failover"
-        failover = self.ducc_properties.get(key)
-        # check for no failover
-        if(failover == None):
-            logger.debug(key+" not specified")
-        else:
-            # insure ducc.head listed in ducc.head.failover
-            if(not head in failover):
-                text = head+" not found in "+key
-                logger.error(text)
+    
+    # determine whether a string represents an integer
+    def is_int(self,string):
+        result = True
+        try:
+            number = int(string)
+        except:
+            result = False
+        return result
+    
+    # transform hostname into ip address
+    def get_ip_address(self,hostname):
+        label = 'get_ip_address'
+        result = None
+        try:
+            p = subprocess.Popen(['/usr/bin/nslookup', hostname], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            output, err = p.communicate()
+            #print hostname, output, err
+            name = None
+            for line in output.splitlines():
+                tokens = line.split()
+                if(len(tokens) == 2):
+                    t0 = tokens[0]
+                    t1 = tokens[1]
+                    if(t0 == 'Address:'):
+                        if(name != None):
+                            result = t1
+                            break
+                    elif(t0 == 'Name:'):
+                        name = t1
+        except Exception as e:
+            print e
+        debug(label, str(result))
+        return result
+    
+    # get ducc.head.reliable.list
+    def get_head_node_list(self):
+        head_node_list = []
+        # add ducc.head.reliable.list node(s)
+        ducc_head_list = self.ducc_properties.get("ducc.head.reliable.list")
+        if(ducc_head_list != None):
+            ducc_head_nodes = ducc_head_list.split()
+            if(len(ducc_head_nodes)== 0):
+                pass
+            elif(len(ducc_head_nodes)== 1):
+                print '>>> ERROR - "ducc.head.reliable.list" missing or invalid.'
                 sys.exit(1);
-            # test viability fo failover nodes
-            nodes = failover.replace(',',' ').split()
-            for node in nodes:
-                if(self.ssh_operational(node)):
-                    text = "ssh is operational to "+node
-                    logger.debug(text)
-                else:
-                    text = "ssh to specified failover node unsuccessful or otherwise problematic: "+node
-                    logger.warn(text)
+            else:
+                head_node_list = ducc_head_nodes
+        return head_node_list
+    
+    # get all possible hostnames & ip addresses for a head node
+    def get_head_node_list_variations(self):
+        # start with ducc.head.reliable.list node(s)
+        head_node_list = self.get_head_node_list()
+        # add ducc.head node
+        ducc_head = self.ducc_properties.get("ducc.head")
+        if(ducc_head == None):
+            print '>>> ERROR - "ducc.head" missing or invalid.'
+            sys.exit(1);
+        ducc_head_nodes = ducc_head.split()
+        if(len(ducc_head_nodes) != 1):
+            print '>>> ERROR - "ducc.head" missing or invalid.'
+            sys.exit(1);
+        head_node = ducc_head_nodes[0]
+        if(not head_node in head_node_list):
+            head_node_list.append(head_node)
+        # add short names; numeric ones (presumably an ip's first octet) are skipped
+        # (iterate over a copy: head_node_list grows inside the loop)
+        for node in head_node_list[:]:
+            short_name = node.split('.')[0]
+            if(not self.is_int(short_name)):
+                if(not short_name in head_node_list):
+                    head_node_list.append(short_name)
+        # add ip addresses
+        # (again over a copy, so appended addresses are not looked up themselves)
+        for node in head_node_list[:]:
+            ip = self.get_ip_address(node)
+            if(ip != None):
+                if(not ip in head_node_list):
+                    head_node_list.append(ip)
+        #
+        debug('head_node_list: ', head_node_list)
+        return head_node_list
+    
+    # drop domain and whitespace
+    def normalize(self,name):
+        result = name
+        if(name != None):
+            result = name
+            result = result.strip()
+            result = result.split('.')[0]
+        return result
+    
+    # get current host's name
+    def get_node_name(self):
+        node_name = 'unknown'
+        cmd = '/bin/hostname'
+        resp = self.popen(cmd)
+        lines = resp.readlines()
+        if(len(lines)== 1):
+            name = lines[0]
+            node_name = self.normalize(name)
+        debug('node_name: ', node_name)
+        return node_name
     
     # Exit if this is not the head node.  Ignore the domain as uname sometimes drops it.
     # Also check that ssh to this node works
     # Also restrict operations to the userid that installed ducc
     def verify_head(self):
-        head = self.ducc_properties.get("ducc.head").split('.')[0]
-        local = self.localhost.split('.')[0]
-        if local != head:
-            print ">>> ERROR - this script must be run from the head node"
-            sys.exit(1);
-        node = head
+        head_node_list = self.get_head_node_list_variations()
+        node = self.get_node_name()
+        if(node in head_node_list):
+            pass
+        else:
+            ip = self.get_ip_address(node)
+            if(ip in head_node_list):
+                pass
+            else:
+                print ">>> ERROR - "+node+" not configured as head node."
+                sys.exit(1);
         if(self.ssh_operational(node)):
             text = "ssh is operational to "+node
             #print text
@@ -573,8 +703,6 @@ class DuccUtil(DuccBase):
         if dir_stat.st_uid != os.getuid():
             print ">>> ERROR - this script must be run by the userid that installed DUCC"
             sys.exit(1);
-        self.verify_head_failover(head)
-
 
     #
     # Verify the viability of ducc_ling.
@@ -1060,33 +1188,6 @@ class DuccUtil(DuccBase):
         #print 'result: '+result
         #print 'status: '+str(status)
         return result
-    
-    def verify_head_failover_configuration(self):
-        rc = 0
-        failover_nodes = self.ducc_properties.get('ducc.head.failover')
-        message = "OK: Head node failover not configured."
-        if(failover_nodes != None):
-            failover_nodes = failover_nodes.strip()
-            if(len(failover_nodes) >= 0):
-                nodes = failover_nodes.split()
-                head_node = self.ducc_properties.get('ducc.head')
-                head_pool = self.get_nodepool(head_node,'<None>')
-                for node in nodes:
-                    node_pool = self.get_nodepool(node,'<None>')
-                    #print 'head:'+head_pool+' '+'node:'+node_pool
-                    if( head_pool != node_pool):
-                        if(rc == 0):
-                            message = 'OK: Head failover node '+head_node+' in node pool '+head_pool
-                            print message
-                        message = 'NOTOK: Head failover node '+node+' in node pool '+node_pool
-                        print message
-                        rc = 1
-                if (rc > 0):
-                    message = "NOTOK: Head failover nodepools incorrectly configured."
-                else:
-                    message = "OK: Head failover nodepools correctly configured."
-        print message
-        return (rc == 0)
         
     def disable_threading(self):
         global use_threading
@@ -1098,6 +1199,46 @@ class DuccUtil(DuccBase):
             return False
         return True
 
+    keepalivd_conf = '/etc/keepalived/keepalived.conf'
+
+    # eligible when the keepalived config file mentions the ip
+    def is_reliable_eligible(self, ip):
+        # no ip (failed lookup) is never eligible; '' would match every line
+        if ( not ip or not os.path.exists(self.keepalivd_conf) ):
+            return False
+        with open(self.keepalivd_conf) as f:
+            for line in f:
+                if ip in line:
+                    return True
+        return False
+        
+    # master when current node keepalived answers for head node ip
+    # backup when current node keepalived does not answer for head ip, but is capable in config
+    # unspecified otherwise
+    def get_reliable_state(self):
+        label = 'get_reliable_state'
+        result = 'unspecified'
+        try:
+            ducc_head = self.ducc_properties.get('ducc.head')
+            head_ip = self.get_ip_address(ducc_head)
+            # head_ip is None when the lookup fails; stay 'unspecified' quietly
+            if(head_ip != None and self.is_reliable_eligible(head_ip)):
+                cmd = ['/sbin/ip', 'addr', 'list']
+                debug(label, 'cmd: '+' '.join(cmd))
+                p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                output, err = p.communicate()
+                debug(label, "output: "+output)
+                if(head_ip in output):
+                    result = 'master'
+                else:
+                    result = 'backup'
+        except Exception as e:
+            print e
+        return result
+    
+    def is_reliable_backup(self):
+        return self.get_reliable_state() == 'backup'
+    
     def __init__(self, merge=False):
         global use_threading
         DuccBase.__init__(self, merge)
@@ -1109,25 +1250,45 @@ class DuccUtil(DuccBase):
         self.broker_host = 'localhost'
         self.broker_port = '61616'
         self.default_components = ['rm', 'pm', 'sm', 'or', 'ws', 'db', 'broker']
+        self.local_components = ['rm', 'pm', 'sm', 'or', 'ws', 'broker']
         self.default_nodefiles = [self.DUCC_HOME + '/resources/ducc.nodes']
 
         if ( self.localhost == self.ducc_properties.get("ducc.head")):
             self.is_ducc_head = True
 
-        os.environ['DUCC_NODENAME'] = self.localhost    # to match java code's implicit propery so script and java match
+        os.environ['DUCC_NODENAME'] = self.localhost    # to match java code's implicit property so script and java match
+
+        dbhost = self.ducc_properties.get('ducc.database.host')
+        if ( dbhost == None ):
+            dbhost = self.ducc_properties.get('ducc.head')
+        if ( dbhost == None ):
+            dbhost = 'localhost'
+
+        dir_db_state = self.DUCC_HOME + '/state/database/'+dbhost
+        self.makedirs(dir_db_state)
+        dir_db_logs = self.DUCC_HOME + '/logs/database/'+dbhost
+        self.makedirs(dir_db_logs)
 
-        self.pid_file  = self.DUCC_HOME + '/state/ducc.pids'
+        self.db_pidfile = dir_db_state+ '/cassandra.pid'
+        self.db_logfile = dir_db_logs + '/cassandra.console'
+        
+        self.pid_file_agents  = self.DUCC_HOME + '/state/agents/ducc.pids'
+        self.pid_file_daemons  = self.DUCC_HOME + '/state/daemons/'+self.get_node_name()+'/ducc.pids'
         self.set_classpath()
         self.os_pagesize = self.get_os_pagesize()
         self.update_properties()
 
         self.db_configure()
         
-
         manage_broker = self.ducc_properties.get('ducc.broker.automanage')
-        self.automanage = False
+        self.automanage_broker = False
         if (manage_broker in ('t', 'true', 'T', 'True')) :
-            self.automanage = True                    
+            self.automanage_broker = True                    
+
+        manage_database = self.ducc_properties.get('ducc.database.automanage')
+        self.automanage_database = False
+        if (manage_database in ('t', 'true', 'T', 'True')) :
+            self.automanage_database = True     
 
         py_version = platform.python_version().split('.')
         if ( int(py_version[0]) > 2 ):

Modified: uima/uima-ducc/trunk/src/main/admin/start_ducc
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/start_ducc?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/start_ducc (original)
+++ uima/uima-ducc/trunk/src/main/admin/start_ducc Mon Apr 23 19:07:31 2018
@@ -41,7 +41,7 @@ class StartDucc(DuccUtil):
 
     def start_broker(self):
         
-        broker_host = self.ducc_properties.get('ducc.broker.hostname')
+        broker_host = self.localhost
         print 'Starting broker on', broker_host
         lines = self.ssh(broker_host, True, "'", self.DUCC_HOME + '/admin/ducc.py', '-c', 'broker', "'")
         while 1:
@@ -52,7 +52,7 @@ class StartDucc(DuccUtil):
             if ( line.startswith('PID') ):
                 toks = line.split(' ')    # get the PID
                 print "Broker on", broker_host, 'PID', toks[1]
-                self.pids.put('broker@' + broker_host, toks[1])
+                self.pids_daemons.put('broker@' + broker_host, toks[1])
                 lines.close()
                 break
 
@@ -68,14 +68,13 @@ class StartDucc(DuccUtil):
         msgs = []
 
         node = self.ducc_properties.get('ducc.head')
+        
         com = component
         if ( com.find('@') >= 0 ):            
             com, node = com.split('@')
-
-        if ( ( com == 'ws' ) and ( node == 'local' ) and ( self.webserver_node != 'localhost' )):
-            if ( self.webserver_node != None ):
-                node = self.webserver_node
-                component = com + '@' + node
+            
+        if (com in self.local_components):
+			node = self.localhost
 
         if ((com in self.default_components) or ( com == 'agent')) :
             msgs.append((node, 'Starting', com))
@@ -104,7 +103,7 @@ class StartDucc(DuccUtil):
             if ( line.startswith('PID') ):
                 toks = line.split(' ')    # get the PID
                 msgs.append(('     PID', toks[1]))
-                self.pids.put(com + '@' + node, toks[1])
+                self.pids_daemons.put(com + '@' + node, toks[1])
                 lines.close()
                 break
             if ( line.startswith('WARN') ):
@@ -112,7 +111,7 @@ class StartDucc(DuccUtil):
             
         if ( com in self.default_components ):           # tracks where the management processes are
             self.pidlock.acquire()
-            self.pids.put(com, com + '@' + node)
+            self.pids_daemons.put(com, com + '@' + node)
             self.pidlock.release()
 
         return msgs
@@ -132,7 +131,7 @@ class StartDucc(DuccUtil):
                 toks = line.split(' ')
                 pid = toks[1]
                 self.pidlock.acquire()
-                self.pids.put('agent@' + host, pid)
+                self.pids_agents.put('agent@' + host, pid)
                 self.pidlock.release()
 
                 lines.close()
@@ -173,6 +172,11 @@ class StartDucc(DuccUtil):
         print "        If no options are given, all DUCC processes are started, using the default"
         print "        nodelist, DUCC_HOME/resources/ducc.nodes. "
         print ""
+        print "        For reliable DUCC agents will not be started from backup head node. "
+        print ""
+        print "        Broker will not be started when ducc.broker.automanage = false. "
+        print "        Database will not be started when ducc.database.automanage = false. "
+        print ""
         print "Options:"
         print "   -n --nodelist nodefile"
         print "        Start agents on the nodes in the nodefile.  Multiple nodefiles may be specified:"
@@ -243,8 +247,11 @@ class StartDucc(DuccUtil):
         nodefiles = []
         components = []
         or_parms = self.ducc_properties.get('ducc.orchestrator.start.type')
-        self.pids = Properties()
-        self.pids.load_if_exists(self.pid_file)
+        if(not self.is_reliable_backup()):
+            self.pids_agents = Properties()
+            self.pids_agents.load_if_exists(self.pid_file_agents)
+        self.pids_daemons = Properties()
+        self.pids_daemons.load_if_exists(self.pid_file_daemons)
         
         try:
             opts, args = getopt.getopt(argv, 'c:mn:sh?v', ['component=', 'help', 'nodelist=', 'cold', 'warm', 'nothreading'])
@@ -328,7 +335,7 @@ class StartDucc(DuccUtil):
             sys.exit(1)
         
         # activeMQ needs to be started externally before starting any DUCC processes
-        if ( self.automanage and ('broker' in components) ):
+        if ( self.automanage_broker and ('broker' in components) ):
             if ( self.is_amq_active() ):
                 print 'ActiveMQ broker is already running on host and port:', self.broker_host + ':' + self.broker_port, 'NOT restarting'
             else:
@@ -338,7 +345,7 @@ class StartDucc(DuccUtil):
                     print sys.exc_info()[0], "DUCC may not be started correctly."
                     sys.exit(1)
 
-        if ( 'db' in components ):
+        if ( self.automanage_database and ('db' in components) ):
             try:
                 if ( not self.db_start() ):
                     print "Failed to start or connect to the database."
@@ -355,8 +362,7 @@ class StartDucc(DuccUtil):
             sys.exit(1)
 
         ducc = Ducc()
-
-        print "Starting", n_nodes, "agents"        
+    
         self.threadpool = ThreadPool(n_nodes + 5)      # a few more for the head processes
         self.pidlock = threading.Lock()
         
@@ -373,16 +379,20 @@ class StartDucc(DuccUtil):
                         sys.exit(1)
                     # give 'or' a small head start
                     time.sleep(2)
-                    
-        for (nodefile, nodelist) in nodes.items():
-            print '********** Starting agents from file', nodefile
-            try:
-                for node in nodelist:
-                    self.threadpool.invoke(self.start_one_agent, node)
-            except:
-                self.threadpool.quit()
-                print sys.exc_info()[0], "DUCC may not be started correctly."
-                sys.exit(1)
+        
+        if(self.is_reliable_backup()):
+            print '********** "backup" head node -> not starting agents'
+        else:
+            print "Starting", n_nodes, "agents"    
+            for (nodefile, nodelist) in nodes.items():
+                print '********** Starting agents from file', nodefile
+                try:
+                    for node in nodelist:
+                        self.threadpool.invoke(self.start_one_agent, node)
+                except:
+                    self.threadpool.quit()
+                    print sys.exc_info()[0], "DUCC may not be started correctly."
+                    sys.exit(1)
                     
         if ( len(components) != 0 ):
             print 'Starting', or_parms
@@ -401,8 +411,11 @@ class StartDucc(DuccUtil):
 
         self.threadpool.quit()
 
-        if ( len(self.pids) > 0 ):
-            self.pids.write(self.pid_file)
+        if(not self.is_reliable_backup()):
+            if ( len(self.pids_agents) > 0 ):
+                self.pids_agents.write(self.pid_file_agents)
+        if ( len(self.pids_daemons) > 0 ):
+            self.pids_daemons.write(self.pid_file_daemons)
         return
 
 if __name__ == "__main__":

Modified: uima/uima-ducc/trunk/src/main/admin/stop_ducc
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/admin/stop_ducc?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/admin/stop_ducc (original)
+++ uima/uima-ducc/trunk/src/main/admin/stop_ducc Mon Apr 23 19:07:31 2018
@@ -33,12 +33,11 @@ class StopDucc(DuccUtil):
 
     def stop_component(self, component, force):
 
-        if ( (component == 'broker') and self.automanage ):
+        if ( (component == 'broker') and self.automanage_broker ):
             print 'Stopping broker'
             self.stop_broker()
             return
-
-        if ( component == 'db' ):
+        if ( (component == 'db') and self.automanage_database ):
             print 'Stopping database'
             self.db_stop()
             return
@@ -47,11 +46,18 @@ class StopDucc(DuccUtil):
         # If it's an unqualified management component, we need to get it's qualified name
         #
         if ( component in self.default_components ):
-            if ( self.pids.has_key(component) ):
-                component = self.pids.get(component)
+            if( component == 'agent' ):
+                if ( self.pids_agents.has_key(component) ):
+                    component = self.pids_agents.get(component)
+                else:
+                    print 'Skipping', component, 'not in pids file.'
+                    return
             else:
-                print 'Skipping', component, 'not in pids file.'
-                return
+                if ( self.pids_daemons.has_key(component) ):
+                    component = self.pids_daemons.get(component)
+                else:
+                    print 'Skipping', component, 'not in pids file.'
+                    return
 
         #
         # If the name is not qualified we've got a problem, everything in the pids file is qualified
@@ -64,11 +70,19 @@ class StopDucc(DuccUtil):
         #
         # If despite all that we can't find the pid, we need to run check_ducc
         #        
-        if ( not self.pids.has_key(component) ):
-            print "Cannot find PID for component", component, ". Run check_ducc -p to refresh PIDS and then rerun stop_ducc."
-            return
-
-        pid = self.pids.get(component)
+        if( com == 'agent' ):
+            if ( not self.pids_agents.has_key(component) ):
+                print "Cannot find PID for component", component, ". Run check_ducc -p to refresh PIDS and then rerun stop_ducc."
+                return
+            else:
+                pid = self.pids_agents.get(component)
+        else:
+            if ( not self.pids_daemons.has_key(component) ):
+                print "Cannot find PID for component", component, ". Run check_ducc -p to refresh PIDS and then rerun stop_ducc."
+                return
+            else:
+                pid = self.pids_daemons.get(component)
+        
     
         if ( force ):
             print 'Stopping component', com, 'on node', target_node, 'with PID', pid, 'forcibly (kill -9)'
@@ -80,9 +94,12 @@ class StopDucc(DuccUtil):
             self.nohup(['ssh', target_node, 'kill', '-INT', pid], False)
 
         # clear the short name if it exists, and the long name
-        self.pids.delete(com)
-        self.pids.delete(component)
-
+        if( com == 'agent' ):
+            self.pids_agents.delete(com)
+            self.pids_agents.delete(component)
+        else:
+            self.pids_daemons.delete(com)
+            self.pids_daemons.delete(component)
 
     def quiesce_agents(self, components, nodes):
         allnodes = []
@@ -106,7 +123,7 @@ class StopDucc(DuccUtil):
         # NOTE: quiesce does not actually cause agents to terminate so we don't update the PIDs file
         return
 
-    def stop_agents(self, node, force):
+    def stop_agent(self, node, force):
         self.stop_component('agent@' + node.strip(), force)
     
     def usage(self, msg):
@@ -116,6 +133,11 @@ class StopDucc(DuccUtil):
         print 'stop_ducc [options]'
         print '    If no options are given, this help screen is shown.'
         print ''
+        print '    For reliable DUCC agents will not be stopped from backup head node. '
+        print ''
+        print '    Broker will not be stopped when ducc.broker.automanage = false. '
+        print '    Database will not be stopped when ducc.database.automanage = false. '
+        print ''
         print 'Options:'
         print '   -a --all'
         print '        Stop all the DUCC processes, including agents and management processes.'
@@ -253,34 +275,38 @@ class StopDucc(DuccUtil):
                 print "Waiting " + str(wait_time) + " seconds to broadcast agent shutdown."
                 time.sleep(wait_time)
 
-                if ( self.automanage ):
+                if ( self.automanage_broker ):
                     print "Stopping broker"
                     self.stop_broker()                
 
-                print "Stopping database"
-                self.db_stop()
-
-                if ( os.path.exists(self.pid_file) ):
-                    os.remove(self.pid_file)
+                if ( self.automanage_database ):
+                	print "Stopping database"
+                	self.db_stop()
+
+                if ( os.path.exists(self.pid_file_agents) ):
+                    os.remove(self.pid_file_agents)
+                if ( os.path.exists(self.pid_file_daemons) ):
+                    os.remove(self.pid_file_daemons)
                 return
             else:
                 if ( len(nodefiles) == 0 ):
                     nodefiles = self.default_nodefiles
 
 
-        self.pids = Properties()
+        self.pids_agents = Properties()
+        self.pids_daemons = Properties()
         sc = set(components)
         sb = set(['broker', 'db'])
         read_pids = True
         if ( sc.issubset(sb) ):
             read_pids = False
-    
 
         # The broker and db do not set the pid file
         if ( read_pids ):
             try:
-                self.pids.load(self.pid_file)
-                pass
+                if(not self.is_reliable_backup()):
+                    self.pids_agents.load(self.pid_file_agents)
+                self.pids_daemons.load(self.pid_file_daemons)
             except PropertiesException, (inst):
                 print inst.msg
                 print ''
@@ -309,11 +335,17 @@ class StopDucc(DuccUtil):
             sys.exit(1)
 
         if ( quiesce ):
-            self.quiesce_agents(components, nodes)
+            if(self.is_reliable_backup()):
+                print '********** "backup" head node -> not quiescing agents'
+            else:
+                self.quiesce_agents(components, nodes)
         else:
-            for (nf, nl) in nodes.items():
-                for n in nl:
-                    self.stop_agents(n, force)       
+            if(self.is_reliable_backup()):
+                print '********** "backup" head node -> not stopping agents'
+            else:
+                for (nf, nl) in nodes.items():
+                    for n in nl:
+                        self.stop_agent(n, force)       
             host = self.localhost.split('.')[0]
             for c in components:
                 c = c.strip()
@@ -333,10 +365,15 @@ class StopDucc(DuccUtil):
                     self.stop_component(c, force)         
 
         if ( read_pids ):
-            if ( len(self.pids) > 0 ):
-                self.pids.write(self.pid_file)
+            if(not self.is_reliable_backup()):
+                if ( len(self.pids_agents) > 0 ):
+                    self.pids_agents.write(self.pid_file_agents)
+                else:
+                    os.remove(self.pid_file_agents)
+            if ( len(self.pids_daemons) > 0 ):
+                self.pids_daemons.write(self.pid_file_daemons)
             else:
-                os.remove(self.pid_file)
+                os.remove(self.pid_file_daemons)
 
         return
 

Modified: uima/uima-ducc/trunk/src/main/config/cassandra-env.sh
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/config/cassandra-env.sh?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/config/cassandra-env.sh (original)
+++ uima/uima-ducc/trunk/src/main/config/cassandra-env.sh Mon Apr 23 19:07:31 2018
@@ -300,7 +300,7 @@ JVM_OPTS="$JVM_OPTS -Djava.net.preferIPv
 if [ "x$LOCAL_JMX" = "x" ]; then
     LOCAL_JMX=yes
 fi
-
+LOCAL_JMX=no
 if [ "$LOCAL_JMX" = "yes" ]; then
   JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT -XX:+DisableExplicitGC"
 else

Modified: uima/uima-ducc/trunk/src/main/resources/default.ducc.properties
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/resources/default.ducc.properties?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/resources/default.ducc.properties (original)
+++ uima/uima-ducc/trunk/src/main/resources/default.ducc.properties Mon Apr 23 19:07:31 2018
@@ -47,14 +47,17 @@
 # Resource Manager, Process Manager, Service Manager).  This property is required and MUST be
 # configured in a new installation.  The installation script ducc_post_install initializes this 
 # property to the node the script is executed on.
+# Reliable DUCC: if running reliably, then this value must resolve to the same ip address
+# specified for the virtual_ipaddress in /etc/keepalived/keepalived.conf for master and
+# backup nodes.  DUCC CLI and Agents employ this value to connect to the current reliable 
+# DUCC head node.
 ducc.head = <head-node>
 
-# This property declares the nodes which may become the ducc.head.
-# This property is optional, but if specified then the node specified as ducc.head must also
-# appear in this comma separated failover list.
-# The failover list allows the agents (at boot time only) to configure themselves to employ
-# a broker on any of the listed nodes.
-ducc.head.failover = ${ducc.head}
+# Reliable DUCC: if running reliably, then this value must comprise the blank delimited list
+# of nodes that are eligible to become the DUCC head node.  Admin commands start_ducc and 
+# stop_ducc are only allowed on the ducc.head node or any node in the ducc.head.reliable.list.
+# An empty ducc.head.reliable.list indicates that DUCC is not running reliably.
+ducc.head.reliable.list = 
 
 # The full name of the Java command.
 # This specifies the full path to the JVM to be used by the DUCC processes. This MUST be
@@ -91,11 +94,6 @@ ducc.jms.provider=activemq
 
 #Declare the wire protocol used to communicate with ActiveMQ.
 ducc.broker.protocol=tcp
-
-# This declares the node where the ActiveMQ broker resides. It MUST be updated to 
-# the actual node where the broker is running as part of DUCC installation. 
-# The default value will not work.          
-ducc.broker.hostname=${ducc.head}
  
 # This declares the port on which the ActiveMQ broker is listening for messages.        
 ducc.broker.port=61617
@@ -104,7 +102,6 @@ ducc.broker.port=61617
 ducc.broker.url.decoration=jms.useCompression=true&jms.prefetchPolicy.all=0
 
 # The Broker's name must match the actual broker name in the broker config.  
-# THIS IS NOT THE BROKER HOSTNAME WHICH IS CONFIGURED IN ducc.broker.hostname.
 # This is the internal name of the broker, used to locate Broker's MBean in 
 # JMX Registry. It is NOT related to any node name. When using the ActiveMQ 
 # distribution supplied with DUCC it should always be set to localhost.  
@@ -555,8 +552,7 @@ ducc.sm.meta.ping.timeout =  15000
 # *****
 ducc.sm.http.port=${ducc.orchestrator.http.port}
 
-# This is the node where the Service Manager runs. It MUST be configured as part of DUCC 
-# setup. The ducc_post_install procedures initialize this to ${ducc.head}
+# This is the node where the Service Manager runs. It MUST be ${ducc.head}.
 ducc.sm.http.node=${ducc.head}
 
 # This is the length of time, in milliseconds, that the SM allows a service to remain alive after 
@@ -606,9 +602,7 @@ ducc.orchestrator.maintenance.rate=60000
 # This is the HTTP port used by the Orchestrator to field requests from the CLI / API. 
 ducc.orchestrator.http.port=19988
 
-# Node where OR is running. This is needed by CLI to compose a URL to access OR jetty server.
-# This is the node where the Orchestrator runs. It MUST be configured as part of DUCC 
-# setup. The ducc_post_install procedures initialize this to ${ducc.head}
+# Node where Orchestrator is running. It MUST be ${ducc.head}.
 ducc.orchestrator.http.node=${ducc.head}
 
 #------------------------------------------------------------------------------
@@ -1039,6 +1033,10 @@ ducc.database.jmx.host = localhost
 # Database JMX port.
 ducc.database.jmx.port = 7199
 
+# If set to true, DUCC will start and stop the Cassandra database as part of its normal
+# start/stop scripting.  
+ducc.database.automanage = true
+
 #-----------------------------------
 # DATABASE MEMORY TUNING
 #    WARNING: if you're not sure what these do you should probably not change them as incorrect

Modified: uima/uima-ducc/trunk/src/main/resources/log4j.xml
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/resources/log4j.xml?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/resources/log4j.xml (original)
+++ uima/uima-ducc/trunk/src/main/resources/log4j.xml Mon Apr 23 19:07:31 2018
@@ -59,7 +59,7 @@
   
   <appender name="system-events-log" class="org.apache.uima.ducc.common.utils.DeferredOpenRollingAppender"> 
     <param name="append" value="true"/>
-    <param name="file" value="${DUCC_HOME}/logs/system-events.log"/>
+    <param name="file" value="${DUCC_HOME}/logs/daemons/${DUCC_NODENAME}/system-events.log"/>
     <param name="maxBackupIndex" value="5" />
     <param name="maxFileSize" value="10MB" />
     <layout class="org.apache.log4j.PatternLayout"> 
@@ -69,7 +69,7 @@
   
   <appender name="rmlog" class="org.apache.uima.ducc.common.utils.DeferredOpenRollingAppender"> 
     <param name="append" value="true"/>
-    <param name="file" value="${DUCC_HOME}/logs/rm.log"/>
+    <param name="file" value="${DUCC_HOME}/logs/daemons/${DUCC_NODENAME}/rm.log"/>
     <param name="maxBackupIndex" value="5" />
     <param name="maxFileSize" value="10MB" />
     <layout class="org.apache.log4j.PatternLayout"> 
@@ -79,7 +79,7 @@
   
   <appender name="orlog" class="org.apache.uima.ducc.common.utils.DeferredOpenRollingAppender"> 
     <param name="append" value="true"/>
-    <param name="file" value="${DUCC_HOME}/logs/or.log"/>
+    <param name="file" value="${DUCC_HOME}/logs/daemons/${DUCC_NODENAME}/or.log"/>
     <param name="maxBackupIndex" value="5" />
     <param name="maxFileSize" value="10MB" />
     <layout class="org.apache.log4j.PatternLayout"> 
@@ -89,7 +89,7 @@
   
   <appender name="smlog" class="org.apache.uima.ducc.common.utils.DeferredOpenRollingAppender"> 
     <param name="append" value="true"/>
-    <param name="file" value="${DUCC_HOME}/logs/sm.log"/>
+    <param name="file" value="${DUCC_HOME}/logs/daemons/${DUCC_NODENAME}/sm.log"/>
     <param name="maxBackupIndex" value="5" />
     <param name="maxFileSize" value="10MB" />
     <layout class="org.apache.log4j.PatternLayout"> 
@@ -99,7 +99,7 @@
 
   <appender name="pmlog" class="org.apache.uima.ducc.common.utils.DeferredOpenRollingAppender"> 
     <param name="append" value="true"/>
-    <param name="file" value="${DUCC_HOME}/logs/pm.log"/>
+    <param name="file" value="${DUCC_HOME}/logs/daemons/${DUCC_NODENAME}/pm.log"/>
     <param name="maxBackupIndex" value="5" />
     <param name="maxFileSize" value="10MB" />
     <layout class="org.apache.log4j.PatternLayout"> 
@@ -109,7 +109,7 @@
 
   <appender name="wslog" class="org.apache.uima.ducc.common.utils.DeferredOpenRollingAppender"> 
     <param name="append" value="true"/>
-    <param name="file" value="${DUCC_HOME}/logs/ws.${DUCC_NODENAME}.log"/>
+    <param name="file" value="${DUCC_HOME}/logs/daemons/${DUCC_NODENAME}/ws.log"/>
     <param name="maxBackupIndex" value="5" />
     <param name="maxFileSize" value="10MB" />
     <layout class="org.apache.log4j.PatternLayout"> 
@@ -139,7 +139,7 @@
 
   <appender name="agentlog" class="org.apache.uima.ducc.common.utils.DeferredOpenRollingAppender"> 
     <param name="append" value="true"/>
-    <param name="file" value="${DUCC_HOME}/logs/${DUCC_NODENAME}.${ducc.deploy.components}.log"/>
+    <param name="file" value="${DUCC_HOME}/logs/agents/${DUCC_NODENAME}.${ducc.deploy.components}.log"/>
     <param name="maxBackupIndex" value="5" />
     <param name="maxFileSize" value="10MB" />
     <layout class="org.apache.log4j.PatternLayout"> 

Modified: uima/uima-ducc/trunk/src/main/scripts/properties.py
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/scripts/properties.py?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/scripts/properties.py (original)
+++ uima/uima-ducc/trunk/src/main/scripts/properties.py Mon Apr 23 19:07:31 2018
@@ -213,11 +213,22 @@ class Properties:
         if ( self.props.has_key(key) ):
             del self.props[key]
             self.keys.remove(key)
-
+        
+    #
+    # make directories, if need be
+    #
+    def _makedirs(self, path):  # best-effort: create the directory portion of the file path `path`
+        try:
+            dir_path = path.rsplit('/',1)[0]  # text before the last '/'; NOTE(review): if `path` has no '/', this is the whole string
+            os.makedirs(dir_path)  # creates intermediate directories as needed; raises if dir_path already exists
+        except:
+            pass  # every failure is deliberately ignored here, including "directory already exists"
+        
     #
     # Write the hash as a Java-like properties file
     #
     def write(self, propsfile):
+        self._makedirs(propsfile)
         f = open(propsfile, 'w')
         for k in self.keys:
             p = self.props[k]

Modified: uima/uima-ducc/trunk/src/main/test/service.py
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/src/main/test/service.py?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/src/main/test/service.py (original)
+++ uima/uima-ducc/trunk/src/main/test/service.py Mon Apr 23 19:07:31 2018
@@ -201,7 +201,7 @@ def mkBrokerUrl(ducc_home):
     props = DuccProperties()
     props.load(ducc_home + '/resources/ducc.properties')
     protocol = props.get('ducc.broker.protocol')
-    host     = props.get('ducc.broker.hostname')
+    host     = props.get('ducc.head')
     port     = props.get('ducc.broker.port')
     return protocol + '://' + host + ':' + port
 

Modified: uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java Mon Apr 23 19:07:31 2018
@@ -242,7 +242,7 @@ public class NodeAgent extends AbstractD
   		NodeIdentity nodeIdentity = new NodeIdentity();
       	DaemonDuccEvent ev = new DaemonDuccEvent(daemon, eventType, nodeIdentity);
           ORDispatcher.dispatch(stateChangeEndpoint, ev, "");
-          logger.info(methodName, null, stateChangeEndpoint, eventType.name(), nodeIdentity.getName());
+          logger.info(methodName, null, stateChangeEndpoint, eventType.name(), nodeIdentity.getCanonicalName());
       }
   	catch(Exception e) {
   		logger.error(methodName, null, e);
@@ -620,7 +620,7 @@ public class NodeAgent extends AbstractD
   public void start(DuccService service) throws Exception {
     super.start(service, null);
     String methodName = "start";
-    String name = nodeIdentity.getName();
+    String name = nodeIdentity.getShortName();
     String ip = nodeIdentity.getIp();
     String jmxUrl = getProcessJmxUrl();
     DuccDaemonRuntimeProperties.getInstance().bootAgent(name, ip, jmxUrl);
@@ -2233,10 +2233,7 @@ public class NodeAgent extends AbstractD
       BufferedReader br = new BufferedReader(new FileReader(exclusionFile));
       String line;
       NodeIdentity node = getIdentity();
-      String nodeName = node.getName();
-      if (nodeName.indexOf(".") > -1) {
-        nodeName = nodeName.substring(0, nodeName.indexOf("."));
-      }
+      String nodeName = node.getShortName();
 
       while ((line = br.readLine()) != null) {
         if (line.startsWith(nodeName)) {
@@ -2277,7 +2274,7 @@ public class NodeAgent extends AbstractD
       String[] nodes = ((DuccAdminEventStopMetrics) event).getTargetNodes().split(",");
       //  Check if this message applies to this node
       for (String targetNode : nodes) {
-        if (Utils.isMachineNameMatch(targetNode.trim(), getIdentity().getName())) {
+        if (Utils.isMachineNameMatch(targetNode.trim(), getIdentity().getCanonicalName())) {
           logger.info("handleAdminEvent", null,
                   "... Agent Received an Admin Request to Stop Metrics Collection and Publishing");
           //  Stop Camel route responsible for driving collection and publishing of metrics

Modified: uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/config/AgentConfiguration.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/config/AgentConfiguration.java?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/config/AgentConfiguration.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/config/AgentConfiguration.java Mon Apr 23 19:07:31 2018
@@ -487,7 +487,7 @@ public class AgentConfiguration {
       camelContext.addRoutes(inventoryRouteBuilder);
 
       logger.info("nodeAgent", null, "------- Agent Initialized - Identity Name:"
-              + agent.getIdentity().getName() + " IP:" + agent.getIdentity().getIp()
+              + agent.getIdentity().getCanonicalName() + " IP:" + agent.getIdentity().getIp()
               + " JP State Update Endpoint:" + common.managedProcessStateUpdateEndpoint);
       return agent;
 

Modified: uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/deploy/DuccWorkHelper.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/deploy/DuccWorkHelper.java?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/deploy/DuccWorkHelper.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/deploy/DuccWorkHelper.java Mon Apr 23 19:07:31 2018
@@ -59,7 +59,7 @@ public class DuccWorkHelper {
 		String location = "getTargetUrl";
 		String targetUrl = null;
 		String server = getServer();
-		String host = DuccPropertiesResolver.get("ducc." + server + ".http.node");
+		String host = DuccPropertiesResolver.get("ducc.head");
 	    String port = DuccPropertiesResolver.get("ducc." + server + ".http.port");
         if ( host == null || port == null ) {
         	String message = "ducc." + server + ".http.node and/or .port not set in ducc.properties";

Modified: uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/event/AgentEventListener.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/event/AgentEventListener.java?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/event/AgentEventListener.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/event/AgentEventListener.java Mon Apr 23 19:07:31 2018
@@ -18,9 +18,9 @@
 */
 package org.apache.uima.ducc.agent.event;
 
-import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.camel.Body;
@@ -28,6 +28,7 @@ import org.apache.uima.ducc.agent.Agent;
 import org.apache.uima.ducc.agent.NodeAgent;
 import org.apache.uima.ducc.agent.ProcessLifecycleController;
 import org.apache.uima.ducc.agent.deploy.DuccWorkHelper;
+import org.apache.uima.ducc.common.head.IDuccHead.DuccHeadState;
 import org.apache.uima.ducc.common.utils.DuccLogger;
 import org.apache.uima.ducc.common.utils.Utils;
 import org.apache.uima.ducc.common.utils.id.DuccId;
@@ -39,7 +40,6 @@ import org.apache.uima.ducc.transport.ev
 import org.apache.uima.ducc.transport.event.ProcessStartDuccEvent;
 import org.apache.uima.ducc.transport.event.ProcessStateUpdateDuccEvent;
 import org.apache.uima.ducc.transport.event.ProcessStopDuccEvent;
-import org.apache.uima.ducc.transport.event.common.DuccUserReservation;
 import org.apache.uima.ducc.transport.event.common.DuccWorkPopDriver;
 import org.apache.uima.ducc.transport.event.common.IDuccJobDeployment;
 import org.apache.uima.ducc.transport.event.common.IDuccProcess;
@@ -57,6 +57,7 @@ import org.springframework.beans.factory
 
 public class AgentEventListener implements DuccEventDelegateListener {
 	DuccLogger logger = DuccLogger.getLogger(this.getClass(), Agent.COMPONENT_NAME);
+	DuccId jobid = null;
 	ProcessLifecycleController lifecycleController = null;
 	// On startup of the Agent we may need to do cleanup of cgroups.
 	// This cleanup will happen once right after processing of the first OR publication.
@@ -86,10 +87,27 @@ public class AgentEventListener implemen
   	        IDuccProcess process = jobDeployment.getJdProcess();
   	        sb.append("\nJD--> JobId:"+jobDeployment.getJobId()+" ProcessId:"+process.getDuccId()+" PID:"+process.getPID()+" Status:"+process.getProcessState() + " Resource State:"+process.getResourceState()+" isDeallocated:"+process.isDeallocated());
   	      }
+  	      /*
+  	      else {
+  	    	IDuccProcess process = jobDeployment.getJdProcess();
+  	    	String ip1 = process.getNodeIdentity().getIp();
+  	    	String ip2 = agent.getIdentity().getIp();
+  	    	sb.append("\nREJECTED: processIP="+ip1+" "+"agentIP="+ip2);
+  	    	sb.append("\nREJECTED: JD--> JobId:"+jobDeployment.getJobId()+" ProcessId:"+process.getDuccId()+" PID:"+process.getPID()+" Status:"+process.getProcessState() + " Resource State:"+process.getResourceState()+" isDeallocated:"+process.isDeallocated());
+  	      }
+		  */
   	      for( IDuccProcess process : jobDeployment.getJpProcessList() ) {
   	        if ( isTargetNodeForProcess(process) ) {
   	          sb.append("\n\tJob ID:"+jobDeployment.getJobId()+" ProcessId:"+process.getDuccId()+" PID:"+process.getPID()+" Status:"+process.getProcessState() + " Resource State:"+process.getResourceState()+" isDeallocated:"+process.isDeallocated());
   	        }
+  	        /*
+  	        else {
+  	          String ip1 = process.getNodeIdentity().getIp();
+  	  	      String ip2 = agent.getIdentity().getIp();
+  	  	      sb.append("\nREJECTED: processIP="+ip1+" "+"agentIP="+ip2);
+  	          sb.append("\n\tREJECTED: Job ID:"+jobDeployment.getJobId()+" ProcessId:"+process.getDuccId()+" PID:"+process.getPID()+" Status:"+process.getProcessState() + " Resource State:"+process.getResourceState()+" isDeallocated:"+process.isDeallocated());
+  	        }
+			*/
   	      }
   	    }
   	    logger.info("reportIncomingStateForThisNode",null,sb.toString());
@@ -215,7 +233,9 @@ public class AgentEventListener implemen
 			}  // for
 		}
 	}
- 
+	
+	private Map<String,String> map = new ConcurrentHashMap<String,String>();
+	
 	/**
 	 * This method is called by Camel when PM sends DUCC state to agent's queue. It 
 	 * takes responsibility of reconciling processes on this node. 
@@ -225,11 +245,32 @@ public class AgentEventListener implemen
 	 */
 	public void onDuccJobsStateEvent(@Body DuccJobsStateEvent duccEvent)
 			throws Exception {
+		String location = "onDuccJobsStateEvent";
 		long sequence = duccEvent.getSequence();
 
 		try {
 
 			synchronized (this) {
+				
+				String host = duccEvent.getProducerHost();
+				DuccHeadState dhs = duccEvent.getDuccHeadState();
+				switch(dhs) {
+				case backup:
+					if(!map.containsKey(host)) {
+						map.put(host, host);
+						logger.warn(location, jobid, "suspended"+" "+"host:"+host);
+					}
+					return;
+				case master:
+					if(map.containsKey(host)) {
+						map.remove(host);
+						logger.warn(location, jobid, "resumed"+" "+"host:"+host);
+					}
+					break;
+				default:
+					break;
+				}
+				
 				// check for out of band messages. Expecting a message with a
 				// sequence number larger than the previous message.
 				if (sequence > lastSequence.get()) {

Modified: uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsTest.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsTest.java?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsTest.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsTest.java Mon Apr 23 19:07:31 2018
@@ -83,7 +83,7 @@ public class CGroupsTest {
 	}
 	public void initialize() throws Exception {
 
-		idFactory = new DuccIdFactory(null,null);
+		idFactory = new DuccIdFactory();
 		String cgroupsUtilsDirs = System.getProperty("ducc.agent.launcher.cgroups.utils.dir");
       	String cgUtilsPath=null;
       	if (cgroupsUtilsDirs == null) {

Modified: uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/Launcher.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/Launcher.java?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/Launcher.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/Launcher.java Mon Apr 23 19:07:31 2018
@@ -86,7 +86,7 @@ public class Launcher {
     		throws Exception {
         //	Instantiate executor that will actually exec the process using java's ProcessBuilder
         DuccCommandExecutor executor = 
-            new DuccCommandExecutor(agent, commandLine, nodeIdentity.getName(),nodeIdentity.getIp(), managedProcess);
+            new DuccCommandExecutor(agent, commandLine, nodeIdentity.getCanonicalName(),nodeIdentity.getIp(), managedProcess);
         Future<?> future = executorService.submit(executor);
         //	if we are launching a process, save the future object returned from Executor above
     	managedProcess.setFuture(future);

Modified: uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/DefaultNodeInventoryProcessor.java
URL: http://svn.apache.org/viewvc/uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/DefaultNodeInventoryProcessor.java?rev=1829921&r1=1829920&r2=1829921&view=diff
==============================================================================
--- uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/DefaultNodeInventoryProcessor.java (original)
+++ uima/uima-ducc/trunk/uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/processors/DefaultNodeInventoryProcessor.java Mon Apr 23 19:07:31 2018
@@ -258,7 +258,7 @@ public class DefaultNodeInventoryProcess
 
 				}
 				logger.info(methodName, null, "Agent "
-						+ agent.getIdentity().getName() + " Posting Inventory:"
+						+ agent.getIdentity().getCanonicalName() + " Posting Inventory:"
 						+ sb.toString());
 				outgoingMessage.getIn().setBody(new NodeInventoryUpdateDuccEvent(inventory,agent.getLastORSequence(), agent.getIdentity()));