You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ch...@apache.org on 2013/06/11 00:03:43 UTC

svn commit: r1491620 - in /uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook: part2/ part2/cli/ part2/webserver/ part4/ part4/admin/

Author: challngr
Date: Mon Jun 10 22:03:43 2013
New Revision: 1491620

URL: http://svn.apache.org/r1491620
Log:
UIMA-2682 Duccbook updates.

Added:
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/managed-reservations.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/service-details.tex
Modified:
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/ducc-process-submit.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/services.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/job-details.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/jobs.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/reservations.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/services.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/system.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-nodes.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-properties.tex
    uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/rm.tex

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/ducc-process-submit.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/ducc-process-submit.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/ducc-process-submit.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/cli/ducc-process-submit.tex Mon Jun 10 22:03:43 2013
@@ -4,7 +4,7 @@
 \HCode{<a name='DUCC_CLI_PROCESS_SUBMIT'></a>}
 \fi
     \section{ducc\_process\_submit}
-
+    \label{sec:cli.ducc-process-submit}
     \paragraph{Description:}
        Use {\em ducc\_process\_submit} to submit a Managed Reservation, also known as an
        arbitrary process to DUCC.  The intention

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/services.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/services.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/services.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/services.tex Mon Jun 10 22:03:43 2013
@@ -79,20 +79,22 @@
       among the various behaviours and management mechanisms for service we define a number
       of {\em service classes}.
 
-      \subsection{External Services.} 
-      \label{sec:services.external}
+      \subsection{Implicit Services.} 
+      \label{sec:services.implicit}
       An external service is started externally to DUCC and discovered by DUCC only when it is
-      referenced by a job's {\em service\_dependency} parameter. On
-      submission of a job with a dependency on an external service, the SM sets up a "ping" thread
-      that check if the service exists at the endpoint. If so, the SM adds the service to its list
-      of known services and marks the job "ready to schedule".
+      referenced by a job's {\em service\_dependency} parameter.  Such a service is called
+      an {\em implicit} service.
 
-      When jobs referencing external services exit, a timer is set and DUCC continues to monitor the
+      On submission of a job with a dependency on an implicit service, the SM sets up a "ping" thread based on the
+      service endpoint of the dependency to discover if the service exists at the endpoint. If so,
+      the SM adds the service to its list of known services and marks the job "ready to schedule".
+
+      When jobs referencing implicit services exit, a timer is set and DUCC continues to monitor the
       service against the possibility that subsequent jobs will need it. Once the last job using the
       service has exited and the service timer expired, the SM stops the monitors and purges the
       service from its records.
 
-      \subsubsection{External UIMA-AS services}
+      \subsubsection{Implicit UIMA-AS services}
       If a job \hyperref[sec:service.endpoints]{references} a UIMA-AS service that is not known to the
       DUCC Service Manager, the Service Manager will start
       its default internal pinger to monitor the ActiveMq queue and service response.  The
@@ -100,7 +102,7 @@
       responding, its state is updated to "not-responding" but the job is allowed to continue as
       DUCC cannot tell if the job is using the service.
 
-      \subsubsection{External CUSTOM services, or ``ping-only'' Service} 
+      \subsubsection{Implicit CUSTOM services, or ``ping-only'' Service} 
       If a job \hyperref[sec:service.endpoints]{references} a CUSTOM service, the service must be
       registered and have a CUSTOM \hyperref[sec:service.pingers]{pinger} associated with it. Such a
       service is referred to as a ``ping-only'' service.  DUCC will start the pinger and monitor the
@@ -110,7 +112,7 @@
       \subsection{Submitted Services.} A \hyperref[sec:cli.service-submit]{submitted service} is a
       service that is submitted to DUCC with the ducc\_service\_submit CLI.  Both UIMA-AS and
       CUSTOM services may be submitted for execution by DUCC. Because DUCC is managing this service
-      it can provide more support than for external services.  However, DUCC does not persist the
+      it can provide more support than for implicit services.  However, DUCC does not persist the
       service definition. If a submitted service exits involuntarily (crashes), DUCC will make
       some number of attempts to restart it.  If the configured restart count is exceeded DUCC
       will stop the service.
@@ -146,7 +148,9 @@
           jobs or services, the on-demand service is automatically stopped to free up its resources for
           other work.
 
-        \item[External Services] \hyperref[sec:services.external]{External services} consist of only
+        \item[Ping-Only Services] 
+          \phantomsection\label{subsub:services.ping-only}
+          \hyperref[sec:services.implicit]{Ping-only services} consist of only
           a ping thread.  The service itself is not managed in any way by DUCC.  This is useful for
           managing dependencies on services that are not under DUCC control: DUCC can detect and
           report on the health of these services and take appropriate actions on dependent jobs if

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver.tex Mon Jun 10 22:03:43 2013
@@ -89,6 +89,13 @@
       % Create well-known link to this spot for HTML version
       \ifpdf
       \else
+      \HCode{<a name='DUCC_WS_RESERVATIONS_DETAILS'></a>}
+      \fi
+      \input{part2/webserver/managed-reservations.tex}
+
+      % Create well-known link to this spot for HTML version
+      \ifpdf
+      \else
       \HCode{<a name='DUCC_WS_SERVICES'></a>}
       \fi
       \input{part2/webserver/services.tex}
@@ -96,6 +103,13 @@
       % Create well-known link to this spot for HTML version
       \ifpdf
       \else
+      \HCode{<a name='DUCC_WS_SERVICE_DETAILS'></a>}
+      \fi
+      \input{part2/webserver/service-details.tex}
+
+      % Create well-known link to this spot for HTML version
+      \ifpdf
+      \else
       \HCode{<a name='DUCC_WS_SYSTEM'></a>}
       \fi
       \input{part2/webserver/system.tex}

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/job-details.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/job-details.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/job-details.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/job-details.tex Mon Jun 10 22:03:43 2013
@@ -21,7 +21,7 @@
 
         \item[Id] \hfill \\
           This is the DUCC-assigned numeric id of the process (not the Operating System's
-          processid). Process 0 is alwyas the Job Driver. 
+          processid). Process 0 is always the Job Driver.
 
         \item[Log] \hfill \\
           This is the log name for the process. It is hyperlinked to the log itself.
@@ -47,6 +47,8 @@
           \end{description}
 
         \item[Reason Scheduler or extraordinary status] \hfill \\
+          \phantomsection\label{itm:job-details-sched}
+
           This shows why a process was terminated.  These all have ``hovers'' that provide more information
           if it is available.
             \begin{description}          
@@ -81,6 +83,8 @@
             \end{description}
 
           \item[State Agent] \hfill \\
+          \phantomsection\label{itm:job-details-state}
+
             If there's an error detected only by the DUCC Agent, this shows the Agent's reason for
             a process's death.
             \begin{description}
@@ -108,6 +112,8 @@
             \end{description}
             
           \item[Reason Agent] \hfill \\
+          \phantomsection\label{itm:job-details-agent}
+
             If there's an error detected only by the agent, this shows the Agent's reason for
             a process's death.
             \begin{description}
@@ -164,7 +170,7 @@
             up to 100\% CPU, so, for example, on a 16-core machine, this can be as high
             as 1600\%.
             
-          \item[\%RSS] \hfill \\
+          \item[RSS] \hfill \\
             The amount of real memory being consumed by the process (Resident Storage Size)
             
           \item[Time Avg] \hfill \\

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/jobs.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/jobs.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/jobs.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/jobs.tex Mon Jun 10 22:03:43 2013
@@ -1,5 +1,6 @@
 
     \section{Jobs Page}
+    \label{sec:ws.jobs-page}
         The Web Server's home page is also the Jobs page. This page has links to all the rest of the content 
         at the site and shows the status of all the jobs in the system. 
     

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/managed-reservations.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/managed-reservations.tex?rev=1491620&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/managed-reservations.tex (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/managed-reservations.tex Mon Jun 10 22:03:43 2013
@@ -0,0 +1,77 @@
+\section{Managed Reservation Details Page}
+\label{sec:ws-managed-reservation-details}
+
+This page shows details of the processes which run in a managed reservation.  The
+information is divided between two tabs:
+
+   \begin{description}
+       \item[Processes] This tab contains details on all the processes contained in the
+         reserved space.
+       \item[Specification] This tab shows the specification for the process.
+   \end{description}  
+
+   \subsection{Processes}
+   \label{sec:ws-manres-processes}
+
+   The processes page contains the following columns:
+   \begin{description}
+      \item[ID] \hfill \\
+        This is the DUCC-assigned numeric id of the process.  The format of this
+        id is two numbers:
+\begin{verbatim}
+    RESID.SHAREID
+\end{verbatim}
+        Here, the {\em RESID} is the reservation ID.  The {\em SHAREID} is the 
+        share ID assigned by the Resource Manager.  Together these form a unique
+        ID for each process that runs in the reservation.
+        
+        Note: The current version of DUCC supports only one process per managed
+        reservation.  Future versions are expected to support multiple processes
+        within a single managed reservation.
+        
+      \item[Log] \hfill \\
+        This is the log name for the process. It is hyperlinked to the log itself.
+        
+      \item[Size] \hfill \\
+        This is the size of the log in MB. If you find you have trouble viewing the log
+        from the web server it could be because it is too big to view in the browser.
+        
+      \item[Hostname] \hfill \\
+        This is the name of the node where the process is running (or ran).
+        
+      \item[PID] \hfill \\
+        This is the Unix process ID (PID) of the process.
+        
+      \item[State Scheduler] \hfill \\
+        This shows the Resource Manager state of the job. It is one of:
+        
+        \begin{description}
+            \item[Allocated] - The node is still allocated for this job by the RM.
+            \item[Deallocated] - The resource manager has deallocated the shares for the job on
+              this node.
+        \end{description}
+        
+      \item[Reason Scheduler or Extraordinary Status] \hfill \\
+        These are the same as for the \hyperref[itm:job-details-sched]{job details.}
+
+      \item[State Agent] \hfill \\
+        These are the same as for the \hyperref[itm:job-details-state]{job details.}
+
+      \item[Reason Agent] \hfill \\
+        These are the same as for the \hyperref[itm:job-details-agent]{job details.}
+
+      \item[Time Run] \hfill \\
+        The current duration of the reservation, or total duration if it has 
+        terminated.
+        
+      \item[RSS] \hfill \\
+        The amount of real memory being consumed by the process (Resident Storage Size)
+
+   \end{description}
+
+   \subsection{Specification}
+   \label{sec:ws-service-specification}
+   This tab shows the full job specification in the form of a Java Properties
+   file.  This will include all the parameters specified by the user, plus those
+   filled in by DUCC.
+        

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/reservations.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/reservations.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/reservations.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/reservations.tex Mon Jun 10 22:03:43 2013
@@ -1,54 +1,87 @@
 
-    \section{Reservation Details Page}
+\section{Reservation Page}
+\label{sec:ws-reservations}
 
-        This page shows details of all reservations. The Reservations page contains the following columns: 
-        \begin{description}
+This page shows details of all reservations.  There are two types of reservations: {\em managed}
+and {\em unmanaged}.
+
+A {\em managed reservation} is a reservation whose process is fully managed by DUCC.  This process
+is any arbitrary process and is submitted with the
+\hyperref[sec:cli.ducc-process-submit]{ducc\_process\_submit} CLI.  The lifetime of the reservation
+starts at the time DUCC assigns a unique ID, and ends when the process terminates for any reason.
+
+An {\em unmanaged reservation} is essentially a sandbox for the user.  DUCC starts no processes
+in the reservation and manages none of the processes which run on that node.  The lifetime of the
+reservation starts at the time DUCC assigns a unique ID, and ends when the submitter or system
+administrator cancels it.  {\em Unmanaged reservations} can potentially last an indefinite
+period of time.
+
+The Reservations page contains the following columns: 
+\begin{description}
+
+\item[Id] \hfill \\
+  This is the unique DUCC numeric id of the reservation as assigned when the reservation is made.
+  If this is a {\em managed} reservation, the ID is hyperlinked to a
+  \hyperref[sec:ws-managed-reservation-details]{Managed Reservation Details} page with extended
+  details on the process running in the reservation.
+
+\item[Start] \hfill \\
+  This is the time the reservation was made.
+  
+\item[End] \hfill \\
+  This is the time the reservation was canceled or otherwise ended.
+  
+\item[User] \hfill \\
+  This is the userid of the person who made the reservation.
+  
+\item[Class] \hfill \\
+  This is the scheduling class used to schedule the reservation.
+  
+\item[Type] \hfill \\
+  This is the reservation type, {\em managed} or {\em unmanaged}, as described 
+  \hyperref[sec:ws-reservations]{above}.
+
+\item[State] \hfill \\
+  This is the status of the reservation. Values include: Received - Reservation
+  has been vetted, persisted, and assigned unique Id.
+  \begin{description}
+  \item[WaitingForResources] - The reservation is waiting for the Resource Manager to find and
+    schedule resources.
+  \item[Assigned] - The reservation is active. 
+  \item[Completed] - The reservation has been terminated.
+  \end{description}
+
+\item[Reason] \hfill \\
+  If a reservation is not active, the reason. Reasons include:
+  \begin{description}
+  \item[CanceledBySystem] - The job was canceled because DUCC was shutdown. 
+  \item[CanceledByUser] - The owner or administrator released the reservation. 
+  \item[ResourcesUnavailable] - The Resource Manager was unable to find free or freeable resources 
+    to match the resource request.
+  \item[ProgramExit] - The reservation is a {\em managed} reservation and the associated
+    process has exited.
+  \end{description}
+
+\item[Allocation] \hfill \\
+  This is the number of resources (shares for FIXED policy reservations, processes for
+  RESERVE policy reservations) that are allocated.
+
+\item[UserProcesses] This is the number of processes owned by the user running in all
+  shares of the reservation.  
+  
+  Note that even for {\em unmanaged} reservations, the DUCC agent tracks processes owned
+  by the user and reports on them.  This allows better identification and management of
+  abandoned reservations.
+
+\item[Size] \hfill \\
+  The memory size in GB of each allocated unit.  This is the amount of memory that
+  was {\em requested}.  In the case of RESERVE policy reservations, the actual memory
+  of the reserved machine may be greater.
+  
+\item[Host Names] \hfill \\
+  The node names of the machines where the resources are allocated.
+  
+\item[Description] \hfill \\
+  This is the description string from the --description option of submit.
+\end{description}
 
-            \item[Id] \hfill \\
-              This is the DUCC process id of the reservation as provided when the reservation is
-              made.
-
-            \item[Start] \hfill \\
-              This is the time the reservation was mde.
-              
-            \item[End] \hfill \\
-              This is the time the reservation was canceled.
-              
-            \item[User] \hfill \\
-              This is the userid if the person who made the reservation.
-              
-            \item[Class] \hfill \\
-              This is the resource class used to schedule the reservation.
-              
-            \item[Status] \hfill \\
-              This is the status of the reservation. Values include: Received - Reservation
-              has been vetted, persisted, and assigned unique Id.
-              \begin{description}
-                  \item[WaitingForResources] - The reservation is waitng for the Resource Manager to find and 
-                  \item[schedule] resources. 
-                  \item[Assigned] - The reservation is active. 
-                  \item[Completed] - The reservation has been canceled.                     
-              \end{description}
-
-            \item[Reason] \hfill \\
-              If a reservation is not active, the reason. Reasons include:
-                \begin{description}
-                    \item[ResourcesUnavailable] - The Resource Manager was unable to find free or freeable resources 
-                    \item[to] match the resource request. 
-                    \item[CanceledBySystem] - The job was canceled because DUCC was shutdown. 
-                    \item[CanceledByUser] - The owner or administrator released the reservation. 
-                \end{description}
-
-            \item[Allocation] \hfill \\
-              The number of resources (shares for FIXED policy reservartions, processes for
-              RESERVE policy reservations) that are allocated.
-
-            \item[Size] \hfill \\
-              The memory size in GB of the each allocated unit.
-              
-            \item[List] \hfill \\
-              The node names of the machines where the resource is allocated.
-              
-            \item[Description] \hfill \\
-              This is the descriptin string from the --description string from submit.
-        \end{description}

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/service-details.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/service-details.tex?rev=1491620&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/service-details.tex (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/service-details.tex Mon Jun 10 22:03:43 2013
@@ -0,0 +1,113 @@
+\section{Service Details Page}
+\label{sec:ws-service-details}
+
+This page shows details of the processes which implement a service.  Note that in the case of
+\hyperref[sec:services.implicit]{implicit} and \hyperref[subsub:services.ping-only]{ping-only}
+services there will be no processes to show.
+
+The information is divided between two tabs:
+
+   \begin{description}
+       \item[Processes] This tab contains details on all the processes implementing
+         the service, if any.
+       \item[Specification] This tab shows the specification for the service.  In the
+         case of \hyperref[sec:services.implicit]{implicit} services, this shows the generated Service Manager
+         state for the service.
+   \end{description}  
+
+   \subsection{Processes}
+   \label{sec:ws-services-processes}
+
+   The processes page contains the following columns:
+   \begin{description}
+      \item[ID] \hfill \\
+        This is the DUCC-assigned numeric id of the process.  The format of this
+        id is two numbers:
+\begin{verbatim}
+    RESID.SHAREID
+\end{verbatim}
+        Here, the {\em RESID} is the reservation ID.  The {\em SHAREID} is the 
+        share ID assigned by the Resource Manager.  Together these form a unique
+        ID for each process that runs in the reservation.
+                
+      \item[Log] \hfill \\
+        This is the log name for the process. It is hyperlinked to the log itself.
+        
+      \item[Size] \hfill \\
+        This is the size of the log in MB. If you find you have trouble viewing the log
+        from the web server it could be because it is too big to view in the browser.
+        
+      \item[Hostname] \hfill \\
+        This is the name of the node where the process is running (or ran).
+        
+      \item[PID] \hfill \\
+        This is the Unix process ID (PID) of the process.
+        
+      \item[State Scheduler] \hfill \\
+        This shows the Resource Manager state of the job. It is one of:
+        
+        \begin{description}
+            \item[Allocated] - The node is still allocated for this job by the RM.
+            \item[Deallocated] - The resource manager has deallocated the shares for the job on
+              this node.
+        \end{description}
+        
+      \item[Reason Scheduler or Extraordinary Status] \hfill \\
+        These are the same as for the \hyperref[itm:job-details-sched]{job details.}
+
+      \item[State Agent] \hfill \\
+        These are the same as for the \hyperref[itm:job-details-state]{job details.}
+
+      \item[Reason Agent] \hfill \\
+        These are the same as for the \hyperref[itm:job-details-agent]{job details.}
+
+
+      \item[Time Init] \hfill \\
+        Most services are UIMA-AS services and therefore have an {\em initialization} phase
+        to their lifetimes.  This field shows the time spent in that phase.
+
+      \item[Time Run] \hfill \\
+        The current duration of the reservation, or total duration if it has 
+        terminated.
+        
+      \item[Time GC] \hfill \\
+        This is the amount of time spent in Java Garbage Collection for the process.
+
+      \item[Pgin] \hfill \\
+        This is the number of page-in events on behalf of the process.
+        
+      \item[Swap] \hfill \\
+        This is the amount of swap space on the machine being consumed by the process.
+        
+      \item[\%CPU] \hfill \\
+        Current CPU percent consumed by the process.  This will be $>$ 100\% on 
+        multi-core systems if more than one core is being used.  Each core contributes
+        up to 100\% CPU, so, for example, on a 16-core machine, this can be as high
+        as 1600\%.
+
+      \item[RSS] \hfill \\
+        The amount of real memory being consumed by the process (Resident Storage Size)
+
+      \item[JConsole URL] \hfill \\
+        This is a URL that can be used to connect via JMX to the processes, e.g. via
+        jconsole.
+
+   \end{description}
+
+   \subsection{Specification}
+   \label{sec:ws-managed-reservation-specification}
+   This tab shows the full job specification in the form of a Java Properties
+   file.  This will include all the parameters specified by the user, plus those
+   filled in by DUCC.
+        
+   The specification for a Service contains two types of entries:
+   \begin{enumerate}
+     \item Service specification properties, prefixed with ``svc''. These comprise
+       the service specification that the Service Manager submits on behalf of
+       a user in order to start registered services.
+     \item Meta properties, prefixed with ``meta''.  This is the Service Manager's state
+       record for the service as it is running.  In addition to state it contains
+       properties required for service registration that are not used for
+       service submission.
+   \end{enumerate}
+   

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/services.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/services.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/services.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/services.tex Mon Jun 10 22:03:43 2013
@@ -1,54 +1,103 @@
 
-    \section{Services Details Page}
+    \section{Services Page}
 
-        This page shows details of all reservations. The Reservations page contains the following columns: 
+        This page shows details of all services.           
+
+        The Services page contains the following columns: 
         \begin{description}
 
             \item[Id] \hfill \\
-              This is the DUCC process id of the reservation as provided when the reservation is
-              made.
+              This is the unique numeric DUCC id of the service.  This ID is hyperlinked to a
+              \hyperref[sec:ws-service-details]{Service Details} page with extended
+              details on the service.  Note that for some types of services, DUCC may not
+              know more about the service than is shown on the main page.
+
+            \item[Name] \hfill \\
+              This is the unique service endpoint of the service.  
+              
+            \item[Type] \hfill \\
+              This is the service type.
+              
+              There are a number of variants on service types, as discussed in the
+              \hyperref[sec:services.types]{services} section of this book.  The webserver
+              simplifies these into the following three values:
+              \begin{itemize}
+                \item Registered
+                \item Submitted
+                \item Implicit
+              \end{itemize}
+              
+            \item[State] \hfill \\
+              This is the state of the service with respect to the service manager.  It is a
+              consolidated state over all the service instances.  Valid states are
+              \begin{description}
+                \item[Available] At least one service instance is responding to the service
+                  pinger, indicating it is functional.
+                \item[Initializing] No service instances are running but at least one instance
+                  is in its UIMA-AS {\em initializing} phase.
+                \item[Waiting] At least one service instance is in Running state, and the Service
+                  Manager is waiting for a response from the service pinger.
+                \item[NotAvailable] No service instance is running. 
+                \item[Stopping] The service has been stopped for some reason, but not all 
+                  instances have terminated.
+              \end{description}
+
+              DUCC will start dependent jobs ONLY if its services are in state Available.  Otherwise
+              DUCC attempts to start the service, and if successful, allows the job to start.  
+
+              If a job is already running and a service becomes other than Available, the
+              \hyperref[sec:ws.jobs-page]{jobs page} indicates the service is not available but the job is 
+              allowed to continue.
+              
+            \item[Pinger] \hfill \\
+              This indicates whether the Service Manager is running a pinger for the service.
+              
+            \item[Health] \hfill \\
+              {\em Health} is a status returned by each pinger and is the result of that pinger's
+              evaluation of the state of the service.  It is shown as one of
+              \begin{itemize}
+                \item {\em Good}
+                \item {\em Bad}
+              \end{itemize}
+              Both terms are highly subjective.  Pingers may return a summary of the underlying
+              data used to label a service as good or bad.  That status is shown as a hover over
+              this field.
+              
+            \item[Instances] \hfill \\
+              This is the number of instances (processes) currently registered for the service.  
+
+            \item[Deployments] \hfill \\
+              This is the number of actual instances deployed for the service.  Note that this may
+              be greater, or less, than the number of registered instances, if the service owner
+              decides to temporarily start or stop additional instances.
 
-            \item[Start] \hfill \\
-              This is the time the reservation was mde.
-              
-            \item[End] \hfill \\
-              This is the time the reservation was canceled.
-              
             \item[User] \hfill \\
-              This is the userid if the person who made the reservation.
+              This is the userid of the service owner.
               
             \item[Class] \hfill \\
-              This is the resource class used to schedule the reservation.
+              This is the scheduling class the service is running in. 
               
-            \item[Status] \hfill \\
-              This is the status of the reservation. Values include: Received - Reservation
-              has been vetted, persisted, and assigned unique Id.
-              \begin{description}
-                  \item[WaitingForResources] - The reservation is waitng for the Resource Manager to find and 
-                  \item[schedule] resources. 
-                  \item[Assigned] - The reservation is active. 
-                  \item[Completed] - The reservation has been canceled.                     
-              \end{description}
+              If a service is registered as ``ping-only'', no resources are allocated for it.  This
+              is shown as a class of {\tt ping-only}.
+              
+            \item[Size] \hfill \\
+              This is the memory size, in GB, of each service instance.
 
-            \item[Reason] \hfill \\
-              If a reservation is not active, the reason. Reasons include:
-                \begin{description}
-                    \item[ResourcesUnavailable] - The Resource Manager was unable to find free or freeable resources 
-                    \item[to] match the resource request. 
-                    \item[CanceledBySystem] - The job was canceled because DUCC was shutdown. 
-                    \item[CanceledByUser] - The owner or administrator released the reservation. 
-                \end{description}
-
-            \item[Allocation] \hfill \\
-              The number of resources (shares for FIXED policy reservartions, processes for
-              RESERVE policy reservations) that are allocated.
+            \item[Jobs] \hfill \\
+              This is the number of jobs currently using the service.  The IDs of the jobs are
+              shown as hovers over this field.
+
+            \item[Services] \hfill \\
+              Services may themselves depend on other services.  This field shows the number of
+              services dependent on this service.  The dependent service IDs are shown with a 
+              hover over the field.
+
+            \item[Reservations] \hfill \\
+              This field shows the number of
+              managed reservations dependent on this service. The IDs of the managed reservations
+              are shown as a hover over the field.
 
-            \item[Size] \hfill \\
-              The memory size in GB of the each allocated unit.
-              
-            \item[List] \hfill \\
-              The node names of the machines where the resource is allocated.
               
             \item[Description] \hfill \\
-              This is the descriptin string from the --description string from submit.
+              This is the description string from the {\tt -{}-description} option of submit.
         \end{description}

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/system.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/system.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/system.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/system.tex Mon Jun 10 22:03:43 2013
@@ -1,54 +1,181 @@
 
-    \section{System  Details Page}
+\section{System  Details Page}
+\label{sec:system-details}
 
-        This page shows details of all reservations. The Reservations page contains the following columns: 
+This page shows information relating to the DUCC System itself:
+\begin{description}
+  \item[Administration] This displays system administrators and implements
+    the interface to various administrative controls.
+  \item[Classes] This shows the current system's scheduling class definitions.
+  \item[Daemons] This shows the status of all DUCC processes.
+  \item[DuccBook] This is a link to the book you are reading.
+  \item[Machines] This shows details of all the machines in the DUCC cluster.
+\end{description}
+
+\subsection{Administration}
+
+   This page has two tabs:
+   \begin{description}   
+     \item[Administrators] This shows the userids that are authorized to administer
+       DUCC.  In addition to executing the ``Control'' functions described below,
+       administrators may cancel any job, reservation, or service, and may modify
+       services they do not own.  
+
+       In order to perform administrative functions, the following must be satisfied:
+       \begin{enumerate}
+         \item The user is logged-in to the webserver.
+         \item The user is a registered administrator.
+         \item The user has set the role as ``administrator'' in the DUCC Preferences
+           page.  This is a safeguard so that administrators who are also users
+           are less likely to inadvertently affect other people's jobs.
+       \end{enumerate}
+     \item[Control] Currently DUCC supports a single administrative control function
+       via the webserver: Stop new job submissions and reenable them.  If submissions
+       are blocked, all existing work runs normally, but no new work is accepted.
+     \end{description}
+
+
+\subsection{Classes}
+This page shows the definitions of the DUCC scheduling classes.  The scheduling classes are
+discussed in more detail in the \hyperref[sec:rm.job-classes]{Resource Manager} section.
+
+\subsection{Daemons}
+
+This page shows the current state of all DUCC processes.  By default, only the administrative
+processes, Orchestrator, ProcessManager, ResourceManager, ServiceManager, and Webserver are
+shown.  A button in the upper left of the page titled ``Show Agents'' enables display of
+the status of all the DUCC agents as well. (Agents are suppressed by default because the
+page is expensive to render for large systems.)
+
+The columns shown on this page include
+
+   \begin{description}
+      \item[Status] \hfill \\
+        This indicates whether the daemon is running and broadcasting state {\em up},
+        or not {\em down}.  
+        
+        All DUCC daemons broadcast a heartbeat containing process state.  If the Status
+        is {\em down}, either the daemon is not functioning, or something is preventing
+        state from reaching the webserver via DUCC's ActiveMq instance.
+
+      \item[Daemon Name] \hfill \\
+        This is the name of the process.
+
+      \item[Boot Time] \hfill \\ 
+        This shows the date and time of the latest boot of the specific process.
+          
+      \item[Host IP] \hfill \\ 
+        This is the IP address of the processor where the process is running.
+
+      \item[Host Name] \hfill \\ 
+        This shows the hostname of the processor where the process is running.
+
+      \item[PID] \hfill \\ 
+        This is the Unix processid of the DUCC process.
+
+
+      \item[Publication Size (last)] \hfill \\ 
+        This shows the size of the most recent state publication of the process, in bytes.
+
+      \item[Publication Size (max)] \hfill \\ 
+        This shows the size of the largest state publication of the process, in bytes.
+
+      \item[Heartbeat (last)] \hfill \\ 
+        This shows the number of seconds since the last state publication for the process. 
+         Large numbers here indicate potential cluster or DUCC problems.
+
+      \item[Heartbeat (max)] \hfill \\ 
+        This shows the longest delay since a state publication for the process was received
+        at the webserver.  Large numbers here indicate potential cluster or DUCC problems.
+
+      \item[Heartbeat (max) TOD] \hfill \\ 
+        This shows the time the longest delay of a state publication occurred.
+
+      \item[JConsole URL] \hfill \\ 
+        This is the jconsole URL for the process.
+
+   \end{description}
+      
+\subsection{Machines}
+
+This page shows the states of all the machines in the DUCC cluster.
+
+The columns shown on this page include
+
+   \begin{description}
+      \item[Status] \hfill \\
+        This shows the current state of a machine.  Values include:
         \begin{description}
+          \item[defined] The node is in the DUCC
+            \hyperref[sec:admin-ducc.nodes]{nodes file}, but no DUCC process has been
+            started there, or else there is a communication problem and
+            the state messages are not being delivered.
+            \item[up] The node has a DUCC Agent process running on it and the
+              webserver is receiving regular heartbeat packets from it.
+            \item[down] The node had a healthy DUCC Agent on it at some point
+              in the past (since the last DUCC boot), but the webserver has stopped
+              receiving heartbeats from it. 
 
-            \item[Id] \hfill \\
-              This is the DUCC process id of the reservation as provided when the reservation is
-              made.
-
-            \item[Start] \hfill \\
-              This is the time the reservation was mde.
-              
-            \item[End] \hfill \\
-              This is the time the reservation was canceled.
-              
-            \item[User] \hfill \\
-              This is the userid if the person who made the reservation.
-              
-            \item[Class] \hfill \\
-              This is the resource class used to schedule the reservation.
-              
-            \item[Status] \hfill \\
-              This is the status of the reservation. Values include: Received - Reservation
-              has been vetted, persisted, and assigned unique Id.
-              \begin{description}
-                  \item[WaitingForResources] - The reservation is waitng for the Resource Manager to find and 
-                  \item[schedule] resources. 
-                  \item[Assigned] - The reservation is active. 
-                  \item[Completed] - The reservation has been canceled.                     
-              \end{description}
-
-            \item[Reason] \hfill \\
-              If a reservation is not active, the reason. Reasons include:
-                \begin{description}
-                    \item[ResourcesUnavailable] - The Resource Manager was unable to find free or freeable resources 
-                    \item[to] match the resource request. 
-                    \item[CanceledBySystem] - The job was canceled because DUCC was shutdown. 
-                    \item[CanceledByUser] - The owner or administrator released the reservation. 
-                \end{description}
-
-            \item[Allocation] \hfill \\
-              The number of resources (shares for FIXED policy reservartions, processes for
-              RESERVE policy reservations) that are allocated.
-
-            \item[Size] \hfill \\
-              The memory size in GB of the each allocated unit.
-              
-            \item[List] \hfill \\
-              The node names of the machines where the resource is allocated.
-              
-            \item[Description] \hfill \\
-              This is the descriptin string from the --description string from submit.
+              The agent may have been manually shut down, may have crashed, or there
+              may be a communication problem.
+
+              Additionally, very heavy loads from jobs running on the node can cause
+              the DUCC Agent's heartbeats to be delayed.
         \end{description}
+
+
+      \item[IP] \hfill \\
+        This is the IP address of the node.
+
+      \item[Name] \hfill \\
+        This is the hostname of the node.
+
+      \item[Reserve(GB) size] \hfill \\
+        This is the largest reservation that can be made on this node.
+
+        This is usually somewhat less than the physical memory size because it is 
+        rounded down to the nearest \hyperref[chap:rm]{share quantum}.  The purpose of this
+        column is to assist users in requesting the right size for full machine 
+        reservations.
+
+      \item[Memory(GB) total] \hfill \\
+        This is the amount of memory, in GB, as reported by each machine.
+        
+        Usually the amount will be slightly less than the installed memory.  This is because
+        a small bit of memory is usually reserved by the hardware for its own purposes.  For 
+        example, a machine with 48GB of installed memory may report only 47GB available.
+
+      \item[Swap(GB) in use] \hfill \\
+        This is the total size of in-use swap data.  DUCC shows any value greater than 0 in
+        red as swapping can very significantly slow applications.  However, swap use does
+        not always mean there is a performance problem.  This is flagged by DUCC simply
+        as an alert of a potential problem.
+
+      \item[Alien PIDs] \hfill \\
+        This shows the number of processes not owned by DUCC, the operating system, or
+        jobs scheduled on each node.  The Unix Process IDs of these processes are displayed
+        in a hover.
+
+        DUCC preconfigures many of the standard operating 
+        \hyperref[itm:props-rogue.process]{system processes} and 
+        \hyperref[itm:props-rogue.user]{userids}.  This list may be updated by each
+        installation.
+
+        A common cause of alien PIDs is errant processes run in unmanaged reservations.  A
+        user may reserve a machine for use as a sandbox.  If the reservation is released
+        without properly terminating all the processes, they may linger.  When DUCC 
+        schedules the node for other purposes, significant performance penalties may be
+        paid due to competition between the legitimately scheduled work and the leftover
+        ``alien'' processes.  The purpose of this column is to bring attention to this situation.
+
+      \item[Shares (total)] \hfill \\
+        This shows the total number of scheduling shares supported on this node.
+
+      \item[Shares(inuse)] \hfill \\
+        This shows the total number of scheduling shares in use on the node.
+
+      \item[Heartbeat(last)] \hfill \\
+        This shows the number of seconds since the last agent heartbeat from this machine.
+
+      \end{description}
+      

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-nodes.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-nodes.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-nodes.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-nodes.tex Mon Jun 10 22:03:43 2013
@@ -1,5 +1,5 @@
 \section{Ducc Node Definitions}
-
+\label{sec:admin-ducc.nodes}
     The DUCC node definitions are specified by default in the file {\em ducc.nodes}.
 
     The DUCC node list is used to configure the nodes used to run jobs and assign reservations. A 

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-properties.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-properties.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-properties.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-properties.tex Mon Jun 10 22:03:43 2013
@@ -1025,6 +1025,7 @@
           
 
         \item[ducc.agent.launcher.share.size.fudge.factor] \hfill \\
+
           The DUCC agent monitors the size of the resident memory of its spawned processes. If a 
           process exceeds its declared memory size by any significant amount it is terminated and 
           a ShareSizeExceeded message is sent. The Job Driver counts this towards the maximum 
@@ -1040,6 +1041,8 @@
           \end{description}
           
           \item[ducc.agent.rogue.process.user.exclusion.filter] \hfill \\
+          \phantomsection\label{itm:props-rogue.user}
+
             The DUCC Agents scan nodes for processes that should not be running; for example, 
             a job may have left a 'rogue' process alive when it exits, or a user may log in to a node 
             unexpectedly. These processes are reported to the administrators via the webserver for 
@@ -1053,6 +1056,7 @@
             \end{description}
             
           \item[ducc.agent.rogue.process.exclusion.filter] \hfill \\
+          \phantomsection\label{itm:props-rogue.process}
             The DUCC Agents scan nodes for processes that should not be running; for example, 
             a job may have left a 'rogue' process alive when it exits, or a user may log in to a node 
             unexpectedly. These processes are reported to the administrators via the webserver for 

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/rm.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/rm.tex?rev=1491620&r1=1491619&r2=1491620&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/rm.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/rm.tex Mon Jun 10 22:03:43 2013
@@ -202,6 +202,7 @@
     are available. Use the nodepool configurations "order" directive to do this.
 
     \section{Job Classes}
+    \label{sec:rm.job-classes}
     The primary abstraction to control and configure the scheduler is the class. A class is simply a set 
     of rules used to parameterize how resources are assigned to jobs. Every job that enters the system is 
     associated with one job class.