You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ch...@apache.org on 2013/06/07 14:00:34 UTC

svn commit: r1490601 [2/2] - in /uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook: part2/ part2/cli/ part2/webserver/ part4/admin/

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/job-details.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/job-details.tex?rev=1490601&r1=1490600&r2=1490601&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/job-details.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/job-details.tex Fri Jun  7 12:00:33 2013
@@ -1,14 +1,27 @@
   
     \section{Job Details Page}
     \label{sec:ws-job-details}
-    This page shows details of all the processes that run in support of a job. 
-    The Jobs page contains the following columns: 
 
+    This page shows details of all the processes that run in support of a job. 
+    The information is divided among four tabs:
+    \begin{description}
+      \item[Processes] This tab contains details on all the processes for the job, both
+        active, and defunct.
+      \item[Work Items] This tab shows details for each individual work-item in the job.
+      \item[Performance] This tab shows a performance break-down of all the UIMA analytics
+        in the job.
+      \item[Specification] This tab shows the job specification for the job.
+      \end{description}
+      
+    \subsection{Processes}
+    \label{sec:ws-processes}
+    The processes page contains the following columns:
+    
     \begin{description}
 
         \item[Id] \hfill \\
-          This is the DUCC process id (not the Operating System's processid). Process 0 is
-          alwyas the Job Driver. It is hyperlinked to jd.out.log.
+          This is the DUCC-assigned numeric id of the process (not the Operating System's
+          processid). Process 0 is always the Job Driver. 
 
         \item[Log] \hfill \\
           This is the log name for the process. It is hyperlinked to the log itself.
@@ -22,32 +35,26 @@
           This is the name of the node where the process ran.
 
         \item[PID] \hfill \\
-          This is the Operating Systems' PID for the process.
+          This is the Unix process ID (PID) of the process.
 
-        \item[State:Scheduler] \hfill \\
+        \item[State Scheduler] \hfill \\
           This shows the Resource Manager state of the job. It is one of:
 
           \begin{description}
-              \item[Allocated] - The node is still allocated for this job by the RM 
+              \item[Allocated] - The node is still allocated for this job by the RM.
               \item[Deallocated] - The resource manager has deallocated the shares for the job on
                 this node.
           \end{description}
 
-        \item[Reason:Schedule] \hfill \\
-          This shows why a process is terminated, from the system's point
-          of view.
+        \item[Reason Scheduler or extraordinary status] \hfill \\
+          This shows why a process was terminated.  These all have ``hovers'' that provide more information
+          if it is available.
             \begin{description}          
-                \item[AutonomousStop] - The process terminated unexpectedly of its own accord ("crashed") for no 
-                  detectable reason.. 
-                  
-                \item[JobCanceled] - The job was canceled by the user or a system administrator. 
-                  
-                \item[JobCompleted] - The process is canceled because of DUCC restart. 
-                  
-                \item[JobFailure] - The job failure limit is exceeded, causing the job to be canceled by the JD. 
-                  
+                \item[AutonomousStop] - The process terminated unexpectedly of its own accord (``crashed'', or
+                  simply exited.) 
+
                 \item[Exception] - The process is terminated by the JD exception handler. 
-                  
+
                 \item[Failed] - The process is terminated by the Agent because the JP wrapper was able to detect and 
                   communicate a fatal condition (Exception) in the pipeline. 
                   
@@ -55,6 +62,12 @@
                   
                 \item[Forced] - The node is preempted by RM for other work because of fair share. 
                   
+                \item[JobCanceled] - The job was canceled by the user or a system administrator. 
+                  
+                \item[JobCompleted] - The process is canceled because of DUCC restart. 
+                  
+                \item[JobFailure] - The job failure limit is exceeded, causing the job to be canceled by the JD.                    
+                  
                 \item[InitializationTimeout] - The initialization phase exceeded the configured timeout. 
                   
                 \item[Killed] - The agent terminated the process for some reason. 
@@ -67,46 +80,101 @@
                   possible causes..                   
             \end{description}
 
-          \item[State:Agent] \hfill \\
-            If there's an error detected only by the agent, this shows the Agent's reason for
+          \item[State Agent] \hfill \\
+            If there's an error detected only by the DUCC Agent, this shows the Agent's reason for
             a process's death.
-
-          \item[Reason:Agent]I\hfill \\
-            f there's an error detected only by the agent, this shows the Agent's reason for
+            \begin{description}
+               \item[Starting] The DUCC process manager has issued a request to the assigned node to
+                 start the process.
+               \item[Initializing] The process is initializing.  Usually this means the UIMA analytic
+                 pipeline (Job Process) is executing its initialization method.
+              \item[Running] The Job Process has completed the initialization phase and is ready for, 
+                or actively executing work.
+              \item[Stopped] The DUCC Agent reports the process is stopped (and has exited).
+              \item[Failed] The DUCC Agent reports the process failed with errors.  This usually
+                means that UIMA-AS has detected exceptions in the pipeline and reported them
+                to the Job Driver for logging.
+              \item[FailedInitialization] The process died during the UIMA initialization phase.
+              \item[InitializationTimeout] The process exceeded the site's limit for time spent
+                in UIMA initialization.
+              \item[Killed] The DUCC Agent killed the process for some reason.  There are
+                three reasons for this:
+                \begin{enumerate}
+                  \item The Job Process fails to initialize,
+                  \item The Job Process times out during initialization,
+                  \item The process exceeds its allowed swap.
+                \end{enumerate}
+              \item[Abandonded] 
+            \end{description}
+            
+          \item[Reason Agent] \hfill \\
+            If there's an error detected only by the agent, this shows the Agent's reason for
             a process's death.
+            \begin{description}
+              \item[AgentTimedOutWaitingForORState] The DUCC Agent is expecting a state update
+                from the DUCC Orchestrator.  The timer on this wait has expired.  This usually 
+                indicates an infrastructure or communication problem.
+              \item[Croaked] The process exited for no good or clear reason, it simply vanished.
+              \item[Deallocated] 
+              \item[ExceededShareSize] The process exceeded its declared memory size.
+              \item[ExceededSwapThreshold] The process exceeded the configured swap threshold.
+              \item[FailedInitialization] The process was terminated because the UIMA 
+                initialization step failed.
+              \item[InitializationTimeout] The process was terminated because the UIMA initialization
+                step took too long.
+              \item[JPHasNoActiveJob] This is set when an agent loses connectivity while its
+                JPs are running. The job finishes (stopped or killed). The agent regains
+                connectivity. The OR publish no longer includes the job but the agent still has
+                processes running for that job. The agent kills ghost processes with the reason:
+                JPHasNoActiveJob.
+              \item[LowSwapSpace] The process was terminated because the system is about to run
+                out of swap space.  This is a preemptive measure taken by DUCC to avoid exhaustion
+                of swap, to effect orderly eviction of the job before the operating system starts
+                its own reaping procedures.
+              \item[AdministratorInitiated] The process was canceled by an administrator.
+              \item[UserInitiated] The process was canceled by the owning user.
+            \end{description}
             
-          \item[Time:Init] \hfill \\
+          \item[Time Init] \hfill \\
             This is the clock time this process spent in initialization.
             
-          \item[Time:Run] \hfill \\
+          \item[Time Run] \hfill \\
             This is the clock time this process spent in executing, not including
             initialization.
             
-          \item[Time:GC] \hfill \\
+          \item[Time GC] \hfill \\
             This is the amount of time spent in Java Garbage Collection for the process.
             
-          \item[Count:GC] \hfill \\
+          \item[Count GC] \hfill \\
             This is the number of garbage collections performed by the process.
             
-          \item[\%GC]P\hfill \\
-            rocess percentage of time spent in garbage collections, relative to total of
+          \item[Pgin] \hfill \\
+            This is the number of page-in events on behalf of the process.
+
+          \item[Swap] \hfill \\
+            This is the amount of swap space on the machine being consumed by the process.
+
+          \item[\%GC] \hfill \\
+            Percentage of time spent in garbage collections by this process, relative to total of
             initialization + run times.
             
-          \item[CPU] \hfill \\
-            Cumulative CPU time for the process.
+          \item[\%CPU] \hfill \\
+            Current CPU percent consumed by the process.  This will be $>$ 100\% on 
+            multi-core systems if more than one core is being used.  Each core contributes
+            up to 100\% CPU, so, for example, on a 16-core machine, this can be as high
+            as 1600\%.
             
           \item[\%RSS] \hfill \\
-            Resident Storage Size, as a percentage of process memory requirement in job
-            specification.
+            The amount of real memory being consumed by the process (Resident Storage Size).
             
-          \item[Time:Avg] \hfill \\
-            Average seconds spent per work item in the process.
+          \item[Time Avg] \hfill \\
+            This is the average time in seconds spent per work item in the process.
             
-          \item[Time:min] \hfill \\
-            This is the minimum time spent per work item in the process.
+          \item[Time max] \hfill \\
+            This is the maximum time in seconds spent per work item in the process.
             
-          \item[Time:max] \hfill \\
-            This is the minimum time spent per work item in theprocess.
+          \item[Time min] \hfill \\
+            This is the minimum time in seconds spent per work item in the process.
             
           \item[Done] \hfill \\
             This is the number of work items processed in this process.
@@ -115,14 +183,65 @@
             This is the number of exceptions processing work items in this process.
             
           \item[Retry] \hfill \\
-            This is the number of work items that were retried for any reason, excluding
+            This is the number of work items that were retried in this process for any reason, excluding
             preemptions.
             
           \item[Preempt] \hfill \\
-            This is the number of work items that had to be retried because of preemption.
+            This is the number of work items that were preempted from this process, if
+            fair-share caused preemption.
             
           \item[JConsole URL] \hfill \\
             This is a URL that can be used to connect via JMX to the processes, e.g. via
             jconsole.
 
       \end{description}
+
+   \subsection{Work Items}
+   This tab provides details for each individual work item.  Columns include:
+   
+   \begin{description}
+     \item[SeqNo] This is the sequence in which work items are fetched from the Collection Reader's
+       getNext() method by the DUCC Job Driver.
+     \item[Id] This is the name of the work item.
+     \item[Status] This is the current state of the work item.  
+       States include:
+       \begin{description}
+         \item[ended] The work item is complete.
+         \item[error] The work item ended with errors.
+         \item[operating] The work item is currently being executed.
+         \item[retry] The work item is being retried.
+         \item[start] The work item has been picked up for execution and DUCC is waiting
+           for confirmation that it is running.
+         \item[queued] The work item has been queued to ActiveMQ but not picked up by any
+           Job Process yet.
+       \end{description}
+       If a work item has not yet been retrieved from the Collection Reader it does not show
+       on this page.
+     \item[Queuing Time (sec)]  The time spent in ActiveMQ after being queued, and before
+       being picked up by a Job Process.
+     \item[Processing Time (sec)] The time spent processing the work item.
+     \item[Node (IP)] The node IP where the work item was processed.
+     \item[Node (Name)] The node name where the work item was processed.
+     \item[PID] The Unix Process Id that the work item was processed in.
+   \end{description}
+   
+
+   \subsection{Performance}
+   This tab shows performance summaries of all the pipeline components.  The statistics
+   are aggregated over all instances of each component in each process of the job.
+   
+   \begin{description}
+     \item[Name] The short name of the analytic.  The full name is shown in the command-line
+       tool \hyperref[sec:cli.ducc-perf-stats]{ducc\_perf\_stats}.
+     \item[Total] This is the total time in days, hours, minutes, and seconds taken by each
+       component of the pipeline.
+     \item[\% of Total] This is the percent of the total usage consumed by this analytic.
+     \item[Avg] This is the average time spent by all the instances of the analytic.
+     \item[Min] This is the minimum time spent by any instance of the analytic.
+     \item[Max] This is the maximum time spent by any instance of the analytic.
+   \end{description}
+   
+   \subsection{Specification}
+   This tab shows the full job specification in the form of a Java Properties
+   file.  This will include all the parameters specified by the user, plus those
+   filled in by DUCC.

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/jobs.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/jobs.tex?rev=1490601&r1=1490600&r2=1490601&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/jobs.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/jobs.tex Fri Jun  7 12:00:33 2013
@@ -8,15 +8,18 @@
         \begin{description}
 
             \item[Id] \hfill \\
-              This is the ID as assigned by DUCC. This field is hyperlinked to a "Job Details" page
-              that shows the breakdown of all the processes assigned to the job and their state.
+              This is the ID as assigned by DUCC. This field is hyperlinked to a
+              \hyperref[sec:ws-job-details]{Job Details} page for that job that shows the breakdown of
+              all the processes assigned to the job and their state.
               
             \item[Start] \hfill \\
               This is the time the Job is accepted into DUCC.
               
-            \item[End] \hfill \\
-              This is the time the Job completes.
-              
+            \item[Duration] \hfill \\
+              This shows two times.  In green is the length of time the job has been running.  In black is
+              the estimated time of completion, based on current resources and remaining work.  When
+              the job completes, the time shown is the total elapsed time of the job.
+                            
             \item[User] \hfill \\
               This is the userid of the job owner.
               
@@ -26,7 +29,7 @@
             \item[State] \hfill \\
               This shows the state of the job. States include:
               \begin{description}
-                  \item[Received] - The jobhas ben vetted, persisted, and assigned a unique ID. 
+                  \item[Received] - The job has been vetted, persisted, and assigned a unique ID. 
                   \item[WaitingForDriver] - The job is waiting for the Job Driver to initialize. 
                   \item[WaitingForServices] - The job is waiting to verify that any declared services are available. 
                   \item[WaitingForResources] - The job is waiting to be scheduled. 
@@ -49,6 +52,8 @@
                     the field with your mouse for details (if any are available), and check your JD log. 
                   \item[DriverProcessFailed] - The Job Driver (JD) process failed for some reason. Hover over the 
                     field with your mouse for details (if any), and check your JD log. 
+                  \item[MonitorActive] The job has a console monitor active.  This is enabled with the
+                    job's ``wait\_for\_completion'' parameter on job submission.
                   \item[ServicesUnavailable] - The job declared a dependency on one or more services, and the 
                     Service Manager (SM) cannot find or start the required service. 
                   \item[Premature] - The job was terminated for some unknown reason before all work items were 
@@ -62,18 +67,26 @@
                     it. For all jobs, it could be because the job class is invalid. 
               \end{description}
 
+            \item[Services] \hfill \\
+              This is the number of services the job has declared dependencies on.  There is a ``hover'' that
+              shows the ids of the services, if any.
 
             \item[Processes] \hfill \\
               This is the number of processes currently assigned to the job.
 
-            \item[Init Fails]T\hfill \\
-              his is the total number of initialization failures experienced by the job. This
+            \item[Init Fails] \hfill \\
+              This is the total number of initialization failures experienced by the job. This
               field is hyperlinked to pages showing the specific failures.
               
-            \item[Run Fails]T\hfill \\
-              his is the total number of process failures experienced by the job. This field is
+            \item[Run Fails] \hfill \\
+              This is the total number of process failures experienced by the job. This field is
               hyperlinked to a page showing the specific failures.
               
+            \item[Pgin] This is the number of page-in events, over all processes, on the machines
+              running the job.
+
+            \item[Swap] This is the total swap space, over all the processes, being used by the job.
+
             \item[Size] \hfill \\
               This is the declared memory size of the job
               
@@ -83,19 +96,19 @@
             \item[Done] \hfill \\
               This is the total number of work items successfully completed for the job.
               
-            \item[Error]T\hfill \\
-              his is the total number of exceptions thrown or other errors experienced by work
+            \item[Error] \hfill \\
+              This is the total number of exceptions thrown or other errors experienced by work
               items. This field is hyperlinked to a page showing the specific failures.
               
-            \item[Dispatch]T\hfill \\
-              his is the total number CASs that are currently dispatched. This is usally
+            \item[Dispatch] \hfill \\
+              This is the total number of CASs that are currently dispatched. This is usually
               min(Processes * Threads, incomplete\_work\_items - errors)
               
             \item[Retry] \hfill \\
               This is the number of CASs that were retried for any reason (such as timeout).
               
-            \item[Preempt]T\hfill \\
-              his is the total number of processes that have been preempted to make room for
+            \item[Preempt] \hfill \\
+              This is the total number of processes that have been preempted to make room for
               other work due to Fair Share.
               
             \item[Description] \hfill \\

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/services.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/services.tex?rev=1490601&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/services.tex (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/services.tex Fri Jun  7 12:00:33 2013
@@ -0,0 +1,54 @@
+
+    \section{Services Details Page}
+
+        This page shows details of all reservations. The Reservations page contains the following columns: 
+        \begin{description}
+
+            \item[Id] \hfill \\
+              This is the DUCC process id of the reservation as provided when the reservation is
+              made.
+
+            \item[Start] \hfill \\
+              This is the time the reservation was made.
+              
+            \item[End] \hfill \\
+              This is the time the reservation was canceled.
+              
+            \item[User] \hfill \\
+              This is the userid of the person who made the reservation.
+              
+            \item[Class] \hfill \\
+              This is the resource class used to schedule the reservation.
+              
+            \item[Status] \hfill \\
+              This is the status of the reservation. Values include: Received - Reservation
+              has been vetted, persisted, and assigned unique Id.
+              \begin{description}
+                  \item[WaitingForResources] - The reservation is waiting for the Resource Manager to find and 
+                    schedule resources. 
+                  \item[Assigned] - The reservation is active. 
+                  \item[Completed] - The reservation has been canceled.                     
+              \end{description}
+
+            \item[Reason] \hfill \\
+              If a reservation is not active, the reason. Reasons include:
+                \begin{description}
+                    \item[ResourcesUnavailable] - The Resource Manager was unable to find free or freeable resources 
+                      to match the resource request. 
+                    \item[CanceledBySystem] - The job was canceled because DUCC was shutdown. 
+                    \item[CanceledByUser] - The owner or administrator released the reservation. 
+                \end{description}
+
+            \item[Allocation] \hfill \\
+              The number of resources (shares for FIXED policy reservations, processes for
+              RESERVE policy reservations) that are allocated.
+
+            \item[Size] \hfill \\
+              The memory size in GB of each allocated unit.
+              
+            \item[List] \hfill \\
+              The node names of the machines where the resource is allocated.
+              
+            \item[Description] \hfill \\
+              This is the description string from the --description option of submit.
+        \end{description}

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/system.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/system.tex?rev=1490601&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/system.tex (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part2/webserver/system.tex Fri Jun  7 12:00:33 2013
@@ -0,0 +1,54 @@
+
+    \section{System  Details Page}
+
+        This page shows details of all reservations. The Reservations page contains the following columns: 
+        \begin{description}
+
+            \item[Id] \hfill \\
+              This is the DUCC process id of the reservation as provided when the reservation is
+              made.
+
+            \item[Start] \hfill \\
+              This is the time the reservation was made.
+              
+            \item[End] \hfill \\
+              This is the time the reservation was canceled.
+              
+            \item[User] \hfill \\
+              This is the userid of the person who made the reservation.
+              
+            \item[Class] \hfill \\
+              This is the resource class used to schedule the reservation.
+              
+            \item[Status] \hfill \\
+              This is the status of the reservation. Values include: Received - Reservation
+              has been vetted, persisted, and assigned unique Id.
+              \begin{description}
+                  \item[WaitingForResources] - The reservation is waiting for the Resource Manager to find and 
+                    schedule resources. 
+                  \item[Assigned] - The reservation is active. 
+                  \item[Completed] - The reservation has been canceled.                     
+              \end{description}
+
+            \item[Reason] \hfill \\
+              If a reservation is not active, the reason. Reasons include:
+                \begin{description}
+                    \item[ResourcesUnavailable] - The Resource Manager was unable to find free or freeable resources 
+                      to match the resource request. 
+                    \item[CanceledBySystem] - The job was canceled because DUCC was shutdown. 
+                    \item[CanceledByUser] - The owner or administrator released the reservation. 
+                \end{description}
+
+            \item[Allocation] \hfill \\
+              The number of resources (shares for FIXED policy reservations, processes for
+              RESERVE policy reservations) that are allocated.
+
+            \item[Size] \hfill \\
+              The memory size in GB of each allocated unit.
+              
+            \item[List] \hfill \\
+              The node names of the machines where the resource is allocated.
+              
+            \item[Description] \hfill \\
+              This is the description string from the --description option of submit.
+        \end{description}

Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-properties.tex
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-properties.tex?rev=1490601&r1=1490600&r2=1490601&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-properties.tex (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-duccdocs/src/site/tex/duccbook/part4/admin/ducc-properties.tex Fri Jun  7 12:00:33 2013
@@ -1,5 +1,5 @@
 \section{ducc.properties}
-
+\label{sec:ducc.properties}
     The primary configuration file is called ducc.properties and always resides in the directory
     ducc\_runtime/resources.