You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by cw...@apache.org on 2013/01/02 20:12:11 UTC

svn commit: r1427917 [5/8] - in /uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook: ./ images/ images/ducc-overview/ images/job-manager/ part-admin/ part-admin/admin/ part-introduction/ part-user/ part-user/cli/ unused/

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-glossary.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-glossary.xml?rev=1427917&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-glossary.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-glossary.xml Wed Jan  2 19:12:10 2013
@@ -0,0 +1,371 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<chapter id="ducc.glossary">
+
+  <title>DUCC Terminology, Acronyms, and Glossary</title>
+  <para>
+    <emphasis>The source for this chapter is ducc_ducbook/documents/introduction/terminology.xml</emphasis>
+  </para>
+
+  <section id="ducc.glossary.terms">
+    <title>Terms</title>
+    <para>
+      This section defines terms and phrases as used in the context of DUCC.
+    </para>
+
+    <variablelist>
+
+      <varlistentry>
+        <term id="ducc.glossary.automatic-service"> <emphasis role="bold">Automatic Service</emphasis></term>
+        <listitem>
+          <para>
+            An <emphasis>automatic service</emphasis> is a registered service that is started automatically
+            by DUCC when the DUCC system is booted.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+      <varlistentry>
+        <term id="ducc.glossary.dependent-entity"> <emphasis role="bold">Dependent service or job</emphasis></term>
+        <listitem>
+          <para>
+            A <emphasis>dependent service or job</emphasis> is a job or service that specifies
+            one or more <link linkend="ducc.glossary.service-endpoint">service endpoints</link>
+            in its job specification.  The service or job is dependent upon the referenced
+            service being operational before being started by DUCC.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.ducc"> <emphasis role="bold">DUCC</emphasis></term>
+        <listitem>
+          <para>DUCC stands for "Distributed UIMA  Cluster Computing."</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.implicit-service"> <emphasis role="bold">Implicit service</emphasis></term>
+        <listitem>
+          <para>
+            An <emphasis>implicit service</emphasis> is a service that is started externally
+            to DUCC but referenced by some <link linkend="ducc.glossary.dependent-entity">dependent service or job.</link>
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.registered-service"> <emphasis role="bold">Registered service</emphasis></term>
+        <listitem>
+          <para>
+            A <emphasis>registered service</emphasis> is a service that is registered with DUCC. DUCC saves the
+            service specification and fully manages the service, ensuring it is running when needed, and
+            shut down when not.  DUCC manages the usage of the service and (in a future version of DUCC) automatically
+            increases and decreases the number of service instances as dictated by demand.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.on-demand-service"> <emphasis role="bold">On-Demand Service</emphasis></term>
+        <listitem>
+          <para>
+            An <emphasis>on-demand service</emphasis> is a registered service that is not started when
+            DUCC is started.  Instead, the service is started when referenced in some job's or service's
+            service dependency, and stopped when the referencing entity exits.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.service-instance"> <emphasis role="bold">Service Instance</emphasis></term>
+        <listitem>
+          <para>
+            A <emphasis>service instance</emphasis> is one physical process which runs a <emphasis>CUSTOM</emphasis> or
+            <emphasis>UIMA-AS</emphasis> service.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.or"> <emphasis role="bold">Orchestrator (OR)</emphasis></term>
+        <listitem>
+          <para>
+            The Orchestrator coordinates all work in the system.  All new work enters through the
+            orchestrator which guides it through the various DUCC components.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.pm"> <emphasis role="bold">Process Manager (PM)</emphasis></term>
+        <listitem>
+          <para>
+            The Process Manager coordinates distribution of work among the Agents.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.rm"> <emphasis role="bold">Resource Manager (RM)</emphasis></term>
+        <listitem>
+          <para>
+            The Resource Manager allocates and schedules  physical resources among the jobs.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+      <varlistentry>
+        <term id="ducc.glossary.service-class"> <emphasis role="bold">Service Class</emphasis></term>
+        <listitem>
+          <para>
+            The three <emphasis>service classes</emphasis> are 
+            <itemizedlist>
+              <listitem>
+                <para>
+                  <emphasis>implicit</emphasis>, referring to
+                  a service started independently from DUCC, 
+                </para>
+                </listitem>
+              <listitem>
+                <para>
+                  <emphasis>submitted</emphasis>, referring to a service submitted as a job
+                  to DUCC, and
+                </para>
+              </listitem>
+              <listitem><para><emphasis>registered</emphasis>, referring to a registered DUCC service.</para></listitem>
+            </itemizedlist>
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry  id="ducc.glossary.service-endpoint">
+        <term> <emphasis role="bold">Service Endpoint</emphasis></term>
+        <listitem>
+          <para>
+            In DUCC, the <emphasis>service endpoint</emphasis> provides a unique identifier for a
+            service and in the case of UIMA-AS services, a well-known address for contacting the
+            service.  For CUSTOM services, the endpoint is of the form
+            CUSTOM:<emphasis>string</emphasis> where <emphasis>string</emphasis> is any alphanumeric
+            string provided by the service owner.  For UIMA-AS services, the endpoint is of the form
+            UIMA-AS:<emphasis>queue name</emphasis>:<emphasis>ActiveMQ broker URL</emphasis>.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.sm"> <emphasis role="bold">Service Manager (SM)</emphasis></term>
+        <listitem>
+          <para>
+            The Service Manager manages the life-cycles of UIMA-AS and custom services.  It coordinates
+            registration of services, starting and stopping of services, and ensures that services
+            are available and remain available for the lifetime of the jobs.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term> <emphasis role="bold">Agent</emphasis></term>
+        <listitem>
+          <para>
+            DUCC Agent processes run on every node in the system.  The Agent receives orders to start and stop
+            processes on each node.  Agents also monitor nodes, sending heartbeat packets with node
+            statistics to interested components (such as the RM and web-server).  All Job Driver and
+            Job Process processes are managed as children of the agents.  
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term> <emphasis role="bold">Ducc-mon</emphasis></term>
+        <listitem>
+          <para>
+            Ducc-mon is the DUCC web-server. All DUCC state of import or interest is presented here including
+            job state, cluster state, DUCC daemon state, and visualization of the system.  Various controlling
+            actions such as canceling jobs, submitting reservations, and administrative functions are supported.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term> <emphasis role="bold">Job Driver (JD)</emphasis></term>
+        <listitem>
+          <para>
+            The Job Driver is a thin Java wrapper that encapsulates a Job's Collection Reader.  The JD executes
+            as a process that is scheduled and deployed by DUCC.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term> <emphasis role="bold">Job Process (JP)</emphasis></term>
+        <listitem>
+          <para>
+            The Job Process is a thin Java wrapper that encapsulates a job's Analysis Engine.  The JP executes
+            in a process that is scheduled and deployed by DUCC.  
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term> <emphasis role="bold">Job specification</emphasis></term>
+        <listitem>
+          <para>
+            The Job Specification is a collection of properties that describe a job. It identifies the
+            UIMA components (CR, AE, etc) that comprise the job, and it specifies system-wide
+            properties of the job (classpaths, RAM requirements, etc).  The properties may be provided as
+            (key, value) pairs to the CLI/API, or in a Java properties file.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term> <emphasis role="bold">Job</emphasis></term>
+        <listitem>
+          <para>
+            A DUCC job consists of the components required to deploy and execute a UIMA pipeline over
+            a computing cluster.  It consists of a JD to run the Collection Reader, a set of JPs to
+            run the UIMA AEs, and a Job Specification to describe how the parts fit together.
+          </para>
+        </listitem>
+      </varlistentry>
+
+
+      <varlistentry>
+        <term> <emphasis role="bold">Share Quantum</emphasis></term>
+        <listitem>
+          <para>
+            In DUCC, a "share quantum" refers to some quantity of memory; for example, 15GB.  The RM
+            schedules resources according to share quanta.  The 
+            share quantum is the smallest unit of memory that can be assigned.  See the section
+            describing the Resource Manager for details.
+          </para>
+          <para>             
+            The terms "share" and "share quantum" are synonymous in DUCC.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term> <emphasis role="bold">Process</emphasis></term>
+        <listitem>
+          <para>
+            A process is one physical process executing on a machine in the DUCC cluster.  DUCC
+            jobs are comprised of one or more processes (JDs and JPs).
+          </para>
+          <para>
+            From the Resource Management view, a process is comprised of one or more share quanta.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.fair-share"> <emphasis role="bold">Weighted Fair Share</emphasis></term>
+        <listitem>
+          <para>
+            The Weighted Fair Share calculation is used to apportion resources in a "fair" manner
+            to the outstanding work in the system.  To account for some work being more
+            "important" than others, a weighting factor may be applied to bias the 
+            fair-share calculations in favor of such work.
+          </para>
+          <para>
+            See the Resource Manager section for more details on  Weighted Fair Share 
+            in DUCC.
+          </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term id="ducc.glossary.work-items"> <emphasis role="bold">Work Items</emphasis></term>
+        <listitem>
+          <para>
+            A <emphasis>work item</emphasis> is one unit of work to be completed in a single
+            DUCC process.  It is usually initiated by the submission of a single CAS from the
+            CR to a UIMA service.  It could be thought of as a single "question" to be answered
+            by a UIMA analytic.  Usually each DUCC JP executes many work items per job.
+          </para>
+        </listitem>
+      </varlistentry>
+
+	</variablelist>
+  </section>
+
+  <section>
+    <title>Acronyms</title>
+    <para>This section defines acronyms as used in the context of DUCC.</para>
+
+    <para>
+      AE: UIMA Analysis Engine
+    </para>
+    <para>
+
+      CAS: UIMA Common Analysis Structure
+    </para>
+
+    <para>
+      CC: CAS Consumer
+    </para>
+
+    <para>
+      CM: UIMA CAS Multiplier
+    </para>
+
+    <para>
+      CR: UIMA Collection Reader
+    </para>
+
+    <para>
+      DUCC: Distributed UIMA Cluster Computing
+    </para>
+
+    <para>
+      JD: Job Driver
+    </para>
+
+    <para>
+      JP: Job Process
+    </para>
+
+    <para>
+      OR: Orchestrator
+    </para>
+
+    <para>
+      PM: Process Manager
+    </para>
+
+    <para>
+      RM: Resource Manager
+    </para>
+
+    <para>
+      SM: Service Manager
+    </para>
+
+    <para>
+      UIMA: Unstructured Information Management Architecture (see http://uima.apache.org/)
+    </para>
+
+    <para>
+      UIMA-AS: UIMA Asynchronous Scaleout (see http://uima.apache.org/doc-uimaas-what.html)
+    </para>
+  </section>
+</chapter>

Propchange: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-glossary.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-introduction.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-introduction.xml?rev=1427917&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-introduction.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-introduction.xml Wed Jan  2 19:12:10 2013
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<chapter id="ducc.introduction">
+
+  <title>Introduction To Distributed UIMA Cluster Computing </title>
+  <para>
+    <emphasis>The source for this chapter is ducc_ducbook/documents/chapter-introduction.xml</emphasis>
+  </para>
+
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="introduction/overview.xml" />
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="introduction/terminology.xml" />
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="introduction/quick-start.xml" />
+
+</chapter>

Propchange: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-introduction.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-overview.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-overview.xml?rev=1427917&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-overview.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-overview.xml Wed Jan  2 19:12:10 2013
@@ -0,0 +1,705 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+       "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd" [
+<!ENTITY imgroot "images/" >
+<!ENTITY imgrootpdf "../images/" >
+]>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<chapter id="ducc.overview">
+    <title>DUCC Overview</title>
+
+    <para>
+      <emphasis>The source for this chapter is ducc_ducbook/documents/part-introduction/chapter-overview.xml.</emphasis>
+    </para>
+
+    <section><title>What is DUCC?</title>
+      <para>
+        DUCC stands for Distributed UIMA Cluster Computing. DUCC is a cluster management
+        system providing tooling, management, and scheduling facilities to automate the scale-out of
+        applications written to the UIMA framework.
+      </para>
+      
+      <para>
+        Core UIMA provides a generalized framework for applications that process unstructured
+        information such as human language, but does not provide a scale-out mechanism.  UIMA-AS
+        provides a scale-out mechanism to distribute UIMA pipelines over a cluster of computing
+        resources, but does not provide job or cluster management of the resources.  DUCC defines a
+        formal job model that closely maps to a standard UIMA pipeline. Around this job model DUCC
+        provides cluster management services to automate the scale-out of UIMA pipelines over
+        computing clusters.  
+      </para>
+
+    </section>
+
+    <section><title>DUCC Job Model</title>
+
+      <para>
+        The DUCC job model is defined in terms of the UIMA and UIMA-AS framework.  A UIMA pipeline
+        contains a Collection Reader, one or more Analysis Engines connected in a
+        pipeline, and a CAS Consumer as shown in <xref linkend="ducc.overview.uima-pipeline" />.
+      </para>
+
+      <figure id="ducc.overview.uima-pipeline">
+        <title>Standard UIMA Pipeline</title>
+        <mediaobject>
+          <imageobject role="html">
+            <imagedata scale="50" format="JPG"
+                       fileref="&imgroot;uima-pipeline.jpg"></imagedata>
+          </imageobject>
+          <imageobject role="fo">
+            <imagedata scale="50" format="JPG"
+                       fileref="&imgrootpdf;uima-pipeline.jpg"></imagedata>
+          </imageobject>
+        </mediaobject>
+      </figure>
+      
+
+      <para>
+        With UIMA-AS the CR is separated into a discrete process and a CAS Multiplier is introduced
+        into the analytic pipeline as an interface between the CR and the pipeline, as shown in
+        <xref linkend="ducc.overview.uima-as-pipeline" />.  Multiple analytic pipelines are serviced
+        by the CR and are scaled-out over a computing cluster.
+      </para>
+
+
+      <figure id="ducc.overview.uima-as-pipeline">
+        <title>UIMA Pipeline As Scaled by UIMA-AS</title>
+        <mediaobject>
+          <imageobject role="html">
+            <imagedata scale="50" format="JPG"
+                       fileref="&imgroot;uima-as-pipeline.jpg"></imagedata>
+          </imageobject>
+          <imageobject role="fo">
+            <imagedata scale="50" format="JPG"
+                       fileref="&imgrootpdf;uima-as-pipeline.jpg"></imagedata>
+          </imageobject>
+        </mediaobject>
+      </figure>
+
+      <para>
+        Under DUCC, the Collection Reader is executed in a process called the Job Driver (or JD).
+        The analytic pipelines are executed in one or more processes called Job
+        Processes (or JPs).  The JD process provides a thin wrapper over the CR to enable
+        communication with DUCC and to direct CASs to the JPs. Similarly the JP provides a thin
+        wrapper over the analytics as shown in <xref linkend="ducc.overview.ducc-sequential" />. 
+      </para>
+
+      <figure id="ducc.overview.ducc-sequential">
+        <title>UIMA Pipeline As Automatically Scaled Out By DUCC</title>
+        <mediaobject>
+          <imageobject role="html">
+            <imagedata scale="50" format="JPG"
+                       fileref="&imgroot;ducc-sequential.jpg"></imagedata>
+          </imageobject>
+          <imageobject role="fo">
+            <imagedata scale="50" format="JPG"
+                       fileref="&imgrootpdf;ducc-sequential.jpg"></imagedata>
+          </imageobject>
+        </mediaobject>
+      </figure>            
+
+      <para>
+        On job submission, the DUCC CLI inspects the XML defining the analytic and generates a
+        UIMA-AS Deployment Descriptor (DD) from it.  DUCC generates job-unique queue endpoints, sets
+        up the queues, and sets up multiple pipeline threads so that the entire
+        transformation from the user's core-UIMA job to full UIMA-AS scale-out is transparent and automatic.
+        (Users may supply their own CM but it is not necessary as DUCC provides a default CM.)
+        A simple collection of parameters, known as the Job Specification (essentially a Java
+        properties file) defines the CR, CM, AE, and CC, threading level, logging parameters, etc.
+        Taken together the Job Descriptor, Job Driver, and set of Job Processes comprise a DUCC job.
+      </para>
+
+      <para>
+        Users may want to provide their own DDs to more fully control the pipeline in the JPs.  This model
+        is also supported by DUCC; see <xref linkend="ducc.overview.ducc-parallel" />.
+      </para>
+      <figure id="ducc.overview.ducc-parallel">
+        <title>UIMA Pipeline With User-Supplied DD as Automatically Scaled Out By DUCC</title>
+        <mediaobject>
+          <imageobject role="html">
+            <imagedata scale="50" format="JPG"
+                       fileref="&imgroot;ducc-parallel.jpg"></imagedata>
+          </imageobject>
+          <imageobject role="fo">
+            <imagedata scale="50" format="JPG"
+                       fileref="&imgrootpdf;ducc-parallel.jpg"></imagedata>
+          </imageobject>
+        </mediaobject>
+      </figure>            
+      
+  
+      <para>
+        The DUCC Job Descriptor includes properties to enable automated management and scale-out
+        over large computing clusters.  Such management includes  multiple-user support
+        (jobs run under the identity of the submitting user), a fair-share scheduler capable of
+        balancing resources among all users, automated performance monitoring via the UIMA-AS
+        monitoring facilities, display of job status and performance statistics via a built-in web server, and
+        error-handling of the UIMA pipelines, also using the UIMA-AS facilities.
+      </para>
+
+      <para>
+        DUCC provides a Command Line Interface (CLI) to submit UIMA pipelines for execution as jobs.
+        (An Application Programming Interface (API) is in progress but not available with the
+        current release.) The CLI inspects the pipeline XML descriptors (as named in the Job
+        Specification) and automatically generates UIMA-AS Deployment Descriptors.  The descriptors
+        are passed to the DUCC orchestration tools which establish the Collection Reader inside a
+        Job Driver (JD) process as a UIMA-AS service client.  The Job Specification is given to the
+        Resource Manager which returns the identities of the nodes where the JPs (Job Processes) are
+        to be run.  Finally the JPs are started with the pipeline's AEs as UIMA-AS services and the
+        JD starts the CRs which begin delivering CASs.  Endpoint management, creation of the DD,
+        spawning and management of the CR and AEs are all automated by DUCC.
+      </para>
+
+    </section>
+
+    <section><title>Default Collection Readers and CAS Consumers</title>
+      <para>Describe what we provide - the zip CR and zip CM or equivalent, with some motherhood about
+           why using these is good, but pointing out that users are free to make their own.
+      </para>
+      <para> What will these be - the moral equivalent of the zip reader and the one used for NLP?</para>
+
+    </section>
+
+    <section><title>Error Management</title>
+
+      <para>
+        A classic problem of large distributed systems is error management.  Small errors can
+        scale-out so that a single typo or oversight can flood the system with redundant error
+        notifications and waste significant resources with useless computation.  It can also be very
+        difficult to isolate errors which can occur anywhere in the network.  To manage this process
+        DUCC provides a number of features.
+      </para>
+      
+      <para>
+        DUCC uses the UIMA-AS error-handling facilities to reflect errors from the JPs to the JDs.
+        The JD wrappers implement logic to enforce error thresholds, to log the errors coherently,
+        and to inform the web server.  All error thresholds are configurable.  Additionally, the
+        user may implement custom logic to determine whether errors should be considered fatal or
+        transient, and the number of failures to tolerate.  Each job may provide its own fully
+        customized error handling policy.
+      </para>
+
+      <para>
+        A large UIMA application can take significant time initializing, reading from databases,
+        and so on.  The initialization process itself can be fragile and error-prone. It would be
+        wasteful and useless to allow such an application to be scheduled on a large number of 
+        cluster nodes only to fail.  To manage this, DUCC enforces two policies:
+        <itemizedlist>
+          <listitem>
+            <para>
+              JPs are allowed a maximum number of failures in the initialization stage before
+              DUCC terminates the job. 
+            </para>
+          </listitem>
+          <listitem>
+            <para>A minimum number of processes is allocated to a job when it starts.  The job
+              is not allocated additional processes until at least one JP completes the initialization
+              phase, at which point the job becomes eligible for more processes.
+            </para>
+          </listitem>
+        </itemizedlist>
+      </para>
+
+      <para>
+        Once a JP is initialized the error handling is slightly different. Errors at this stage may
+        be transient: network failures, service failures, etc.  Or they may be systemic in the
+        application (bugs).  DUCC allows a maximum number of JP failures after initialization and if
+        the threshold is exceeded, the job is terminated.  If a process has a failure, the failure
+        is reflected back to the JD and the process is terminated.  If the threshold has not yet
+        been exceeded, the Resource Manager will allocate space and a new process will be started.
+      </para>
+
+      <para>
+        In all failure scenarios, DUCC attempts to capture the associated stack traces and error
+        messages and presents them as links in the job pages from the DUCC web server.
+      </para>
+
+    </section>
+
+    <section><title>Cluster and Job Management</title>
+      <para>
+        Distributing work over multiple physical processors on a network can be difficult to manage, even
+        for relatively small numbers of processors.  DUCC provides extensive tooling to manage the cluster 
+        and the jobs running on it.
+      </para>
+
+
+      <formalpara><title>Multiple User Support</title>
+        <para>
+          DUCC runs all work under the identity of the submitting user.  This provides a level of
+          security and privacy for each user and their job.  Logs are written with the user's
+          credentials into the user's file space designated at job submission, enabling users to
+          manage them as needed.
+        </para>
+      </formalpara>
+
+      <formalpara><title>Fair-Share Scheduling</title>
+        <para>
+          DUCC is intended to support UIMA processing of natural language.  This work is inherently
+          much more memory intensive than it is CPU intensive.  In order to ensure that the pipelines
+          execute efficiently, nodes need to be allocated according to the amount of real RAM they
+          support.  With few exceptions, these jobs encounter bottlenecks on real memory well before
+          CPU bottlenecks.
+        </para>
+      </formalpara>
+
+      <para>
+        To manage this, DUCC contains a scheduler designed to allocate nodes in the cluster
+        according to declared memory usage.  All RAM is treated as a single, distributed pool of
+        memory.  "Fair" share means that the memory is allocated such that each user is allocated
+        the same amount regardless of the number of jobs the user has submitted.  Each user's
+        fair-share is then divided equally among all their jobs. Machines are
+        then allocated to jobs so the total memory in the machines assigned to a user is the same as
+        their fair-share.  Often some users don't need (and can't use) their fair share, in which
+        case the DUCC scheduler allocates the leftovers to users that are able to use them.
+      </para>
+
+      <para>
+        The DUCC scheduler provides the ability to "weight" some users so they are allocated more
+        than their simple fair-share of memory.  There is also a priority scheme that ensures some
+        types of work are always scheduled regardless of fair-share considerations.  There is also
+        a mechanism for partitioning the nodes according to arbitrary constraints ("closeness" to
+        constrained resources, priority usage such as "production" vs. "development" use, etc.), and
+        assigning jobs to specific partitions or "nodepools".
+      </para>
+      
+      <para>
+        DUCC assumes that most jobs written to the UIMA framework are fully parallel and that
+        individual processes can be evicted as needed; as well it assumes that processes can be added
+        to a job if resources are available.  The DUCC scheduler uses these properties to
+        dynamically expand or reduce the number of processes assigned to jobs, according to
+        fair-share policies and the amount of work in the system.  For example, if a new user
+        submits a job, this will generally reduce everybody's fair-share, and result in some
+        processes being evicted to make room for the new user.  Similarly, if all of a user's jobs
+        exit, then the remaining jobs will be allocated the resources that are now freed.  Note that
+        if a user who already has jobs running submits a new job, then <emphasis>only</emphasis> that
+        user's jobs are affected because his/her fair-share is the same, and has to accommodate new work.
+      </para>
+
+      <para>
+        Some jobs may not be parallel, or for some other reason, cannot tolerate being evicted and
+        restarted. The DUCC scheduler implements a policy to allow jobs with "fixed" (or "pinned") 
+        node allocations which prevents those jobs from being preempted; conversely the "fixed" policy
+        prevents those jobs from growing.  Thus, once scheduled, this type of job is "fixed" in place
+        and will never move to different nodes.
+      </para>
+
+      <para>
+        The scheduler also supports the concept of <emphasis>Reservations</emphasis>.  A reservation
+        has no job associated with it; users are allowed to use the reserved resources as they 
+        wish (within reason).  Reservations for full (dedicated) nodes or for partial nodes
+        (based on RAM) are supported.
+      </para> 
+
+      <formalpara><title>Job Lifetime Management and Orchestration</title>
+        <para>
+          DUCC includes an <emphasis>orchestrator</emphasis> that manages the lifetimes of all jobs,
+          services, and reservations.  Jobs are submitted to the orchestrator, which is responsible
+          for ensuring pre-requisite services are available and that resources are scheduled for the
+          job.  It starts the job's JD and JP processes and signals the JD to start servicing work
+          to the JPs.  It is also responsible for keeping the scheduler and web server apprised of the
+          status of all jobs.
+        </para>
+      </formalpara>
+
+      <formalpara><title>DUCC Agents</title>
+        <para>
+          A process called the DUCC <emphasis>Agent</emphasis> is run on each node managed by DUCC.  This process has several roles:
+          <itemizedlist>
+            <listitem>
+              <para>Manage JP and JD processes.  The agent starts, stops, and manages the life cycle of these processes.
+                It also monitors performance statistics on behalf of these processes, reporting to the web server.
+              </para>
+            </listitem>
+            <listitem>
+              <para>Monitor node performance and "aliveness".  The agent monitors CPU, memory, etc, and provides the information
+                in regular <emphasis>heartbeats</emphasis> that are watched by the Resource Manager and Web server for scheduling
+                and reporting purposes.
+              </para>
+            </listitem>
+            <listitem>
+              <para>Watch for <emphasis>rogue processes</emphasis>.  The agents watch for processes not associated with DUCC jobs or other
+                DUCC-initiated work and report to the web server.  Administrators are then able to easily identify
+                and reap processes that may be interfering with DUCC jobs.
+              </para>
+            </listitem>
+          </itemizedlist>                  
+      </para>
+      </formalpara>
+
+      <formalpara><title>DUCC Web server</title>
+        <para>
+          DUCC provides a web server displaying all aspects of the system:
+        <itemizedlist>
+          <listitem>
+            <para>All jobs in the system with relevant information: user, times, work finished work
+              completed, processes allocated, and many others.  For each job, additional pages provide
+              details including node, PID, status of all work items, the submitted job specification, etc.  If
+              errors occur, links from the job entry to the errors in the logs are provided.
+            </para>
+          </listitem>
+          <listitem>
+            <para>All reserved nodes with relevant information: user, times, nodes, processes running in the reservation, etc.
+            </para>
+          </listitem>
+          <listitem>
+            <para>All nodes in the system and their status, usage, etc.
+            </para>
+          </listitem>
+          <listitem>
+            <para>All services and relevant information: user, nodes, usage, who is using the services, queue size, etc.
+            </para>
+          </listitem>
+          <listitem>
+            <para>The status of all DUCC management processes.
+            </para>
+          </listitem>
+        </itemizedlist>
+        </para>
+      </formalpara>
+
+      <formalpara><title>Management Scripting</title>
+        <para>
+          DUCC provides rich scripting support to:
+          <itemizedlist>
+            <listitem>
+              <para>Start and stop full DUCC systems.</para>
+            </listitem>
+            <listitem>
+              <para>Start and stop individual DUCC components.</para>
+            </listitem>
+            <listitem>
+              <para>Add and delete nodes from the DUCC system.</para>
+            </listitem>
+            <listitem>
+              <para>Discover DUCC processes (e.g. after partial failures).</para>
+            </listitem>
+            <listitem>
+              <para>Find and kill errant job processes belonging to individual users.</para>
+            </listitem>
+          </itemizedlist>
+        </para>
+      </formalpara>
+
+    </section>
+
+    <section><title>Service Management</title>
+      <formalpara>
+        <title>Overview</title>
+        <para>
+          <emphasis>Services</emphasis>, in the context of DUCC, are long-running processes that await
+          requests from UIMA pipeline components and return something in response.  Services can be
+          any arbitrary process using any arbitrary communication protocol but in the current
+          version of DUCC only UIMA-AS services are fully supported.  
+        </para>
+      </formalpara>
+      
+      <para>
+        The DUCC service manager implements several high-level functions:
+        <itemizedlist>
+          <listitem>
+            <para>
+              Ensure services are available for jobs before allowing the jobs to start.  This fail-fast
+              behavior prevents unnecessary allocation of resources (with potential eviction of healthy processes)
+              for jobs that can't run, and provides quick feedback to users that something is amiss.
+            </para>
+          </listitem>
+          <listitem>
+            <para>
+              Automate the startup, care, and management of services.
+            </para>
+          </listitem>
+          <listitem>
+            <para>
+              Report on the state of services: processes, queue depths, consumers, and so on.
+            </para>
+          </listitem>
+        </itemizedlist>        
+      </para>
+
+      <formalpara>
+        <title>Service Types</title>
+        <para>
+          DUCC supports two types of services: UIMA-AS and CUSTOM:
+          <itemizedlist>
+            <listitem>
+              <para>
+                UIMA-AS.  This is a "normal" UIMA-AS service.  DUCC fully supports all aspects of
+                UIMA-AS services.
+              </para>
+            </listitem>
+            <listitem>
+              <para>
+                CUSTOM. This is any arbitrary service.  DUCC supports
+                monitoring of CUSTOM services and performs job dependency checks, but (in the
+                current version) does not support start and stop of CUSTOM services.
+              </para>
+            </listitem>
+          </itemizedlist>
+        </para>
+      </formalpara>
+
+      <formalpara>
+        <title>Service Endpoints</title>
+        <para>
+          Services are referenced by a specifier called a <emphasis>service endpoint</emphasis>.  The
+          service endpoint is a formatted string indicating:
+          <itemizedlist>
+            <listitem>
+              <para>
+                The service type: UIMA-AS or CUSTOM.
+              </para>
+            </listitem>
+            <listitem>
+              <para>
+                The service name.  For UIMA-AS services, this is the name of the queue in the
+                ActiveMq Broker used for communication with the service.  For CUSTOM services this
+                is any arbitrary string as dictated by the service.  Service names must be
+                unique within the system.
+              </para>
+            </listitem>
+            <listitem>
+              <para>
+                For UIMA-AS services only, the URL of the ActiveMq broker.
+              </para>
+            </listitem>
+          </itemizedlist>
+        </para>
+      </formalpara>
+
+      <formalpara>
+        <title>Dependent and Pre-Requisite Services and Jobs</title>
+        <para>
+          A <emphasis>dependent service</emphasis> is a service which is dependent on at least
+          one service to perform its function.  A <emphasis>dependent job</emphasis> is a job
+          which is dependent on at least one service to perform its function.
+        </para>         
+      </formalpara>
+      <para>
+        A <emphasis>pre-requisite service</emphasis> is a service which is required by another
+        job or service.  (Note that there are no pre-requisite jobs.)
+      </para>
+
+      <formalpara>
+        <title>Service Classes</title>
+        <para>
+          Services may be started externally to DUCC, explicitly through DUCC as a job, or
+          as registered services.  These form three natural classes of services with slightly
+          different management characteristics.
+        </para>
+      </formalpara>
+      <formalpara>
+        <title>Implicit Services</title>
+        <para>
+          An <emphasis>implicit service</emphasis> is started externally to DUCC and discovered by
+          DUCC only when it is referenced by a job's <emphasis>service-dependency</emphasis>
+          parameter.  On submission of a job with a dependency on an implicit service, the SM sets up a
+          "ping" thread that checks if the service exists at the endpoint.  If so, the SM adds the
+          service to its list of known services and marks the job "ready to schedule".  If the
+          service is a UIMA-AS service the SM establishes a monitor thread on the queue for
+          reporting purposes.  The service is monitored throughout the lifetime of the job.  If the
+          service should stop responding, its state is updated as "not-responding" but the job is
+          allowed to continue as DUCC cannot tell if the job is still using it or not, or if the
+          outage is temporary.  If the service is a CUSTOM service, the service owner may specify
+          custom code to run in the ping thread; for CUSTOM services, this same code is used
+          to run both ping and monitor functions.
+        </para>
+      </formalpara>
+      <para>
+        When the job exits, a timer is set and DUCC continues to monitor the service against the
+        possibility that subsequent jobs will need it.  Once the last job using the service has exited
+        and the service timer expired, the SM stops the monitors and purges the service from its records.
+      </para>
+
+      <formalpara>
+        <title>Submitted Services</title>
+        <para>
+          A <emphasis>submitted service</emphasis> is a service that is submitted to DUCC as 
+          a job.  A submitted service is essentially a normal DD-style job (a job in which the
+          user supplies the full UIMA-AS DD), but without a Collection Reader.  Because DUCC is
+          managing this service it can provide more support than for <emphasis>implicit services.</emphasis>
+        </para>
+      </formalpara>
+
+      <para>
+        Submitted services can be dependent upon other services.  When such a service
+        enters the system, DUCC verifies its pre-requisite services.  When (or if) all
+        pre-requisite services are available DUCC marks the new service "ready to schedule".  The
+        lifecycle of the service is monitored so that dependent services and jobs are marked "ready
+        to schedule" only after the submitted service has completed its initialization phase.  A
+        ping thread and queue monitor are also started against the newly submitted service.  If the
+        submitted service is unable to successfully initialize, services and jobs that are dependent
+        on it are marked "not runnable" and the DUCC Orchestrator cancels them.
+      </para>
+      
+      <para>
+        DUCC manages the lifecycle of submitted services, but because they are submitted
+        by entities other than DUCC, the SM performs no additional management for them.  When a
+        submitted service is canceled by its owner, DUCC stops the ping and queue monitors.  Any
+        jobs or services dependent on it are allowed to continue until they complete or fail due
+        to unavailability of the service.
+      </para>
+
+      <formalpara>
+        <title>Registered Services</title>
+        <para>
+          <emphasis>Registered services</emphasis> are fully managed by DUCC.  A service
+          is registered with DUCC using the CLI to provide the full job
+          specification of the service, the initial number of instances of the service, and whether
+          the service should be automatically started when DUCC itself is started.  Registered
+          services started when DUCC is started are called
+          <emphasis>automatic</emphasis> services.  Registered services that are started only when
+          referenced by other dependent jobs or services are called <emphasis>on-demand</emphasis>
+          services. The service is registered with the submitter's credentials
+          and is run with that user's credentials when it is started.
+        </para>
+      </formalpara>
+
+      <formalpara>
+        <title>Automatic Services</title>
+        <para>
+          An <emphasis>automatic</emphasis> service is a registered service that is flagged to be
+          automatically started when the DUCC system is started.  When DUCC is started, the SM
+          checks the service registry for all services that are marked for automatic startup.  The SM
+          submits the registered service specification on behalf of its owner.  Each such submission
+          is for a single service instance.  If found, the SM repeatedly submits the specification
+          until the registered number of instances is reached.  
+        </para>
+      </formalpara>
+      <para>
+        Ping and monitor threads are
+        started.  Jobs and other services may use these services in the same manner as submitted
+        services.  If an automatic service instance should die or be canceled out of
+        the scope of the SM, the SM will restart the instance, maintaining the registered number
+        of instances at all times. <emphasis>Automatic</emphasis> services are not terminated when
+        their dependent jobs/services exit; they're terminated only when DUCC itself is terminated,
+        or by use of the service <emphasis>stop</emphasis> command.
+      </para>
+
+      <formalpara>
+        <title>On-Demand Services</title>
+        <para>
+          An <emphasis>on-demand</emphasis> service is a registered service that is started
+          only when referenced by the <emphasis>service-dependency</emphasis> of another job or service.
+          If the service is already started,
+          the dependent job/service is marked ready to schedule as indicated above.  If not,
+          the service registry is checked and if a start-on-demand service with an endpoint
+          matching the <emphasis>service-dependency</emphasis> is found, DUCC
+          submits the service on behalf of the service owner (in the same manner as for automatic services,
+          establishing the registered number of service instances, a ping thread, and a monitor). When
+          the service has completed  initialization the dependent job/service is marked ready to schedule.
+          If the on-demand service cannot be found in the registry, the referring entity is marked
+          not-startable and the DUCC Orchestrator cancels it.
+        </para>
+      </formalpara>
+      <para>
+        Subsequent jobs and services that reference the on-demand service will use the started
+        instances.  When the last job/service that references the on-demand service exits, a
+        (configurable) timer is established to keep the service alive for a while (in anticipation
+        that it will be needed again soon.)  When the keep-alive timer expires, and there are no
+        more dependent jobs/services, the on-demand service is automatically stopped to free
+        up its resources for other work.
+      </para>
+
+
+      <formalpara>
+        <title>Registered Service Management</title>
+        <para>
+          The CLI for registered services provides several functions:
+
+          <variablelist>
+            <varlistentry>
+              <term><emphasis role="bold">Register</emphasis></term>
+              <listitem>
+                <para>
+                  Register files a service specification with the SM.  The service may optionally
+                  be started as part of registration. The service definition and state are persisted over
+                  system restarts and is deleted only with the Unregister function.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><emphasis role="bold">Unregister</emphasis></term>
+              <listitem>
+                <para>
+                  Unregister removes the service specification.  The service is stopped if it is
+                  started and not busy.  (Note that if the service is busy, jobs and services that
+                  are dependent on it may subsequently fail.)
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><emphasis role="bold">Modify</emphasis></term>
+              <listitem>
+                <para>
+                  Modify allows dynamic update of some parameters of registered services:
+                  <itemizedlist>
+                    <listitem><para><emphasis>Automatic</emphasis> and <emphasis>On-Demand</emphasis> state.</para></listitem>
+                    <listitem><para>The minimum number of service instances to start when the service is started.</para></listitem>
+                  </itemizedlist>
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><emphasis role="bold">Start</emphasis></term>
+              <listitem>
+                <para>
+                  Start submits the service specification to the DUCC Orchestrator (repeatedly, until the
+                  correct number of instances are started). If the service is
+                  explicitly started with the <emphasis role="bold">start</emphasis> CLI, the service
+                  continues to run even after the last reference is gone, regardless of whether it is
+                  automatic or on-demand.  Start is also used to increase the number of
+                  running instances of a service.  The registry may be optionally updated to
+                  reflect the new number of started instances.
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><emphasis role="bold">Stop</emphasis></term>
+              <listitem>
+                <para>
+                  Stop stops the instances for a registered service.  The registry may
+                  be optionally updated to reflect the new number of instances that are still running.                
+                </para>
+              </listitem>
+            </varlistentry>
+
+            <varlistentry>
+              <term><emphasis role="bold">Query</emphasis></term>
+              <listitem>
+                <para>
+                  A CLI-based query is supplied to report on all services known to DUCC, their states,
+                  their instances, their dependent jobs, and performance statistics for the service.
+                </para>
+              </listitem>
+            </varlistentry>
+
+          </variablelist>
+        </para>
+      </formalpara>
+
+    </section>
+
+</chapter>

Propchange: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-overview.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-quick-start.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-quick-start.xml?rev=1427917&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-quick-start.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-quick-start.xml Wed Jan  2 19:12:10 2013
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<chapter id="ducc.quickstart">
+
+  <title>DUCC Application Quick Start</title>
+
+    <para>
+      <emphasis>The source for this chapter is ducc_ducbook/documents/introduction/quick-start.xml</emphasis>
+    </para>
+
+  <section id="ducc.quickstart.seciton">
+    <title>Section 1</title>
+    <para>This Sentence Intentionally Left Blank</para>
+  </section>
+</chapter>

Propchange: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-introduction/chapter-quick-start.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-api.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-api.xml?rev=1427917&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-api.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-api.xml Wed Jan  2 19:12:10 2013
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<chapter id="ducc.api">
+  <title>Application Programming Interface (API)</title>
+
+  <para>
+    <emphasis>The source for this chapter is ducc_ducbook/documents/chapter-api.xml</emphasis>
+  </para>
+
+  <para>
+    There is a partial DUCC API.  Completion of the API is planned for the next major update
+    and will not be documented until the design and first implementation are complete.
+  </para>
+
+</chapter>

Propchange: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-api.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-cli.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-cli.xml?rev=1427917&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-cli.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-cli.xml Wed Jan  2 19:12:10 2013
@@ -0,0 +1,157 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<chapter id="ducc.cli">
+  <title>Command Line Interface (CLI)</title>
+
+    <para>
+      <emphasis>The source for this chapter is ducc_ducbook/documents/part-user/chapter-cli.xml</emphasis>
+    </para>
+  
+  <para>
+    The Command Line Interface is provided in several forms:
+    <orderedlist>
+      <listitem>
+        <para>
+          A Java "main" class, suitable for invoking from user-supplied scripting such as Ant or Python.
+          Users of this must set the Java CLASSPATH to include a subset of the jar files supplied with DUCC.
+        </para>
+        <para>
+          To run the commands directly from Java the CLASSPATH must be set correctly and an environment
+          variable, <filename>DUCC_HOME</filename> must be set.
+        </para>
+        <variablelist>
+          <varlistentry>
+            <term><emphasis role="bold">DUCC_HOME </emphasis></term>
+            <listitem>
+              <para>
+                Set DUCC_HOME to the location where DUCC is installed.  For example:
+                <screen>export DUCC_HOME=/home/ducc/ducc_runtime</screen>
+              </para>
+            </listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><emphasis role="bold">CLASSPATH </emphasis></term>
+            <listitem>
+              <para>
+              The CLASSPATH must include all of the following elements, relative to DUCC_HOME:
+              <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="user-classpath.xml" />
+              </para>
+            </listitem>
+          </varlistentry>
+        </variablelist>
+      </listitem>
+      <listitem>
+        <para>
+        Executable jars for each CLI command.  These obviate the need to establish a classpath but do require
+        DUCC_HOME to be set:
+        <screen>export DUCC_HOME=/home/ducc/ducc_runtime</screen>
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+          A script wrapper to the Java "main" that completely establishes the environment.  These wrappers use 
+          the executable jars, establishing the DUCC environment and obviating the need to set DUCC_HOME.
+        </para>
+        <para>
+          While not required, it may be useful to put the DUCC bin directory into your path:
+          <screen>export PATH=$PATH:/home/ducc/ducc_runtime/bin</screen>
+        </para>
+      </listitem>
+    </orderedlist>
+  </para>
+ 
+  <para>
+    The following actions may be taken using the CLI:
+    <orderedlist>
+      <listitem>
+        <para>
+        Submit a job for execution.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Cancel a job in progress.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Request a reservation of full or partial machines.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Cancel a reservation.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Monitor the progress of a job that is already submitted.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Submit a service for execution.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Cancel a service.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Register a service.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Unregister a service.
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Start a registered service (if not auto-started).
+        </para>
+      </listitem>
+      <listitem>
+        <para>
+        Stop a registered service.
+        </para>
+      </listitem>
+    </orderedlist>
+
+    The next sections describe these actions in detail.
+  </para>
+
+  <para>
+    
+  </para>
+
+
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="cli/submit.xml" />
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="cli/cancel.xml" />
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="cli/reserve.xml" />
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="cli/unreserve.xml" />
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="cli/monitor.xml" />
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="cli/service_submit.xml" />
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="cli/service_cancel.xml" />
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="cli/services.xml" />
+
+</chapter>

Propchange: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-cli.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-examples.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-examples.xml?rev=1427917&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-examples.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-examples.xml Wed Jan  2 19:12:10 2013
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<chapter id="ducc.example">
+<title>Examples: Building and Testing a Simple Application</title>
+  <para>
+    <emphasis>The source for this chapter is ducc_ducbook/documents/chapter-webserver.xml</emphasis>
+  </para>
+
+  <para>
+    This chapter intentionally left blank.
+  </para>
+
+  <para>
+    To hold you over until this chapter is filled in, the complete source for
+    the sample jobs is installed into
+    <filename class="directory">ducc_runtime/test/src</filename>.
+  </para>
+
+</chapter>

Propchange: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-examples.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-services.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-services.xml?rev=1427917&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-services.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-services.xml Wed Jan  2 19:12:10 2013
@@ -0,0 +1,267 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<chapter id="ducc.sm.ov">
+<title>Service Manager</title>
+
+    <para>
+      SM maintains a map of all jobs and services and the states of these entities
+      relative to their dependencies.  This is called the <emphasis>service map</emphasis>.
+    </para>
+
+    <para>
+      A job may contain a list of service endpoints.  The SM maintains the state of these
+      in the job's service map entry.
+    </para>
+
+    <para>
+      The SM API is used to register, deregister, start, stop, and query services.
+      <variablelist>
+        <varlistentry>
+          <term><emphasis role="bold">Register</emphasis></term>
+          <listitem>
+            Register sends a service specification to the SM.  Register optionally
+            starts the service.  The SM uses the OR's DuccServiceSubmit API to
+            start the service.  The service definition and state are persisted
+            over system restarts.
+          </listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><emphasis role="bold">Unregister</emphasis></term>
+          <listitem>
+            Unregister removes the service spec.  It is stopped if it is
+            started and not busy. If still busy it is
+            marked implicit and stopped when the reference count goes to 0.
+          </listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><emphasis role="bold">Start</emphasis></term>
+          <listitem>
+            Start starts a service and marks it explicit.  If already started but marked
+            implicit it is marked explicit. Only registered services can be started.
+          </listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><emphasis role="bold">Stop</emphasis></term>
+          <listitem>
+            Stop stops a service.  If busy it is marked implicit and stopped when
+            the reference count goes to 0.  Only registered services can be stopped.
+          </listitem>
+        </varlistentry>
+      </variablelist>
+    </para>
+
+    <para>
+      The OR's API allows services to be started and stopped directly.  It is intended for
+      but not restricted to use by the SM.  Services started with this API other than 
+      through the SM are "established" by the SM but not persisted.  There are two verbs:
+      <variablelist>
+        <varlistentry>
+          <term><emphasis role="bold">Submit</emphasis></term>
+          <listitem>
+            Submit is used to present a service specification to the OR for starting.  OR
+            passes it to SM which coordinates with OR to start it.  When started, SM
+            "establishes" it by starting a ping thread.
+          </listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><emphasis role="bold">Cancel</emphasis></term>
+          <listitem>
+            Cancel stops a service.  If the service is still busy it doesn't stop
+            until the reference count is 0.
+          </listitem>
+        </varlistentry>
+      </variablelist>
+    </para>
+
+    <para>
+      A service is defined to be <emphasis role="bold">established</emphasis> if it has a ping
+      thread.  The service may or may not be registered.  If registered, it isn't established until
+      it is started and has a ping thread.  If not registered the service is discovered only by
+      reference; on discovery a ping thread is started to establish it.
+    </para>
+
+    <para>
+      We distinguish implicitly started services (by reference from a job) and explicitly started
+      services (by API).  For short we call these implicit and explicit services.  This is orthogonal
+      to whether the service is registered.
+    </para>
+
+    <para>
+      A registered service can be started and stopped.  It stays registered until explicitly unregistered
+      by API.  An unregistered service is pinged on the endpoint provided by the job but cannot otherwise
+      be managed.
+    </para>
+
+    <para>
+      The service state indicates whether a service is implicit or explicit and maintains a reference
+      count.  When the count goes to 0 for implicit services the service is stopped and the ping
+      thread deleted, perhaps after some linger period.  When it goes to 0 for unregistered services
+      the ping is stopped and the ping thread deleted.
+    </para>
+
+
+    <para>
+      If a reference is made to a service that is registered but not established the mechanism to
+      establish it is started: start the service and when it's ready, start its ping thread,
+      marking it implicit.  Similarly if started by API only mark it explicit.  If a started
+      implicit service receives start from the API it is moved to explicit.  If a started busy
+      explicit service receives a stop from the API mark it implicit and stop it if the ref count is
+      0. If not 0, wait for 0 before stopping it.
+    </para>
+
+    <para>
+      There is one thread to manage the service map and publish to OR.  It is notified after the
+      incoming map is diffed and split.  New work, both job and service, is updated according to
+      service state and added to the map, removed jobs are deleted from the map.  New, updated, and
+      removed services are moved to the service handler thread.  The service map is then published.
+    </para>
+
+    <para>
+      There is another thread that handles only services (the service handler thread).  This one
+      runs on a clock.  The actions below are only in response to OR state, not the
+      register/deregister/start/stop API.  New services with specification are put in a list for
+      starting.  New services without a specification have ping threads started.  Modified services
+      are managed:
+      <itemizedlist>
+        <listitem>
+          If moved from not running to running, start a ping thread.
+        </listitem>
+        <listitem>
+          If moved from running to not running, kill the ping thread, update the service map, and
+          check reason.  If canceled by user or admin / removed (disappeared), delete.  If canceled
+          by system (restart) or crashed, restart.  We depend on OR state accuracy to know whether
+          to restart.
+        </listitem>
+      </itemizedlist>
+    </para>
+
+
+    <para>
+      Threads:
+      <orderedlist>
+        <listitem>One for incoming camel, notified on OR state arrival.
+          Splits the OR state and maintains the localMap.  Notifies the job 
+           threads.
+        </listitem>
+
+        <listitem>One to manage service map.  Notified by splitter thread,
+          updates map and publishes immediately.
+        </listitem>
+
+        <listitem>One thread per service, running pings on a timer.
+        </listitem>
+        
+        <listitem>One temporary thread per OR request used to handle the 
+          APIs to the Orchestrator.  This is created and runs on demand to
+          manage OR communication sessions.
+        </listitem>
+      </orderedlist>
+    </para>
+
+    <para>
+
+      The SM becomes aware of services by registration, submission via OR, and by job reference of 
+      endpoints in the job spec.  This table summarizes the rules for managing services.
+
+      <table frame="all">
+        <title>Service Management Rules</title>
+        <tgroup cols="6">
+          <thead>
+            <row>
+              <entry>Discover</entry>
+              <entry>Persist</entry>
+              <entry>Start By</entry>
+              <entry>Stop By</entry>
+              <entry>Undiscover</entry>
+              <entry>Validate jobs</entry>
+            </row>
+          </thead>
+          <tbody>
+            <row>
+              <entry>SM Register API</entry>
+              <entry>Yes</entry>
+              <entry>SM Start API, Job Reference</entry>
+              <entry>SM Stop API, Last De-reference </entry>
+              <entry>SM Unregister API</entry>
+              <entry>Yes</entry>
+            </row>
+            <row>
+              <entry>OR Submit API</entry>
+              <entry>No</entry>
+              <entry>At Submission</entry>
+              <entry>OR Cancel API</entry>
+              <entry>On Cancel</entry>
+              <entry>Yes</entry>
+            </row>
+            <row>
+              <entry>Reference</entry>
+              <entry>N/A</entry>
+              <entry>N/A </entry>
+              <entry>N/A </entry>
+              <entry>Last De-reference</entry>
+              <entry>Yes</entry>
+            </row>
+          </tbody>
+        </tgroup>
+      </table>
+
+    </para>
+
+    
+    <para>
+      These are discoverable in these ways:
+
+      <variablelist>
+        <varlistentry>
+          <term><emphasis role="bold">By Reference</emphasis></term>
+          <listitem>
+            A job's descriptor has a UIMA-AS endpoint as a service dependency.  SM starts a listener
+            and updates the service state of the job accordingly.  The listener stays alive until
+            the last reference is removed.  The service is not otherwise managed (started or stopped).
+          </listitem>
+        </varlistentry>
+        
+        <varlistentry>
+          <term><emphasis role="bold">By Submission</emphasis></term>
+          <listitem>
+            A service type of job is submitted for startup.  SM starts a listener and updates the
+            service state of any job that references it accordingly.  The listener stays alive until
+            the service is stopped by the OR's service_cancel API and the last reference is removed.
+          </listitem>
+        </varlistentry>
+        
+        <varlistentry>
+          <term><emphasis role="bold">By Registration</emphasis></term>
+          <listitem>
+            A service specification is registered with SM.  When the service is started, SM starts a
+            listener and updates the service state of any referencing job.  The listener stays alive
+            until the service is stopped and the last reference is removed. (If the service is
+            started implicitly it is stopped when the last reference is removed.  If the service is
+            started by the SM's start_service API it is stopped by the SM's stop_service API.)
+          </listitem>
+        </varlistentry>
+      </variablelist>
+    </para>
+    
+</chapter>

Propchange: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-services.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-userlogs.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-userlogs.xml?rev=1427917&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-userlogs.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-userlogs.xml Wed Jan  2 19:12:10 2013
@@ -0,0 +1,290 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+       http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+  <!-- ************************************ Logs ********************************** -->
+  <chapter id="ducc.user.logs">
+    <title>Job Logs</title>
+
+    <para>
+      <emphasis>The source for this chapter is ducc_ducbook/documents/part-user/userlogs.xml</emphasis>
+    </para>
+
+
+    <para>
+      The DUCC logs are managed by <emphasis>log4j</emphasis> and are configured using
+      <filename>ducc_runtime/log4j.xml</filename>.  It is not in the scope of this document
+      to describe <emphasis>log4j</emphasis> or its configuration mechanism.  Details on <emphasis>log4j</emphasis>
+      can be found at <ulink url="http://logging.apache.org/log4j/1.2/">http://logging.apache.org/log4j/1.2/</ulink>.
+    </para>
+
+    <para>
+      The "user logs" are the Job Driver (JD) and Job Process (JP) logs.  There is one log for each
+      process of a job. The JD log is divided between 
+      two physical files:
+      <orderedlist>
+        <listitem>
+          <para>
+            The logs and stdout written by the UIMA collection reader.  The collection reader
+            uses the UIMA logger which is by default directed to stdout.
+          </para>
+        </listitem>
+        <listitem>
+          <para>
+            The diagnostic logs written by the DUCC JD wrapper around the job's collection reader.  This
+            log is written using <emphasis>log4j</emphasis>.
+          </para>
+        </listitem>
+      </orderedlist>
+    </para>
+
+    <para>
+      A number of other useful files are written to the log directory:
+      <orderedlist>
+        <listitem>
+          <para>
+            A properties file containing the full job specification for the job.  This includes all the
+            parameters specified by the user as well as the default parameters.  This file is written to
+            <filename>job-specification.properties</filename>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            The UIMA pipeline descriptor constructed by DUCC that describes the process that is
+            dispatched to each Job Process (JP).  The name of this file is of the form
+            
+            <programlisting>
+              JOBID-uima-ae-descriptor-PROCESS.xml
+            </programlisting>
+            where
+            <variablelist>
+              <varlistentry>
+                <term><emphasis role="bold">JOBID</emphasis></term>
+                <listitem>
+                  <para>
+                    This is the numerical id of the job as assigned by DUCC.
+                  </para>
+                </listitem>
+              </varlistentry>
+              
+              <varlistentry>
+                <term><emphasis role="bold">PROCESS</emphasis></term>
+                <listitem>
+                  <para>
+                    This is the process id of the Job Driver (JD) process.
+                  </para>
+                </listitem>
+              </varlistentry>
+            </variablelist>
+          </para>            
+        </listitem>
+
+        <listitem>
+          <para>
+            The UIMA-AS service descriptor that defines the process that defines the job as a UIMA-AS
+            service.  The name of this file is of the form
+            
+            <programlisting>
+              JOBID-uima-as-dd-PROCESS.xml
+            </programlisting>
+            where
+            <variablelist>
+              <varlistentry>
+                <term><emphasis role="bold">JOBID</emphasis></term>
+                <listitem>
+                  <para>
+                    This is the numerical id of the job as assigned by DUCC.
+                  </para>
+                </listitem>
+              </varlistentry>
+              
+              <varlistentry>
+                <term><emphasis role="bold">PROCESS</emphasis></term>
+                <listitem>
+                  <para>
+                    This is the process id of the Job Driver (JD) process.
+                  </para>
+                </listitem>
+              </varlistentry>
+            </variablelist>
+          </para>
+        </listitem>
+        
+        <listitem>
+          <para>
+            A Java serialized object containing the performance breakdown for the job.  This is
+            used by the Web Server to display the breakdown.  This file is written
+            to <filename>job-performance-summary.ser</filename>.
+          </para>
+        </listitem>
+
+      </orderedlist>
+    </para>
+
+    <para>
+      The JP logs are written by default to <filename class="directory">HOME/ducc/logs</filename>, where
+      HOME is the submitting user's home directory.  In this directory, a subdirectory whose name
+      is the numerical id of the job is created by DUCC, where all logs for the job are written.
+    </para>
+
+    <para>
+      The collection reader's log is written to the file <filename>HOME/ducc/logs/JOBID/jd.out.log</filename> via
+      <emphasis>log4j</emphasis>.  It is written in multiple generations, and its size is governed by the
+      same <emphasis>log4j</emphasis> configuration file used for the DUCC Daemon processes.  The size
+      of each generation and the number of generations is configured in the <emphasis>jdout</emphasis>
+      appender stanza.
+    </para>
+
+    <para>
+      Each JP log and the diagnostic JD log is of the following form:
+      <programlisting>
+        JOBID-TYPE-NODE-PROCESS.log
+      </programlisting>
+      where
+      <variablelist>
+        <varlistentry>
+          <term><emphasis role="bold">JOBID</emphasis></term>
+          <listitem>
+            <para>
+              This is the numerical id of the job as assigned by DUCC.
+            </para>
+          </listitem>
+        </varlistentry>
+
+        <varlistentry>
+          <term><emphasis role="bold">TYPE</emphasis></term>
+          <listitem>
+            <para>
+              This is either the string "UIMA" for JP logs, or "JD" for JD logs.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term><emphasis role="bold">NODE</emphasis></term>
+          <listitem>
+            <para>
+              This is the name of the machine where the process runs.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term><emphasis role="bold">PROCESS</emphasis></term>
+          <listitem>
+            <para>
+              This is the process id of the process on the indicated node.
+            </para>
+          </listitem>
+        </varlistentry>
+      </variablelist>
+    </para>
+
+
+    <para>
+      This shows the contents of a sample log directory for a small job that consisted of two processes.
+      <programlisting>
+        100-JD-bluej290-1-29383.log
+        100-uima-ae-descriptor-29383.xml
+        100-uima-as-dd-29383.xml
+        100-UIMA-bluej290-2-32766.log
+        100-UIMA-bluej291-63-13594.log
+        jd.out.log
+        job-performance-summary.ser
+        job-specification.properties
+      </programlisting>
+      
+      In this example,
+      <variablelist>
+        <varlistentry>
+          <term  />
+          <listitem>
+            <para>
+              The file <filename>100-JD-bluej290-1-29383.log</filename> is the diagnostic JD log, where the JD
+              executed on node bluej290-1 in process 29383.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term  />
+          <listitem>
+            <para>
+              The file <filename>100-uima-ae-descriptor-29383.xml</filename> is the UIMA pipeline descriptor 
+              describing the service process that is launched in each JP, where the JD process is 29383.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term  />
+          <listitem>
+            <para>
+              The file <filename>100-uima-as-dd-29383.xml</filename> is the UIMA-AS service descriptor 
+              where the client is the JD process running in process 29383.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term  />
+          <listitem>
+            <para>
+              The file <filename>100-UIMA-bluej290-2-32766.log</filename> is a JP log for job 100,
+              that ran on node bluej290-2, in process 32766.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term  />
+          <listitem>
+            <para>
+              The file <filename>100-UIMA-bluej291-63-13594.log</filename> is a JP log for job 100,
+              that ran on node bluej291-63, in process 13594.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term  />
+          <listitem>
+            <para>
+              The file <filename>jd.out.log</filename> is the user's JD log, containing the
+              user's collection reader output.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term  />
+          <listitem>
+            <para>
+              The file <filename>job-performance-summary.ser</filename> is the serialized performance
+              breakdown that is displayed in the Web Server.
+            </para>
+          </listitem>
+        </varlistentry>
+        <varlistentry>
+          <term  />
+          <listitem>
+            <para>
+              The file <filename>job-specification.properties</filename> is the properties file
+              describing the job.
+            </para>
+          </listitem>
+        </varlistentry>
+      </variablelist>
+
+    </para>
+
+  </chapter>
+

Propchange: uima/sandbox/uima-ducc/trunk/uima-ducc-ducbook/docbook/part-user/chapter-userlogs.xml
------------------------------------------------------------------------------
    svn:eol-style = native