You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by cw...@apache.org on 2016/08/08 19:58:35 UTC

svn commit: r1755532 [21/24] - in /uima/site/trunk/uima-website/docs/d/uima-ducc-2.1.0: ./ api/ api/org/ api/org/apache/ api/org/apache/uima/ api/org/apache/uima/ducc/ api/org/apache/uima/ducc/cli/ api/org/apache/uima/ducc/cli/class-use/ api/org/apache...

Added: uima/site/trunk/uima-website/docs/d/uima-ducc-2.1.0/duccbook.html
URL: http://svn.apache.org/viewvc/uima/site/trunk/uima-website/docs/d/uima-ducc-2.1.0/duccbook.html?rev=1755532&view=auto
==============================================================================
--- uima/site/trunk/uima-website/docs/d/uima-ducc-2.1.0/duccbook.html (added)
+++ uima/site/trunk/uima-website/docs/d/uima-ducc-2.1.0/duccbook.html Mon Aug  8 19:58:33 2016
@@ -0,0 +1,15450 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"  
+  "http://www.w3.org/TR/html4/loose.dtd">  
+<html > 
+<head><title>Distributed UIMA Cluster Computing</title> 
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 
+<meta name="generator" content="TeX4ht (http://www.cse.ohio-state.edu/~gurari/TeX4ht/)"> 
+<meta name="originator" content="TeX4ht (http://www.cse.ohio-state.edu/~gurari/TeX4ht/)"> 
+<!-- html --> 
+<meta name="src" content="duccbook.tex"> 
+<meta name="date" content="2016-07-28 16:27:00"> 
+<link rel="stylesheet" type="text/css" href="duccbook.css"> 
+</head><body 
+>
+                                                                                                
+                                                                                                
+<div class="maketitle">
+                                                                                                
+                                                                                                
+                                                                                                
+                                                                                                
+
+<h2 class="titleHead">Distributed UIMA Cluster Computing</h2>
+<div class="author" ><span 
+class="cmr-12">Written and maintained by the Apache</span>
+<br />  <span 
+class="cmr-12">UIMA</span><sup class="textsuperscript"><span 
+class="cmr-9">TM</span></sup><span 
+class="cmr-12"> Development Community</span><br /><br /><br />
+<br />             <span 
+class="cmr-12">Version 2.1.0</span></div>
+<br />
+<div class="date" ></div>
+                                                                                                
+                                                                                                
+</div>
+<!--l. 18--><p class="noindent" >Copyright <span 
+class="cmsy-10">&#x00A9;</span>&#x00A0; 2012 The Apache Software Foundation
+<!--l. 20--><p class="noindent" >Copyright <span 
+class="cmsy-10">&#x00A9;</span>&#x00A0; 2012 International Business Machines Corporation
+     <!--l. 23--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-1000"></a><span 
+class="cmbx-10">License and Disclaimer</span></span>
+     The ASF licenses this documentation to you under the Apache License, Version 2.0 (the &#8220;License&#8221;); you may not
+     use this documentation except in compliance with the License. You may obtain a copy of the License
+     at
+     <!--l. 28--><p class="noindent" ><a 
+href="http://www.apache.org/licenses/LICENSE-2.0" class="url" ><span 
+class="cmtt-10">http://www.apache.org/licenses/LICENSE-2.0</span></a>
+     <!--l. 30--><p class="noindent" >Unless required by applicable law or agreed to in writing, this documentation and its contents are distributed under
+     the License on an &#8220;AS IS&#8221; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+     or implied. See the License for the specific language governing permissions and limitations under the
+     License.
+     <!--l. 35--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-2000"></a><span 
+class="cmbx-10">Trademarks</span></span>
+     All terms mentioned in the text that are known to be trademarks or service marks have been appropriately capitalized.
+     Use of such terms in this book should not be regarded as affecting the validity of the trademark or service
+     mark.
+<!--l. 47--><p class="noindent" >Publication date: July&#x00A0;2016
+                                                                                                
+                                                                                                
+<h2 class="likechapterHead"><a 
+ id="x1-3000"></a>Table of Contents</h2> <div class="tableofcontents">
+<span class="partToc" >I&#x00A0;&#x00A0;<a 
+href="#x1-5000I" id="QQ2-1-5">DUCC Concepts</a></span>
+<br /><span class="chapterToc" >1 <a 
+href="#x1-60001" id="QQ2-1-6">DUCC Overview</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.1 <a 
+href="#x1-70001.1" id="QQ2-1-7">What is DUCC?</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.2 <a 
+href="#x1-80001.2" id="QQ2-1-8">DUCC Job Model</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.3 <a 
+href="#x1-90001.3" id="QQ2-1-9">DUCC From UIMA to Full Scale-out</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.4 <a 
+href="#x1-140001.4" id="QQ2-1-18">Error Management </a></span>
+<br />&#x00A0;<span class="sectionToc" >1.5 <a 
+href="#x1-150001.5" id="QQ2-1-19">Cluster and Job Management</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.6 <a 
+href="#x1-160001.6" id="QQ2-1-20">Security Measures</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >1.6.1 <a 
+href="#x1-170001.6.1" id="QQ2-1-21">ducc_ling</a></span>
+<br />&#x00A0;<span class="sectionToc" >1.7 <a 
+href="#x1-180001.7" id="QQ2-1-22">Security Issues</a></span>
+<br /><span class="chapterToc" >2 <a 
+href="#x1-190002" id="QQ2-1-23">Glossary</a></span>
+<br /><span class="partToc" >II&#x00A0;&#x00A0;<a 
+href="#x1-20000II" id="QQ2-1-24">Ducc Users Guide</a></span>
+<br /><span class="chapterToc" >3 <a 
+href="#x1-210003" id="QQ2-1-25">Command Line Interface</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.1 <a 
+href="#x1-230003.1" id="QQ2-1-27">The DUCC Job Descriptor</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.2 <a 
+href="#x1-240003.2" id="QQ2-1-28">Operating System Limit Support</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.3 <a 
+href="#x1-250003.3" id="QQ2-1-29">Command Line Forms</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.4 <a 
+href="#x1-260003.4" id="QQ2-1-30">DUCC Commands</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.5 <a 
+href="#x1-270003.5" id="QQ2-1-31">ducc_submit</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.6 <a 
+href="#x1-320003.6" id="QQ2-1-36">ducc_cancel</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.7 <a 
+href="#x1-370003.7" id="QQ2-1-41">ducc_reserve</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.8 <a 
+href="#x1-420003.8" id="QQ2-1-46">ducc_unreserve</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.9 <a 
+href="#x1-470003.9" id="QQ2-1-51">ducc_process_submit</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.10 <a 
+href="#x1-520003.10" id="QQ2-1-56">ducc_process_cancel</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.11 <a 
+href="#x1-570003.11" id="QQ2-1-61">ducc_services</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.1 <a 
+href="#x1-610003.11.1" id="QQ2-1-65">Common Options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.2 <a 
+href="#x1-620003.11.2" id="QQ2-1-66">ducc_services &#8211;register [specification file] [options]</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.3 <a 
+href="#x1-630003.11.3" id="QQ2-1-67">ducc_services &#8211;start options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.4 <a 
+href="#x1-640003.11.4" id="QQ2-1-68">ducc_services &#8211;stop options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.5 <a 
+href="#x1-650003.11.5" id="QQ2-1-69">ducc_services &#8211;enable options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.6 <a 
+href="#x1-660003.11.6" id="QQ2-1-70">ducc_services &#8211;disable options</a></span>
+                                                                                                
+                                                                                                
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.7 <a 
+href="#x1-670003.11.7" id="QQ2-1-71">ducc_services &#8211;observe_references options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.8 <a 
+href="#x1-680003.11.8" id="QQ2-1-72">ducc_services &#8211;ignore_references options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.9 <a 
+href="#x1-690003.11.9" id="QQ2-1-73">ducc_services &#8211;modify options</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >3.11.10 <a 
+href="#x1-700003.11.10" id="QQ2-1-74">ducc_services &#8211;query options</a></span>
+<br />&#x00A0;<span class="sectionToc" >3.12 <a 
+href="#x1-720003.12" id="QQ2-1-76">viaducc and java_viaducc</a></span>
+<br /><span class="chapterToc" >4 <a 
+href="#x1-760004" id="QQ2-1-80">The DUCC Public API</a></span>
+<br />&#x00A0;<span class="sectionToc" >4.1 <a 
+href="#x1-770004.1" id="QQ2-1-81">Overview Of The DUCC API</a></span>
+<br />&#x00A0;<span class="sectionToc" >4.2 <a 
+href="#x1-780004.2" id="QQ2-1-82">Compiling and Running With the DUCC API</a></span>
+<br />&#x00A0;<span class="sectionToc" >4.3 <a 
+href="#x1-790004.3" id="QQ2-1-83">Java API</a></span>
+<br /><span class="chapterToc" >5 <a 
+href="#x1-800005" id="QQ2-1-84">Service Management</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.1 <a 
+href="#x1-810005.1" id="QQ2-1-85">Overview.</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.2 <a 
+href="#x1-820005.2" id="QQ2-1-86">Service Types.</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.3 <a 
+href="#x1-830005.3" id="QQ2-1-87">Service Instance IDs</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.4 <a 
+href="#x1-840005.4" id="QQ2-1-88">Service References and Endpoints</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.5 <a 
+href="#x1-850005.5" id="QQ2-1-89">Service Management Policies</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.6 <a 
+href="#x1-870005.6" id="QQ2-1-91">Service Pingers</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.1 <a 
+href="#x1-880005.6.1" id="QQ2-1-92">The Pinger API</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.2 <a 
+href="#x1-910005.6.2" id="QQ2-1-95">Declaring a Pinger in A Service</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.3 <a 
+href="#x1-920005.6.3" id="QQ2-1-96">Implementing a Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.4 <a 
+href="#x1-930005.6.4" id="QQ2-1-98">Building And Testing Your Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.6.5 <a 
+href="#x1-980005.6.5" id="QQ2-1-103">Globally Registered Pingers</a></span>
+<br />&#x00A0;<span class="sectionToc" >5.7 <a 
+href="#x1-990005.7" id="QQ2-1-104">Sample Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.7.1 <a 
+href="#x1-1000005.7.1" id="QQ2-1-105">Using the Sample Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.7.2 <a 
+href="#x1-1010005.7.2" id="QQ2-1-106">Understanding Sample Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.7.3 <a 
+href="#x1-1140005.7.3" id="QQ2-1-119">Calculating New Deployments in the Pinger</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >5.7.4 <a 
+href="#x1-1250005.7.4" id="QQ2-1-130">Summary of Sample Pinger</a></span>
+<br /><span class="chapterToc" >6 <a 
+href="#x1-1260006" id="QQ2-1-131">Job Logs</a></span>
+<br /><span class="chapterToc" >7 <a 
+href="#x1-1320007" id="QQ2-1-137">Job Error Handler</a></span>
+<br /><span class="chapterToc" >8 <a 
+href="#x1-1370008" id="QQ2-1-142">DUCC Web Server</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.1 <a 
+href="#x1-1420008.1" id="QQ2-1-149">Common Links</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.2 <a 
+href="#x1-1430008.2" id="QQ2-1-150">Jobs Page</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.3 <a 
+href="#x1-1440008.3" id="QQ2-1-152">Job Details Page</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.3.1 <a 
+href="#x1-1450008.3.1" id="QQ2-1-153">Processes</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.3.2 <a 
+href="#x1-1460008.3.2" id="QQ2-1-155">Work Items</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.3.3 <a 
+href="#x1-1470008.3.3" id="QQ2-1-157">Performance</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.3.4 <a 
+href="#x1-1480008.3.4" id="QQ2-1-159">Specification</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.3.5 <a 
+href="#x1-1490008.3.5" id="QQ2-1-161">Files</a></span>
+                                                                                                
+                                                                                                
+<br />&#x00A0;<span class="sectionToc" >8.4 <a 
+href="#x1-1500008.4" id="QQ2-1-162">Reservations Page</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.5 <a 
+href="#x1-1510008.5" id="QQ2-1-164">Managed Reservation Details Page</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.5.1 <a 
+href="#x1-1520008.5.1" id="QQ2-1-165">Processes</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.5.2 <a 
+href="#x1-1530008.5.2" id="QQ2-1-166">Specification</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.5.3 <a 
+href="#x1-1540008.5.3" id="QQ2-1-167">Files</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.6 <a 
+href="#x1-1550008.6" id="QQ2-1-168">Services Page</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.7 <a 
+href="#x1-1560008.7" id="QQ2-1-169">Service Details Page</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.7.1 <a 
+href="#x1-1570008.7.1" id="QQ2-1-170">Deployments</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.7.2 <a 
+href="#x1-1580008.7.2" id="QQ2-1-171">Registry</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.7.3 <a 
+href="#x1-1590008.7.3" id="QQ2-1-172">Files</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.7.4 <a 
+href="#x1-1600008.7.4" id="QQ2-1-173">History</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.8 <a 
+href="#x1-1610008.8" id="QQ2-1-174">System Pages</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.8.1 <a 
+href="#x1-1620008.8.1" id="QQ2-1-175">Administration</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.8.2 <a 
+href="#x1-1630008.8.2" id="QQ2-1-176">Broker</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.8.3 <a 
+href="#x1-1640008.8.3" id="QQ2-1-177">Classes</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.8.4 <a 
+href="#x1-1650008.8.4" id="QQ2-1-178">Daemons</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >8.8.5 <a 
+href="#x1-1660008.8.5" id="QQ2-1-179">Machines</a></span>
+<br />&#x00A0;<span class="sectionToc" >8.9 <a 
+href="#x1-1670008.9" id="QQ2-1-180">Visualization</a></span>
+<br /><span class="partToc" >III&#x00A0;&#x00A0;<a 
+href="#x1-168000III" id="QQ2-1-182">Programming Model And Applications</a></span>
+<br /><span class="chapterToc" >9 <a 
+href="#x1-1690009" id="QQ2-1-183">Building and Testing Jobs</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.1 <a 
+href="#x1-1700009.1" id="QQ2-1-184">Overview</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >9.1.1 <a 
+href="#x1-1710009.1.1" id="QQ2-1-185">Basic Job Process Threading Model</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >9.1.2 <a 
+href="#x1-1720009.1.2" id="QQ2-1-186">Alternate Pipeline Threading Model</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >9.1.3 <a 
+href="#x1-1730009.1.3" id="QQ2-1-187">Overriding UIMA Configuration Parameters</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.2 <a 
+href="#x1-1740009.2" id="QQ2-1-188">Collection Segmentation and Artifact Extraction</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.3 <a 
+href="#x1-1750009.3" id="QQ2-1-189">CAS Consumer Changes for DUCC</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.4 <a 
+href="#x1-1760009.4" id="QQ2-1-190">Job Development for an Existing Pipeline Design</a></span>
+<br />&#x00A0;<span class="sectionToc" >9.5 <a 
+href="#x1-1770009.5" id="QQ2-1-191">Job Development for a New Pipeline Design</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >9.5.1 <a 
+href="#x1-1780009.5.1" id="QQ2-1-192">Collection Reader (CR) Characteristics</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >9.5.2 <a 
+href="#x1-1790009.5.2" id="QQ2-1-193">DUCC built-in Flow Controller</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >9.5.3 <a 
+href="#x1-1800009.5.3" id="QQ2-1-194">Workitem Feature Structure</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >9.5.4 <a 
+href="#x1-1810009.5.4" id="QQ2-1-195">Deployment Descriptor (DD) Jobs</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >9.5.5 <a 
+href="#x1-1820009.5.5" id="QQ2-1-196">Debugging</a></span>
+<br /><span class="chapterToc" >10 <a 
+href="#x1-18300010" id="QQ2-1-197">Sample Application: Raw Text Processing</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.1 <a 
+href="#x1-18400010.1" id="QQ2-1-198">Application Function and Design</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.2 <a 
+href="#x1-18500010.2" id="QQ2-1-199">Configuration Parameters</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.3 <a 
+href="#x1-18600010.3" id="QQ2-1-200">Set up a working directory</a></span>
+                                                                                                
+                                                                                                
+<br />&#x00A0;<span class="sectionToc" >10.4 <a 
+href="#x1-18700010.4" id="QQ2-1-201">Download and Install OpenNLP</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.5 <a 
+href="#x1-18800010.5" id="QQ2-1-202">Get some Input Text</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.6 <a 
+href="#x1-18900010.6" id="QQ2-1-203">Run the Job</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.7 <a 
+href="#x1-19000010.7" id="QQ2-1-204">Job Output</a></span>
+<br />&#x00A0;<span class="sectionToc" >10.8 <a 
+href="#x1-19100010.8" id="QQ2-1-205">Job Performance Details</a></span>
+<br /><span class="chapterToc" >11 <a 
+href="#x1-19200011" id="QQ2-1-208">Sample Application: CAS Input Processing</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.1 <a 
+href="#x1-19300011.1" id="QQ2-1-209">Application Function and Design</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.2 <a 
+href="#x1-19400011.2" id="QQ2-1-210">Configuration Parameters</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.3 <a 
+href="#x1-19500011.3" id="QQ2-1-211">Run the Job</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.4 <a 
+href="#x1-19600011.4" id="QQ2-1-212">Job Performance Details</a></span>
+<br />&#x00A0;<span class="sectionToc" >11.5 <a 
+href="#x1-19700011.5" id="QQ2-1-214">Limiting Job Resources</a></span>
+<br /><span class="partToc" >IV&#x00A0;&#x00A0;<a 
+href="#x1-198000IV" id="QQ2-1-215">Ducc Administrators Guide</a></span>
+<br /><span class="chapterToc" >12 <a 
+href="#x1-19900012" id="QQ2-1-216">Installation, Configuration, and Verification</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.1 <a 
+href="#x1-20000012.1" id="QQ2-1-217">Overview</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.2 <a 
+href="#x1-20100012.2" id="QQ2-1-218">Software Prerequisites</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.3 <a 
+href="#x1-20200012.3" id="QQ2-1-219">Building from Source</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.4 <a 
+href="#x1-20300012.4" id="QQ2-1-220">Documentation</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.5 <a 
+href="#x1-20400012.5" id="QQ2-1-221">Single System Installation and Verification</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.6 <a 
+href="#x1-20500012.6" id="QQ2-1-222">Minimal Hardware Requirements for Single System Installation</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.7 <a 
+href="#x1-20600012.7" id="QQ2-1-223">Single System Installation</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.8 <a 
+href="#x1-20700012.8" id="QQ2-1-224">Initial System Verification</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.9 <a 
+href="#x1-20800012.9" id="QQ2-1-225">Add additional nodes to the DUCC cluster</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.10 <a 
+href="#x1-20900012.10" id="QQ2-1-226">Ducc_ling Configuration - Running with credentials of submitting user</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.11 <a 
+href="#x1-21000012.11" id="QQ2-1-227">CGroups Installation and Configuration</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.12 <a 
+href="#x1-21100012.12" id="QQ2-1-228">Full DUCC Verification</a></span>
+<br />&#x00A0;<span class="sectionToc" >12.13 <a 
+href="#x1-21200012.13" id="QQ2-1-229">Enable DUCC webserver login</a></span>
+<br /><span class="chapterToc" >13 <a 
+href="#x1-21300013" id="QQ2-1-230">Administration</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.1 <a 
+href="#x1-21400013.1" id="QQ2-1-231">WebServer Authentication</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.1.1 <a 
+href="#x1-21500013.1.1" id="QQ2-1-232">Example Implementation</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.1.2 <a 
+href="#x1-21600013.1.2" id="QQ2-1-233">IAuthenticationManager</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.1.3 <a 
+href="#x1-21700013.1.3" id="QQ2-1-234">IAuthenticationResult</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.1.4 <a 
+href="#x1-21800013.1.4" id="QQ2-1-235">Example ANT script to build jar</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.1.5 <a 
+href="#x1-21900013.1.5" id="QQ2-1-236">Example ducc.properties entries</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.1.6 <a 
+href="#x1-22000013.1.6" id="QQ2-1-237">Example ducc.administrators</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.2 <a 
+href="#x1-22100013.2" id="QQ2-1-238">Properties</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.3 <a 
+href="#x1-22200013.3" id="QQ2-1-239">Properties merging</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.4 <a 
+href="#x1-22300013.4" id="QQ2-1-240">ducc.properties</a></span>
+                                                                                                
+                                                                                                
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.1 <a 
+href="#x1-22400013.4.1" id="QQ2-1-241">General DUCC Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.2 <a 
+href="#x1-22500013.4.2" id="QQ2-1-242">Web Server Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.3 <a 
+href="#x1-22600013.4.3" id="QQ2-1-243">Job Driver Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.4 <a 
+href="#x1-22700013.4.4" id="QQ2-1-244">Service Manager Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.5 <a 
+href="#x1-22800013.4.5" id="QQ2-1-245">Orchestrator Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.6 <a 
+href="#x1-22900013.4.6" id="QQ2-1-246">Resource Manager Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.7 <a 
+href="#x1-23000013.4.7" id="QQ2-1-247">Agent Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.8 <a 
+href="#x1-23100013.4.8" id="QQ2-1-248">Process Manager Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.9 <a 
+href="#x1-23200013.4.9" id="QQ2-1-249">Job Process Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.4.10 <a 
+href="#x1-23300013.4.10" id="QQ2-1-250">Database Configuration Properties</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.5 <a 
+href="#x1-23400013.5" id="QQ2-1-251">ducc.private.properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.5.1 <a 
+href="#x1-23500013.5.1" id="QQ2-1-252">Web Server Properties</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.5.2 <a 
+href="#x1-23600013.5.2" id="QQ2-1-253">Database Properties</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.6 <a 
+href="#x1-23700013.6" id="QQ2-1-254">Resource Manager Configuration: Classes and Nodepools</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.6.1 <a 
+href="#x1-23800013.6.1" id="QQ2-1-255">Nodepools</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.6.2 <a 
+href="#x1-24200013.6.2" id="QQ2-1-263">Class Definitions</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.6.3 <a 
+href="#x1-24300013.6.3" id="QQ2-1-265">Validation</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.7 <a 
+href="#x1-24600013.7" id="QQ2-1-268">Ducc Node Definitions</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.8 <a 
+href="#x1-24700013.8" id="QQ2-1-270">Ducc User Definitions</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.9 <a 
+href="#x1-24800013.9" id="QQ2-1-272">DUCC Database Integration</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.9.1 <a 
+href="#x1-24900013.9.1" id="QQ2-1-273">Overview</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.9.2 <a 
+href="#x1-25400013.9.2" id="QQ2-1-278">Database Scripting Utilities</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.9.3 <a 
+href="#x1-25500013.9.3" id="QQ2-1-279">Database Configuration</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.10 <a 
+href="#x1-25600013.10" id="QQ2-1-280">Administrative Commands</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.1 <a 
+href="#x1-25700013.10.1" id="QQ2-1-281">start_ducc</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.2 <a 
+href="#x1-26400013.10.2" id="QQ2-1-288">stop_ducc</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.3 <a 
+href="#x1-26900013.10.3" id="QQ2-1-293">check_ducc</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.4 <a 
+href="#x1-27300013.10.4" id="QQ2-1-297">ducc_post_install</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.5 <a 
+href="#x1-27600013.10.5" id="QQ2-1-300">ducc_update</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.6 <a 
+href="#x1-28100013.10.6" id="QQ2-1-305">rm_reconfigure</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.7 <a 
+href="#x1-28400013.10.7" id="QQ2-1-308">rm_qoccupancy</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.8 <a 
+href="#x1-28700013.10.8" id="QQ2-1-311">vary_off</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.9 <a 
+href="#x1-29000013.10.9" id="QQ2-1-314">vary_on</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.10 <a 
+href="#x1-29300013.10.10" id="QQ2-1-317">ducc_properties_manager</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.11 <a 
+href="#x1-29800013.10.11" id="QQ2-1-322">db_create</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.10.12 <a 
+href="#x1-30000013.10.12" id="QQ2-1-324">db_loader</a></span>
+<br />&#x00A0;<span class="sectionToc" >13.11 <a 
+href="#x1-30600013.11" id="QQ2-1-330">Administrative Tasks</a></span>
+                                                                                                
+                                                                                                
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.11.1 <a 
+href="#x1-30700013.11.1" id="QQ2-1-331">Add Node</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.11.2 <a 
+href="#x1-30900013.11.2" id="QQ2-1-333">Remove Node</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >13.11.3 <a 
+href="#x1-31100013.11.3" id="QQ2-1-335">Notes</a></span>
+<br /><span class="chapterToc" >14 <a 
+href="#x1-31200014" id="QQ2-1-336">Resource Management</a></span>
+<br />&#x00A0;<span class="sectionToc" >14.1 <a 
+href="#x1-31300014.1" id="QQ2-1-337">Overview</a></span>
+<br />&#x00A0;<span class="sectionToc" >14.2 <a 
+href="#x1-31400014.2" id="QQ2-1-338">Preemption vs Eviction</a></span>
+<br />&#x00A0;<span class="sectionToc" >14.3 <a 
+href="#x1-31500014.3" id="QQ2-1-339">Scheduling Policies</a></span>
+<br />&#x00A0;<span class="sectionToc" >14.4 <a 
+href="#x1-31600014.4" id="QQ2-1-340">Allotment</a></span>
+<br />&#x00A0;<span class="sectionToc" >14.5 <a 
+href="#x1-31700014.5" id="QQ2-1-341">Priority vs Weight</a></span>
+<br />&#x00A0;<span class="sectionToc" >14.6 <a 
+href="#x1-32000014.6" id="QQ2-1-344">Node Pools</a></span>
+<br />&#x00A0;<span class="sectionToc" >14.7 <a 
+href="#x1-32100014.7" id="QQ2-1-345">Scheduling Classes</a></span>
+<br /><span class="chapterToc" >15 <a 
+href="#x1-32200015" id="QQ2-1-346">Service Management</a></span>
+<br /><span class="chapterToc" >16 <a 
+href="#x1-32300016" id="QQ2-1-347">DUCC Web Server Customization</a></span>
+<br />&#x00A0;<span class="sectionToc" >16.1 <a 
+href="#x1-32400016.1" id="QQ2-1-348">Server Side</a></span>
+<br />&#x00A0;<span class="sectionToc" >16.2 <a 
+href="#x1-32500016.2" id="QQ2-1-349">Client Side</a></span>
+<br />&#x00A0;<span class="sectionToc" >16.3 <a 
+href="#x1-32600016.3" id="QQ2-1-350">Build and Install</a></span>
+<br /><span class="chapterToc" >17 <a 
+href="#x1-32700017" id="QQ2-1-351">Simulation and System Testing</a></span>
+<br />&#x00A0;<span class="sectionToc" >17.1 <a 
+href="#x1-32800017.1" id="QQ2-1-352">Cluster Simulation</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.1.1 <a 
+href="#x1-32900017.1.1" id="QQ2-1-353">Overview</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.1.2 <a 
+href="#x1-33000017.1.2" id="QQ2-1-354">Node Configuration</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.1.3 <a 
+href="#x1-33100017.1.3" id="QQ2-1-355">Setting up Test Mode</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.1.4 <a 
+href="#x1-33200017.1.4" id="QQ2-1-356">Starting a Simulated Cluster</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.1.5 <a 
+href="#x1-33600017.1.5" id="QQ2-1-360">Stopping a Simulated Cluster</a></span>
+<br />&#x00A0;<span class="sectionToc" >17.2 <a 
+href="#x1-34000017.2" id="QQ2-1-364">Job Simulation</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.1 <a 
+href="#x1-34100017.2.1" id="QQ2-1-365">Overview</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.2 <a 
+href="#x1-34200017.2.2" id="QQ2-1-366">Job meta-descriptors</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.3 <a 
+href="#x1-34300017.2.3" id="QQ2-1-367"><span 
+class="cmti-10">Prepare </span>Descriptors</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.4 <a 
+href="#x1-34400017.2.4" id="QQ2-1-368">Services</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.5 <a 
+href="#x1-34600017.2.5" id="QQ2-1-370">Generating a Job Set</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >17.2.6 <a 
+href="#x1-34700017.2.6" id="QQ2-1-371">Running the Test Driver</a></span>
+<br />&#x00A0;<span class="sectionToc" >17.3 <a 
+href="#x1-34800017.3" id="QQ2-1-372">Pre-Packaged Tests</a></span>
+<br /><span class="chapterToc" >18 <a 
+href="#x1-34900018" id="QQ2-1-373">Understanding the DUCC logs</a></span>
+<br />&#x00A0;<span class="sectionToc" >18.1 <a 
+href="#x1-35000018.1" id="QQ2-1-374">Overview</a></span>
+<br />&#x00A0;<span class="sectionToc" >18.2 <a 
+href="#x1-35100018.2" id="QQ2-1-375">Resource Manager Log (rm.log)</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.2.1 <a 
+href="#x1-35200018.2.1" id="QQ2-1-376">Bootstrap Configuration</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.2.2 <a 
+href="#x1-35700018.2.2" id="QQ2-1-381">Node Arrival and Missed Heartbeats</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.2.3 <a 
+href="#x1-36000018.2.3" id="QQ2-1-384">Node Occupancy</a></span>
+                                                                                                
+                                                                                                
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.2.4 <a 
+href="#x1-36100018.2.4" id="QQ2-1-385">Job Arrival and Status Updates</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.2.5 <a 
+href="#x1-36400018.2.5" id="QQ2-1-388">Calculation Of Job Caps</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.2.6 <a 
+href="#x1-36500018.2.6" id="QQ2-1-389">The &#8220;how much&#8221; calculations</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.2.7 <a 
+href="#x1-36600018.2.7" id="QQ2-1-390">The &#8220;what of&#8221; calculations</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.2.8 <a 
+href="#x1-36700018.2.8" id="QQ2-1-391">Defragmentation</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.2.9 <a 
+href="#x1-36800018.2.9" id="QQ2-1-392">Published Schedule</a></span>
+<br />&#x00A0;<span class="sectionToc" >18.3 <a 
+href="#x1-37100018.3" id="QQ2-1-395">Service Manager Log (sm.log)</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.3.1 <a 
+href="#x1-37200018.3.1" id="QQ2-1-396">Bootstrap configuration</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.3.2 <a 
+href="#x1-37700018.3.2" id="QQ2-1-401">Receipt and analysis of Orchestrator State</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.3.3 <a 
+href="#x1-37800018.3.3" id="QQ2-1-402">CLI Requests</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.3.4 <a 
+href="#x1-37900018.3.4" id="QQ2-1-403">Dispatching / Startup of Service Instances</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.3.5 <a 
+href="#x1-38000018.3.5" id="QQ2-1-404">Progression of Service State</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.3.6 <a 
+href="#x1-38100018.3.6" id="QQ2-1-405">Starting and Logging Pingers</a></span>
+<br />&#x00A0;&#x00A0;<span class="subsectionToc" >18.3.7 <a 
+href="#x1-38200018.3.7" id="QQ2-1-406">Publishing State</a></span>
+<br />&#x00A0;<span class="sectionToc" >18.4 <a 
+href="#x1-38300018.4" id="QQ2-1-407">Orchestrator Log (or.log)</a></span>
+<br />&#x00A0;<span class="sectionToc" >18.5 <a 
+href="#x1-38400018.5" id="QQ2-1-408">Process Manager Log (pm.log)</a></span>
+<br />&#x00A0;<span class="sectionToc" >18.6 <a 
+href="#x1-38500018.6" id="QQ2-1-409">Agent Log (hostname.agent.log)</a></span>
+</div>
+                                                                                                
+                                                                                                
+<h2 class="likechapterHead"><a 
+ id="x1-4000"></a>List of Figures</h2><div class="tableofcontents"><span class="lofToc" >1.1&#x00A0;<a 
+href="#x1-10001r1">Standard UIMA Pipeline</a></span><br /><span class="lofToc" >1.2&#x00A0;<a 
+href="#x1-11001r2">UIMA Pipeline As Scaled by
+UIMA-AS</a></span><br /><span class="lofToc" >1.3&#x00A0;<a 
+href="#x1-12001r3">UIMA Pipeline As Automatically Scaled Out By DUCC</a></span><br /><span class="lofToc" >1.4&#x00A0;<a 
+href="#x1-13001r4">UIMA Pipeline
+With User-Supplied DD as Automatically Scaled Out By DUCC</a></span><br /><span class="lofToc" >5.1&#x00A0;<a 
+href="#x1-92001r1">Sample UIMA-AS
+Service Pinger</a></span><br /><span class="lofToc" >8.1&#x00A0;<a 
+href="#x1-137001r1">Sample Webserver Page</a></span><br /><span class="lofToc" >8.2&#x00A0;<a 
+href="#x1-141001r2">Preferences Page</a></span><br /><span class="lofToc" >8.3&#x00A0;<a 
+href="#x1-143001r3">Jobs Page</a></span><br /><span class="lofToc" >8.4&#x00A0;<a 
+href="#x1-145004r4">Processes
+Tab</a></span><br /><span class="lofToc" >8.5&#x00A0;<a 
+href="#x1-146001r5">Work Items Tab</a></span><br /><span class="lofToc" >8.6&#x00A0;<a 
+href="#x1-147001r6">Performance Tab</a></span><br /><span class="lofToc" >8.7&#x00A0;<a 
+href="#x1-148001r7">Specification Tab</a></span><br /><span class="lofToc" >8.8&#x00A0;<a 
+href="#x1-150001r8">Reservations
+Page</a></span><br /><span class="lofToc" >8.9&#x00A0;<a 
+href="#x1-167001r9">Visualization</a></span><br /><span class="lofToc" >10.1&#x00A0;<a 
+href="#x1-191001r1">OpenNLP Process Measurements</a></span><br /><span class="lofToc" >10.2&#x00A0;<a 
+href="#x1-191002r2">OpenNLP
+Process Breakdown</a></span><br /><span class="lofToc" >11.1&#x00A0;<a 
+href="#x1-196001r1">CAS Input Processing Performance</a></span><br /><span class="lofToc" >13.1&#x00A0;<a 
+href="#x1-239004r1">Nodepool
+Example</a></span><br /><span class="lofToc" >13.2&#x00A0;<a 
+href="#x1-239007r2">Nodepools: Overlapping Pools are Incorrect</a></span><br /><span class="lofToc" >13.3&#x00A0;<a 
+href="#x1-239008r3">Nodepools: Multiple
+top-level Nodepools</a></span><br /><span class="lofToc" >13.4&#x00A0;<a 
+href="#x1-241007r4">Sample Nodepool Configuration</a></span><br /><span class="lofToc" >13.5&#x00A0;<a 
+href="#x1-242001r5">Sample Class
+Configuration</a></span><br /><span class="lofToc" >13.6&#x00A0;<a 
+href="#x1-246001r6">Sample Node Configuration</a></span><br /><span class="lofToc" >13.7&#x00A0;<a 
+href="#x1-247001r7">Sample User Registration</a></span><br />
+</div>
+                                                                                                
+                                                                                                
+                                                                                                
+                                                                                                
+<!--l. 81--><p class="noindent" >
+                                                                                                
+                                                                                                
+<h1 class="partHead"><span class="titlemark">Part&#x00A0;I<br /></span><a 
+ id="x1-5000I"></a>DUCC Concepts</h1>
+<!--l. 22--><p class="noindent" ><a name='DUCC_OVERVIEW'></a>
+                                                                                                
+                                                                                                
+<h2 class="chapterHead"><span class="titlemark">Chapter&#x00A0;1</span><br /><a 
+ id="x1-60001"></a>DUCC Overview</h2>
+<h3 class="sectionHead"><span class="titlemark">1.1   </span> <a 
+ id="x1-70001.1"></a>What is DUCC?</h3>
+<!--l. 28--><p class="noindent" >DUCC stands for Distributed UIMA Cluster Computing. DUCC is a cluster management system providing
+tooling, management, and scheduling facilities to automate the scale-out of applications written to the UIMA
+framework.
+<!--l. 32--><p class="noindent" >Core UIMA provides a generalized framework for applications that process unstructured information such as human
+language, but does not provide a scale-out mechanism. UIMA-AS provides a scale-out mechanism to distribute UIMA
+pipelines over a cluster of computing resources, but does not provide job or cluster management of the resources.
+DUCC defines a formal job model that closely maps to a standard UIMA pipeline. Around this job model
+DUCC provides cluster management services to automate the scale-out of UIMA pipelines over computing
+clusters.
+<!--l. 39--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.2   </span> <a 
+ id="x1-80001.2"></a>DUCC Job Model</h3>
+<!--l. 41--><p class="noindent" >The Job Model defines the steps necessary to scale out a UIMA pipeline using DUCC. The goal of DUCC is to
+scale out any UIMA pipeline, including pipelines that must be deployed across multiple machines using shared
+services.
+<!--l. 45--><p class="noindent" >The DUCC Job model consists of standard UIMA components: a Collection Reader (CR), a CAS Multiplier (CM),
+application logic as implemented in one or more Analysis Engines (AE), and a CAS Consumer (CC).
+<!--l. 49--><p class="noindent" >The Collection Reader builds input CASs and forwards them to the UIMA pipelines. In the DUCC model, the CR is run in a
+process separate from the rest of the pipeline. In fact, in all but the smallest clusters it is run on a different physical machine
+than the rest of the pipeline. To achieve scalability, the CR must create very small CASs that do not contain application
+data, but which contain references to data; for instance, file names. Ideally, the CR should be runnable in a process
+not much larger than the smallest Java virtual machine. Later sections demonstrate methods for achieving
+this.
+<!--l. 57--><p class="noindent" >Each pipeline must contain at least one CAS Multiplier which receives the CASs from the CR. The CMs encapsulate the
+knowledge of how to receive the data references in the small CASs received from the CRs and deliver the referenced data to
+the application pipeline. DUCC packages the CM, AE(s), and CC into a single process, multiple instances of which are then
+deployed over the cluster.
+<!--l. 63--><p class="noindent" >A DUCC job therefore consists of a small specification containing the following items:
+     <ul class="itemize1">
+     <li class="itemize">The name of a resource containing the CR descriptor.
+     </li>
+     <li class="itemize">The name of a resource containing the CM descriptor.
+     </li>
+     <li class="itemize">The name of a resource containing the AE descriptor.
+     </li>
+     <li class="itemize">The name of a resource containing the CC descriptor.
+     </li>
+     <li class="itemize">Other information required to parameterize the above and identify the job such as log directory, working
+     directory, desired scale-out, classpath, etc. These are described in detail in subsequent sections.</li></ul>
+                                                                                                
+                                                                                                
+<!--l. 75--><p class="noindent" >On job submission, DUCC creates a single process executing the CR and one or more processes containing the analysis
+pipeline.
+<!--l. 78--><p class="noindent" >DUCC provides other facilities in support of scale-out:
+     <ul class="itemize1">
+     <li class="itemize">The ability to reserve all or part of a node in the cluster.
+     </li>
+     <li class="itemize">Automated management of services required in support of jobs.
+     </li>
+     <li class="itemize">The ability to schedule and execute arbitrary processes on nodes in the cluster.
+     </li>
+     <li class="itemize">Debugging tools and support.
+     </li>
+     <li class="itemize">A web server to display and manage work and cluster status.
+     </li>
+     <li class="itemize">A CLI and a Java API to support the above.</li></ul>
+<!--l. 89--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.3   </span> <a 
+ id="x1-90001.3"></a>DUCC From UIMA to Full Scale-out</h3>
+<!--l. 91--><p class="noindent" >In this section we demonstrate the progression of a simple UIMA pipeline to a fully scaled-out job running under
+DUCC.
+<!--l. 94--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-100001.3"></a><span 
+class="cmbx-10">UIMA Pipelines</span></span>
+A normal UIMA pipeline contains a Collection Reader (CR), one or more Analysis Engines (AE) connected in a pipeline, and
+a CAS Consumer (CC) as shown in <a 
+href="#x1-10001r1">Figure &#x00A0;1.1</a>.
+<!--l. 99--><p class="noindent" ><hr class="figure"><div class="figure" 
+>
+                                                                                                
+                                                                                                
+<a 
+ id="x1-10001r1"></a>
+                                                                                                
+                                                                                                
+
+<!--l. 101--><p class="noindent" ><img 
+src="images/uima-pipeline.jpg" alt="PIC"  
+>
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;1.1: </span><span  
+class="content">Standard UIMA Pipeline</span></div><!--tex4ht:label?: x1-10001r1 -->
+                                                                                                
+                                                                                                
+<!--l. 104--><p class="noindent" ></div><hr class="endfigure">
+<!--l. 106--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-110001.3"></a><span 
+class="cmbx-10">UIMA-AS Scaled Pipeline</span></span>
+With UIMA-AS the CR is separated into a discrete process and a CAS Multiplier (CM) is introduced into the pipeline as an
+interface between the CR and the pipeline, as shown in <a 
+href="#x1-11001r2">Figure &#x00A0;1.2</a> below. Multiple pipelines are serviced by the CR and are
+scaled-out over a computing cluster. The difficulty with this model is that each user is individually responsible for finding and
+scheduling computing nodes, installing communication software such as ActiveMQ, and generally managing the distributed
+job and associated hardware.
+<!--l. 116--><p class="noindent" ><hr class="figure"><div class="figure" 
+>
+                                                                                                
+                                                                                                
+<a 
+ id="x1-11001r2"></a>
+                                                                                                
+                                                                                                
+
+<!--l. 118--><p class="noindent" ><img 
+src="images/uima-as-pipeline.png" alt="PIC"  
+>
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;1.2: </span><span  
+class="content">UIMA Pipeline As Scaled by UIMA-AS</span></div><!--tex4ht:label?: x1-11001r2 -->
+                                                                                                
+                                                                                                
+<!--l. 121--><p class="noindent" ></div><hr class="endfigure">
+<!--l. 123--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-120001.3"></a><span 
+class="cmbx-10">UIMA Pipeline Scaled By DUCC</span></span>
+DUCC is a UIMA and UIMA-AS-aware cluster manager. To scale out work under DUCC the developer tells DUCC what
+the parts of the application are, and DUCC does the work to build the scale-out via UIMA/AS, to find and
+schedule resources, to deploy the parts of the application over the cluster, and to manage the job while it
+executes.
+<!--l. 129--><p class="noindent" >On job submission, the CR is wrapped with a DUCC main class and launched as a Job Driver (or JD). The DUCC main
+class establishes communication with other DUCC components and instantiates the CR. If the CR initializes
+successfully, and indicates that there are greater than 0 work items to process, the specified CM, AE and CC
+components are assembled into an aggregate, wrapped with a DUCC main class, and launched as a Job Process (or
+JP).
+<!--l. 135--><p class="noindent" >The JP will replicate the aggregate as many times as specified, each aggregate instance running in a single thread. When the
+aggregate initializes, and whenever an aggregate thread needs work, the JP wrapper will fetch the next work item from the
+JD, as shown in <a 
+href="#x1-12001r3">Figure &#x00A0;1.3</a> below.
+<!--l. 140--><p class="noindent" ><hr class="figure"><div class="figure" 
+>
+                                                                                                
+                                                                                                
+<a 
+ id="x1-12001r3"></a>
+                                                                                                
+                                                                                                
+
+<!--l. 142--><p class="noindent" ><img 
+src="images/ducc-sequential.png" alt="PIC"  
+>
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;1.3: </span><span  
+class="content">UIMA Pipeline As Automatically Scaled Out By DUCC</span></div><!--tex4ht:label?: x1-12001r3 -->
+                                                                                                
+                                                                                                
+<!--l. 145--><p class="noindent" ></div><hr class="endfigure">
+<!--l. 147--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-130001.3"></a><span 
+class="cmbx-10">UIMA Pipeline with User-Supplied DD Scaled By DUCC</span></span>
+Application programmers may supply their own Deployment Descriptors to control intra-process threading and scale-out. If a
+DD is specified in the job parameters, DUCC will launch each JP with the specified UIMA-AS service instantiated in-process,
+as depicted in <a 
+href="#x1-13001r4">Figure &#x00A0;1.4</a> below. In this case the user can still specify how many work items to deliver to the service
+concurrently.
+<!--l. 155--><p class="noindent" ><hr class="figure"><div class="figure" 
+>
+                                                                                                
+                                                                                                
+<a 
+ id="x1-13001r4"></a>
+                                                                                                
+                                                                                                
+
+<!--l. 157--><p class="noindent" ><img 
+src="images/ducc-parallel.png" alt="PIC"  
+>
+<br /> <div class="caption" 
+><span class="id">Figure&#x00A0;1.4: </span><span  
+class="content">UIMA Pipeline With User-Supplied DD as Automatically Scaled Out By DUCC</span></div><!--tex4ht:label?: x1-13001r4 -->
+                                                                                                
+                                                                                                
+<!--l. 160--><p class="noindent" ></div><hr class="endfigure">
+<h3 class="sectionHead"><span class="titlemark">1.4   </span> <a 
+ id="x1-140001.4"></a>Error Management </h3>
+<!--l. 164--><p class="noindent" >DUCC provides a number of facilities to assist error management:
+     <ul class="itemize1">
+     <li class="itemize">DUCC captures exceptions in the JPs and delivers them to the Job Drivers. The JD wrappers implement logic
+     to enforce error thresholds, to identify and log errors, and to reflect job problems in the DUCC Web Server.
+     Error thresholds are configurable both globally and on a per-job basis.
+     </li>
+     <li class="itemize">Error and timeout thresholds are implemented for both the initialization phase of a pipeline and the execution
+     phase.
+     </li>
+     <li class="itemize">Retry-after-error is supported: if a process has a failure on some CAS after initialization is successful, the
+     process is terminated and all affected CASs are retried, up to some configurable threshold.
+     </li>
+     <li class="itemize">To avoid disrupting existing workloads by a job that will fail to run, DUCC ensures that JD and JP processes
+     can successfully initialize before fully scaling out a job.
+     </li>
+     <li class="itemize">Various error conditions encountered while a job is running will prevent a problematic job from continuing
+     scale out, and can result in termination of the job.</li></ul>
+<!--l. 186--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.5   </span> <a 
+ id="x1-150001.5"></a>Cluster and Job Management</h3>
+<!--l. 187--><p class="noindent" >DUCC supports management of multiple jobs and multiple users in a distributed cluster:
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">Multiple User Support</span> </dt><dd 
+class="description">When properly configured, DUCC runs all work under the identity of the submitting
+     user. Logs are written with the user&#8217;s credentials into the user&#8217;s file space designated at job submission.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Fair-Share Scheduling</span> </dt><dd 
+class="description">DUCC provides a Fair-Share scheduler to equitably share resources among multiple users.
+     The scheduler also supports semi-permanent reservation of machines.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Service Management</span> </dt><dd 
+class="description">DUCC  provides  a  Service  Manager  capable  of  automatically  starting,  stopping,  and
+     otherwise managing and querying both UIMA-AS and non-UIMA-AS services in support of jobs.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job Lifetime Management and Orchestration</span> </dt><dd 
+class="description">DUCC includes an Orchestrator to manage the lifetimes of all
+     entities in the system.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Node Sharing</span> </dt><dd 
+class="description">DUCC allocates processes for one or more users on a node, each with a specified amount of memory.
+     DUCC&#8217;s preferred mechanism for constraining memory use is Linux Control Groups, or CGroups. For nodes
+     that do not support CGroups, DUCC agents monitor RAM use and kill processes that exceed their share size
+     by a settable fudge factor.
+                                                                                                
+                                                                                                
+     </dd><dt class="description">
+<span 
+class="cmbx-10">DUCC Agents</span> </dt><dd 
+class="description">DUCC Agents manage each node&#8217;s local resources and all processes started by DUCC. Each node in a
+     cluster has exactly one Agent. The Agent
+         <ul class="itemize1">
+         <li class="itemize">Monitors and reports node capabilities (memory, etc) and performance data (CPU busy, swap, etc).
+         </li>
+         <li class="itemize">Starts, stops, and monitors all processes on behalf of users.
+         </li>
+         <li class="itemize">Patrols the node for &#8220;foreign&#8221; (non-DUCC) processes, reporting them to the Web Server, and optionally
+         reaping them.
+         </li>
+         <li class="itemize">Ensures  job  processes  do  not  exceed  their  declared  memory  requirements  through  the  use  of  Linux
+         CGroups.</li></ul>
+     </dd><dt class="description">
+<span 
+class="cmbx-10">DUCC Web server</span> </dt><dd 
+class="description">DUCC provides a web server displaying all aspects of the system:
+         <ul class="itemize1">
+         <li class="itemize">All jobs in the system, their current state, resource usage, etc.
+         </li>
+         <li class="itemize">All reserved resources and associated information (owner, etc.), including the ability to request and cancel
+         reservations.
+         </li>
+         <li class="itemize">All services, including the ability to start, stop, and modify service definitions.
+         </li>
+         <li class="itemize">All nodes in the system and their status, usage, etc.
+         </li>
+         <li class="itemize">The status of all DUCC management processes.
+         </li>
+         <li class="itemize">Access to documentation.</li></ul>
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Cluster Management Support</span> </dt><dd 
+class="description">DUCC provides system management support to:
+         <ul class="itemize1">
+         <li class="itemize">Start, stop, and query full DUCC systems.
+         </li>
+         <li class="itemize">Start, stop, and quiesce individual DUCC components.
+         </li>
+         <li class="itemize">Add and delete nodes from the DUCC system.
+         </li>
+         <li class="itemize">Discover DUCC processes (e.g. after partial failures).
+         </li>
+         <li class="itemize">Find and kill errant job processes belonging to individual users.
+         </li>
+         <li class="itemize">Monitor and display inter-DUCC messages.</li></ul>
+     </dd></dl>
+                                                                                                
+                                                                                                
+<!--l. 256--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.6   </span> <a 
+ id="x1-160001.6"></a>Security Measures</h3>
+<!--l. 257--><p class="noindent" >The following DUCC security measures are provided:
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">user credentials</span> </dt><dd 
+class="description">DUCC instantiates user processes using a setuid root executable named ducc_ling. See more at
+     <a 
+href="#x1-170001.6.1"><span 
+class="cmti-10">ducc</span><span 
+class="cmti-10">_ling</span></a>.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">command line interface</span> </dt><dd 
+class="description">The CLI employs HTTP to send requests to the DUCC controller. The CLI creates and
+     employs public and private security keys in the user&#8217;s home directory for authentication of HTTP requests.
+     The controller validates requests via these same security keys.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">webserver</span> </dt><dd 
+class="description">The webserver facilitates operational control and therefore authentication is desirable.
+         <ul class="itemize1">
+         <li class="itemize">Each user has the ability to control certain aspects of only his/her active submissions.
+         </li>
+         <li class="itemize">Each administrator has the ability to control certain aspects of any user&#8217;s active submissions, as well as
+         modification of some DUCC operational characteristics.</li></ul>
+     <!--l. 275--><p class="noindent" >A simple interface is provided so that an installation can plug-in a site specific authentication mechanism comprising
+     userid and password.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ActiveMQ</span> </dt><dd 
+class="description">DUCC uses ActiveMQ for administrative communication. AMQ authentication is used to prevent arbitrary
+     processes from participating.</dd></dl>
+<!--l. 282--><p class="noindent" >
+<h4 class="subsectionHead"><span class="titlemark">1.6.1   </span> <a 
+ id="x1-170001.6.1"></a>ducc_ling</h4>
+<!--l. 284--><p class="noindent" >ducc_ling contains the following functions, which the security-conscious may verify by examining the source in
+$DUCC_HOME/duccling. All sensitive operations are performed only AFTER switching userids, to prevent unauthorized
+root access to the system.
+     <ul class="itemize1">
+     <li class="itemize">Changes its real and effective userid to that of the user invoking the job.
+     </li>
+     <li class="itemize">Optionally redirects its stdout and stderr to the DUCC log for the current job.
+     </li>
+     <li class="itemize">Optionally redirects its stdio to a port set by the CLI, when a job is submitted.
+     </li>
+     <li class="itemize">&#8220;Nice&#8221;s itself to a &#8220;worse&#8221; priority than the default, to reduce the chances that a runaway DUCC job could
+     monopolize a system.
+     </li>
+     <li class="itemize">Optionally sets user limits.
+     </li>
+     <li class="itemize">Prints the effective limits for a job to both the user&#8217;s log, and the DUCC agent&#8217;s log.
+     </li>
+     <li class="itemize">Changes to the user&#8217;s working directory, as specified by the job.
+                                                                                                
+                                                                                                
+     </li>
+     <li class="itemize">Optionally establishes LD_LIBRARY_PATH for the job from the environment variable <span 
+class="cmtt-10">DUCC</span><span 
+class="cmtt-10">_LD</span><span 
+class="cmtt-10">_LIBRARY</span><span 
+class="cmtt-10">_PATH</span>
+     if set in the DUCC job specification. (Secure Linux systems will prevent LD_LIBRARY_PATH from being set
+     by a program with root authority, so this is done AFTER changing userids).
+     </li>
+     <li class="itemize">ONLY user <span 
+class="cmti-10">ducc </span>may use the ducc_ling program in a privileged way. Ducc_ling contains checks to prevent even
+     user <span 
+class="cmti-10">root </span>from using it for privileged operations.
+     </li></ul>
+<!--l. 309--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">1.7   </span> <a 
+ id="x1-180001.7"></a>Security Issues</h3>
+<!--l. 310--><p class="noindent" >The following DUCC security issues should be considered:
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">submit transmission &#8217;sniffed&#8217;</span> </dt><dd 
+class="description">In  the  event  that  the  DUCC  submit  command  is  &#8217;sniffed&#8217;  then  the  user
+     authentication mechanism is compromised and user masquerading is possible. That is, the userid encryption
+     mechanism can be exploited such that user A can submit a job pretending to be user B.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">user </span><span 
+class="cmbxti-10">ducc </span><span 
+class="cmbx-10">password compromised</span> </dt><dd 
+class="description">In  the  event  that  the  <span 
+class="cmti-10">ducc  </span>user  password  is  compromised  then  the  root
+     privileged command <span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_ling </span>can be used to become any other user except root.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">user </span><span 
+class="cmbxti-10">root </span><span 
+class="cmbx-10">password compromised</span> </dt><dd 
+class="description">In the event that the <span 
+class="cmti-10">root </span>user password is compromised DUCC provides no
+     protection. That is, compromising the root user is equivalent to compromising the DUCC user password.</dd></dl>
+<!--l. 22--><p class="noindent" ><a name='DUCC_TERMINOLOGY'></a>
+                                                                                                
+                                                                                                
+<h2 class="chapterHead"><span class="titlemark">Chapter&#x00A0;2</span><br /><a 
+ id="x1-190002"></a>Glossary</h2>
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">Agent</span> </dt><dd 
+class="description">DUCC Agent processes run on every node in the system. The Agent receives orders to start and stop processes
+     on each node. Agents monitor nodes, sending heartbeat packets with node statistics to interested components
+     (such as the RM and web-server). If CGroups are installed in the cluster, the Agent is responsible for managing
+     the CGroups for each job process. All processes other than the DUCC management processes are managed
+     as children of the agents.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Autostarted Service</span> </dt><dd 
+class="description">An autostarted service is a registered service that is started automatically by DUCC when
+     the DUCC system is booted.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Dependent Service or Job</span> </dt><dd 
+class="description">A dependent service or job is a service or job that specifies one or more service
+     dependencies in its job specification. The service or job is dependent upon the referenced service being
+     operational before being started by DUCC.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">DUCC</span> </dt><dd 
+class="description">Distributed UIMA Cluster Computing.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">DUCC-MON</span> </dt><dd 
+class="description">DUCC-MON is the DUCC web-server.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job</span> </dt><dd 
+class="description">A DUCC job consists of the components required to deploy and execute a UIMA pipeline over a computing
+     cluster. It consists of a JD to run the Collection Reader, a set of JPs to run the UIMA AEs, and a Job
+     Specification to describe how the parts fit together.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job Driver (JD)</span> </dt><dd 
+class="description">The Job Driver is a thin wrapper that encapsulates a Job&#8217;s Collection Reader. The JD executes
+     as a process that is scheduled and deployed by DUCC.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job Process (JP)</span> </dt><dd 
+class="description">The Job Process is a thin wrapper that encapsulates a job&#8217;s pipeline components. The JP
+     executes in a process that is scheduled and deployed by DUCC.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Job Specification</span> </dt><dd 
+class="description">The  Job  Specification  is  a  collection  of  properties  that  describe  work  to  be  scheduled  and
+     deployed by DUCC. It identifies the UIMA components (CR, AE, etc) that comprise the job and the system-wide
+     properties of the job (CLASSPATHs, RAM requirements, etc).
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Machine</span> </dt><dd 
+class="description">A physical computing resource managed by the DUCC Resource Manager.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Node</span> </dt><dd 
+class="description">See Machine.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Orchestrator (OR)</span> </dt><dd 
+class="description">The Orchestrator manages the life cycle of all entities within DUCC.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Process</span> </dt><dd 
+class="description">A process is one physical process executing on a machine in the DUCC cluster. DUCC jobs are comprised
+     of one or more processes (JDs and JPs). Each process is assigned one or more <span 
+class="cmti-10">shares </span>by the DUCC scheduler.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Process Manager (PM)</span>  </dt><dd 
+class="description">The Process Manager coordinates distribution of work among the Agents.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Registered Service</span> </dt><dd 
+class="description">A  registered  service  is  a  service  that  is  registered  with  DUCC.  DUCC  saves  the  service
+     specification and fully manages the service, ensuring it is running when needed, and shut down when not.
+     </dd><dt class="description">
+                                                                                                
+                                                                                                
+<span 
+class="cmbx-10">Resource Manager (RM)</span>  </dt><dd 
+class="description">The Resource Manager schedules physical resources for DUCC work.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Service Endpoint</span> </dt><dd 
+class="description">In DUCC, the service endpoint provides a unique identifier for a service. In the case of UIMA-AS
+     services, the endpoint also serves as a well-known address for contacting the service.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Service Instance</span> </dt><dd 
+class="description">A service instance is one physical process which runs a CUSTOM or UIMA-AS service. UIMA-AS
+     services are usually scaled-out with multiple instances implementing the same underlying service logic.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Service Manager (SM)</span> </dt><dd 
+class="description">The Service Manager manages the life-cycles of UIMA-AS and CUSTOM services. It
+     coordinates registration of services, starting and stopping of services, and ensures that services are available
+     and remain available for the lifetime of the jobs.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Share Quantum</span> </dt><dd 
+class="description">The DUCC scheduler abstracts the nodes in the cluster as a single large conglomerate of resources:
+     memory, processor cores, etc. The scheduler logically decomposes the collection of resources into some number
+     of equal-sized atomic units. Each unit of work requiring resources is apportioned one or more of these atomic
+     units. The smallest possible atomic unit is called the <span 
+class="cmti-10">share quantum</span>, or simply, <span 
+class="cmti-10">share</span>.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Weighted Fair Share</span> </dt><dd 
+class="description">A weighted fair share calculation is used to apportion resources equitably to the outstanding
+     work in the system. In a non-weighted fair-share system, all work requests are given equal consideration to all
+     resources. To provide some (&#8220;more important&#8221;) work more than equal resources, weights are used to bias the
+     allotment of shares in favor of some classes of work.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Work Items</span> </dt><dd 
+class="description">A DUCC work item is one unit of work to be completed in a single DUCC process. It is usually
+     initiated by the submission of a single CAS from the JD to one of the JPs. It could be thought of as a single
+     &#8220;question&#8221; to be answered by a UIMA analytic, or a single &#8220;task&#8221; to complete. Usually each DUCC JP executes
+     many work items per job.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">$DUCC</span><span 
+class="cmbx-10">_HOME</span> </dt><dd 
+class="description">The root of the installed DUCC runtime, e.g. /home/ducc/ducc_runtime. It need not be set in
+     the environment, although the examples in this document assume that it has been.
+     </dd></dl>
+                                                                                                
+                                                                                                
+<!--l. 85--><p class="noindent" >
+                                                                                                
+                                                                                                
+<h1 class="partHead"><span class="titlemark">Part&#x00A0;II<br /></span><a 
+ id="x1-20000II"></a>Ducc Users Guide</h1>
+<!--l. 23--><p class="noindent" ><a name='DUCC_CLI'></a>
+                                                                                                
+                                                                                                
+<h2 class="chapterHead"><span class="titlemark">Chapter&#x00A0;3</span><br /><a 
+ id="x1-210003"></a>Command Line Interface</h2>
+<!--l. 28--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-220003"></a><span 
+class="cmbx-10">Overview</span></span>
+The DUCC CLI is the primary means of communication with DUCC. Work is submitted, work is canceled, work is
+monitored, and work is queried with this interface.
+<!--l. 32--><p class="noindent" >All parameters may be passed to all the CLI commands in the form of Unix-like &#8220;long-form&#8221; (key, value) pairs, in which the
+key is preceded by the characters &#8220;<span 
+class="cmsy-10">--</span>&#8221;. As well, the parameters may be saved in a standard Java Properties file, without
+the leading &#8220;<span 
+class="cmsy-10">--</span>&#8221; characters. Both a properties file and command-line parameters may be passed to each CLI.
+When both are present, the parameters on the command line take precedence. Take, for example, the following
+simple job properties file, call it <span 
+class="cmtt-10">1.job</span>, where the environment variable &#8220;DH&#8221; has been set to the location of
+$DUCC_HOME.
+                                                                                                
+                                                                                                
+<div class="verbatim" id="verbatim-1">
+description&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;Test&#x00A0;job&#x00A0;1
+&#x00A0;<br />
+&#x00A0;<br />classpath&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;${DH}/lib/uima-ducc/examples/*
+&#x00A0;<br />environment&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;AE_INIT_TIME=5&#x00A0;AE_INIT_RANGE=5&#x00A0;LD_LIBRARY_PATH=/a/nother/path
+&#x00A0;<br />scheduling_class&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;normal
+&#x00A0;<br />
+&#x00A0;<br />driver_descriptor_CR&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;org.apache.uima.ducc.test.randomsleep.FixedSleepCR
+&#x00A0;<br />driver_descriptor_CR_overrides&#x00A0;jobfile=${DH}/lib/examples/simple/1.inputs&#x00A0;compression=10
+&#x00A0;<br />error_rate=0.0
+&#x00A0;<br />
+&#x00A0;<br />driver_jvm_args&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;-Xmx500M
+&#x00A0;<br />
+&#x00A0;<br />process_descriptor_AE&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;org.apache.uima.ducc.test.randomsleep.FixedSleepAE
+&#x00A0;<br />process_memory_size&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;4
+&#x00A0;<br />process_jvm_args&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;-Xmx100M
+&#x00A0;<br />process_pipeline_count&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;2
+&#x00A0;<br />process_per_item_time_max&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;5
+&#x00A0;<br />process_deployments_max&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;999
+&#x00A0;<br /></div>
+<!--l. 59--><p class="nopar" >
+<!--l. 61--><p class="noindent" >This can be submitted, overriding the scheduling class and memory, thus:
+                                                                                                
+                                                                                                
+<div class="verbatim" id="verbatim-2">
+ducc_submit&#x00A0;--specification&#x00A0;1.job&#x00A0;--process_memory_size&#x00A0;16&#x00A0;--scheduling_class&#x00A0;high</div>
+<!--l. 64--><p class="nopar" >
+<!--l. 66--><p class="noindent" >The DUCC CLI parameters are now described in detail.
+<!--l. 68--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.1   </span> <a 
+ id="x1-230003.1"></a>The DUCC Job Descriptor</h3>
+<!--l. 69--><p class="noindent" >The DUCC Job Descriptor includes properties to enable automated management and scale-out over large computing clusters.
+The job descriptor includes
+     <ul class="itemize1">
+     <li class="itemize">References to the various UIMA components required by the job (CR, CM, AE, CC, and maybe DD)
+     </li>
+     <li class="itemize">Scale-out requirements: number of processes, number of threads per process, etc
+     </li>
+     <li class="itemize">Environment requirements: log directory, working directory, environment variables, etc
+     </li>
+     <li class="itemize">JVM parameters
+     </li>
+     <li class="itemize">Scheduling class
+     </li>
+     <li class="itemize">Error-handling preferences: acceptable failure counts, timeouts, etc
+     </li>
+     <li class="itemize">Debugging and monitoring requirements and preferences</li></ul>
+<!--l. 81--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.2   </span> <a 
+ id="x1-240003.2"></a>Operating System Limit Support</h3>
+<!--l. 82--><p class="noindent" >The CLI supports specification of operating system limits applied to the various job processes. To specify a limit, pass the
+name of the limit and its value in the <span 
+class="cmti-10">environment </span>specified in the job. Limits are named with the string
+&#8220;DUCC_RLIMIT_name&#8221; where &#8220;name&#8221; is the name of a specific limit. Supported limits include:
+     <ul class="itemize1">
+     <li class="itemize">DUCC_RLIMIT_CORE
+     </li>
+     <li class="itemize">DUCC_RLIMIT_CPU
+     </li>
+     <li class="itemize">DUCC_RLIMIT_DATA
+     </li>
+     <li class="itemize">DUCC_RLIMIT_FSIZE
+                                                                                                
+                                                                                                
+     </li>
+     <li class="itemize">DUCC_RLIMIT_MEMLOCK
+     </li>
+     <li class="itemize">DUCC_RLIMIT_NOFILE
+     </li>
+     <li class="itemize">DUCC_RLIMIT_NPROC
+     </li>
+     <li class="itemize">DUCC_RLIMIT_RSS
+     </li>
+     <li class="itemize">DUCC_RLIMIT_STACK
+     </li>
+     <li class="itemize">DUCC_RLIMIT_AS
+     </li>
+     <li class="itemize">DUCC_RLIMIT_LOCKS
+     </li>
+     <li class="itemize">DUCC_RLIMIT_SIGPENDING
+     </li>
+     <li class="itemize">DUCC_RLIMIT_MSGQUEUE
+     </li>
+     <li class="itemize">DUCC_RLIMIT_NICE
+     </li>
+     <li class="itemize">DUCC_RLIMIT_STACK
+     </li>
+     <li class="itemize">DUCC_RLIMIT_RTPRIO</li></ul>
+<!--l. 104--><p class="noindent" >See the Linux documentation for details on the meanings of these limits and their values.
+<!--l. 106--><p class="noindent" >For example, to set the maximum number of open files allowed in any job process, specify an environment similar to this
+when submitting the job:
+                                                                                                
+                                                                                                
+<div class="verbatim" id="verbatim-3">
+&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;ducc_submit&#x00A0;....&#x00A0;--environment="DUCC_RLIMIT_NOFILE=1024"&#x00A0;...</div>
+<!--l. 110--><p class="nopar" >
+<!--l. 112--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.3   </span> <a 
+ id="x1-250003.3"></a>Command Line Forms</h3>
+<!--l. 113--><p class="noindent" >The Command Line Interface is provided in several forms:
+<!--l. 116--><p class="noindent" >
+     <dl class="enumerate-enumitem"><dt class="enumerate-enumitem">
+  1. </dt><dd 
+class="enumerate-enumitem">A wrapper script around the uima-ducc-cli.jar.
+     </dd><dt class="enumerate-enumitem">
+  2. </dt><dd 
+class="enumerate-enumitem">Direct invocation of each command&#8217;s <span 
+class="cmtt-10">class </span>with the <span 
+class="cmtt-10">java </span>command.</dd></dl>
+<!--l. 120--><p class="noindent" >When using the scripts the full execution environment is established silently. When invoking a command&#8217;s <span 
+class="cmtt-10">class </span>directly, the
+java <span 
+class="cmtt-10">CLASSPATH </span>must include the uima-ducc-cli.jar, as illustrated in the wrapper scripts.
+<!--l. 124--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.4   </span> <a 
+ id="x1-260003.4"></a>DUCC Commands</h3>
+<!--l. 125--><p class="noindent" >The following commands are provided:
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_submit</span> </dt><dd 
+class="description">Submit a job for execution.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_cancel</span> </dt><dd 
+class="description">Cancel a job in progress.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_reserve</span> </dt><dd 
+class="description">Request a reservation of a machine.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_unreserve</span> </dt><dd 
+class="description">Cancel a reservation.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_monitor</span> </dt><dd 
+class="description">Monitor the progress of a job that is already submitted.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_process</span><span 
+class="cmbx-10">_submit</span> </dt><dd 
+class="description">Submit an arbitrary process (managed reservation) for execution.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_process</span><span 
+class="cmbx-10">_cancel</span> </dt><dd 
+class="description">Cancel an arbitrary process.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">ducc</span><span 
+class="cmbx-10">_services</span> </dt><dd 
+class="description">Register, unregister, start, stop, modify, disable, enable, ignore references, observe references, and
+     query a service.
+     </dd><dt class="description">
+<span 
+class="cmbx-10">viaducc</span> </dt><dd 
+class="description">This is a script wrapper to facilitate execution of Eclipse workspaces as DUCC jobs as well as general
+     execution of arbitrary processes in DUCC-managed resources.</dd></dl>
+                                                                                                
+                                                                                                
+<!--l. 140--><p class="noindent" >The next section describes these commands in detail.
+<!--l. 22--><p class="noindent" ><a name='DUCC_CLI_SUBMIT'></a>
+<!--l. 25--><p class="noindent" >
+<h3 class="sectionHead"><span class="titlemark">3.5   </span> <a 
+ id="x1-270003.5"></a>ducc_submit</h3>
+<!--l. 28--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-280003.5"></a><span 
+class="cmbx-10">Description:</span></span>
+The submit CLI is used to submit work for execution by DUCC. DUCC assigns a unique id to the job and schedules it for
+execution. The submitter may optionally request that the progress of the job be monitored, in which case the state of the job
+as it progresses through its lifetime is printed on the console.
+<!--l. 33--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-290003.5"></a><span 
+class="cmbx-10">Usage:</span></span>
+     <dl class="description"><dt class="description">
+<span 
+class="cmbx-10">Script wrapper</span> </dt><dd 
+class="description">$DUCC_HOME/bin/ducc_submit <span 
+class="cmti-10">options</span>
+     </dd><dt class="description">
+<span 
+class="cmbx-10">Java Main</span> </dt><dd 
+class="description">java -cp $DUCC_HOME/lib/uima-ducc-cli.jar org.apache.uima.ducc.cli.DuccJobSubmit <span 
+class="cmti-10">options</span></dd></dl>
+<!--l. 39--><p class="noindent" ><span class="paragraphHead"><a 
+ id="x1-300003.5"></a><span 
+class="cmbx-10">Options:</span></span>
+     <dl class="description"><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">all</span><span 
+class="cmbx-10">_in</span><span 
+class="cmbx-10">_one </span><span 
+class="cmmi-10">&#x003C;</span><span 
+class="cmbx-10">local </span><span 
+class="cmsy-10">| </span><span 
+class="cmbx-10">remote </span><span 
+class="cmmi-10">&#x003E;</span> </dt><dd 
+class="description">Run driver and pipeline in single process. If <span 
+class="cmti-10">local </span>is specified, the process is
+     executed on the local machine, for example, in the current Eclipse session. If <span 
+class="cmti-10">remote </span>is specified, the job is
+     submitted to DUCC as a <span 
+class="cmti-10">managed reservation </span>and run on some (presumably larger) machine allocated by
+     DUCC.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">attach</span><span 
+class="cmbx-10">_console</span> </dt><dd 
+class="description">If specified, redirect remote stdout and stderr to the local submitting console.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">cancel</span><span 
+class="cmbx-10">_on</span><span 
+class="cmbx-10">_interrupt</span> </dt><dd 
+class="description">If  specified,  the  job  is  monitored  and  will  be  canceled  if  the  submit  command  is
+     interrupted, e.g. with CTRL-C. This option always implies <span 
+class="cmsy-10">--</span><span 
+class="cmti-10">wait</span><span 
+class="cmti-10">_for</span><span 
+class="cmti-10">_completion</span>.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">classpath [path-string]</span> </dt><dd 
+class="description">The CLASSPATH used for the job. If specified, this is used for both the Job Driver
+     and each Job Process. If not specified, the CLASSPATH of the process invoking this request is used.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">classpath</span><span 
+class="cmbx-10">_order [user-before-ducc </span><span 
+class="cmsy-10">| </span><span 
+class="cmbx-10">ducc-before-user]</span>  </dt><dd 
+class="description">OBSOLETE - ignored.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">debug</span> </dt><dd 
+class="description">Enable debugging messages. This is primarily for debugging DUCC itself.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">description [text]</span> </dt><dd 
+class="description">The text is any string used to describe the job. It is displayed in the Web Server. When
+     specified on a command-line the text usually must be surrounded by quotes to protect it from the shell. The
+     default is &#8220;none&#8221;.
+                                                                                                
+                                                                                                
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_debug [debug-port]</span> </dt><dd 
+class="description">Append JVM debug flags to the JVM arguments to start the JobDriver in remote
+     debug mode. The remote process debugger will attempt to contact the specified port.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_descriptor</span><span 
+class="cmbx-10">_CR [descriptor.xml]</span>  </dt><dd 
+class="description">This  is  the  XML  descriptor  for  the  Collection  Reader.  This
+     descriptor is a resource that is searched for in the filesystem or Java classpath as described in the &#x00A0;<a 
+href="#x1-310003.5">notes below</a>.
+     (Required)
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_descriptor</span><span 
+class="cmbx-10">_CR</span><span 
+class="cmbx-10">_overrides [list]</span>  </dt><dd 
+class="description">This is the Job Driver collection reader configuration overrides. They are
+     specified as name/value pairs in a whitespace-delimited list. Example:
+                                                                                                
+                                                                                                
+     <div class="verbatim" id="verbatim-4">
+     &#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;--driver_descriptor_CR_overrides&#x00A0;name1=value1&#x00A0;name2=value2...
+     &#x00A0;<br />&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;&#x00A0;</div>
+     <!--l. 83--><p class="nopar" >
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_exception</span><span 
+class="cmbx-10">_handler [classname]</span> </dt><dd 
+class="description">This specifies a developer-supplied exception handler for the Job Driver. It
+     must implement <span 
+class="cmti-10">org.apache.uima.ducc.IErrorHandler </span>or extend <span 
+class="cmti-10">org.apache.uima.ducc.ErrorHandler</span>. A built-in default
+     exception handler is provided.
+     </dd><dt class="description">
+<span 
+class="cmsy-10">--</span><span 
+class="cmbx-10">driver</span><span 
+class="cmbx-10">_exception</span><span 
+class="cmbx-10">_handler</span><span 
+class="cmbx-10">_arguments [argument-string]</span> </dt><dd 
+class="description">This is a string containing arguments for the exception
+     handler. The contents of the string are entirely a function of the specified exception handler. If not specified, a <span 
+class="cmti-10">null </span>is
+     passed in. <br 
+class="newline" />The built-in default exception handler supports an argument string of the following form (with NO embedded
+     blanks):

[... 14007 lines stripped ...]