You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by re...@apache.org on 2013/03/31 16:56:50 UTC

svn commit: r1462983 - in /uima/sandbox/uimafit/trunk: ./ uimafit-docbook/ uimafit-docbook/src/ uimafit-docbook/src/docbook/

Author: rec
Date: Sun Mar 31 14:56:50 2013
New Revision: 1462983

URL: http://svn.apache.org/r1462983
Log:
[UIMA-2787] Migrate documentation to docbook

Added:
    uima/sandbox/uimafit/trunk/uimafit-docbook/   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/pom.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/common_book_info.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.book.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.configurationparameters.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.experiments.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.externalresources.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.gettingstarted.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.introduction.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.pipelines.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.testing.xml   (with props)
    uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.typesystem.xml   (with props)
Modified:
    uima/sandbox/uimafit/trunk/pom.xml

Modified: uima/sandbox/uimafit/trunk/pom.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/pom.xml?rev=1462983&r1=1462982&r2=1462983&view=diff
==============================================================================
--- uima/sandbox/uimafit/trunk/pom.xml (original)
+++ uima/sandbox/uimafit/trunk/pom.xml Sun Mar 31 14:56:50 2013
@@ -520,6 +520,7 @@
 		<module>uimafit-spring</module>
 		<module>uimafit-maven-plugin</module>
 		<module>uimafit-legacy-support</module>
+		<module>uimafit-docbook</module>
 	</modules>
 	<inceptionYear>2012</inceptionYear>
 </project>
\ No newline at end of file

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Sun Mar 31 14:56:50 2013
@@ -0,0 +1,3 @@
+.settings
+target
+.project

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/pom.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/pom.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/pom.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/pom.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+	Licensed to the Apache Software Foundation (ASF) under one
+	or more contributor license agreements. See the NOTICE file
+	distributed with this work for additional information
+	regarding copyright ownership. The ASF licenses this file
+	to you under the Apache License, Version 2.0 (the
+	"License"); you may not use this file except in compliance
+	with the License. You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+	Unless required by applicable law or agreed to in writing,
+	software distributed under the License is distributed on an
+	"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	KIND, either express or implied. See the License for the
+	specific language governing permissions and limitations
+	under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<parent>
+		<groupId>org.apache.uima</groupId>
+		<artifactId>uimafit-parent</artifactId>
+		<version>2.0.0-SNAPSHOT</version>
+	</parent>
+	<artifactId>uimafit-docbook</artifactId>
+	<name>uimaFIT - Documentation</name>
+	<packaging>pom</packaging>
+	<properties>
+		<uimaScmProject>${project.artifactId}</uimaScmProject>
+		<bookNameRoot>tools.uimafit.book</bookNameRoot>
+	</properties>
+</project>
\ No newline at end of file

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/common_book_info.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/common_book_info.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/common_book_info.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/common_book_info.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE bookinfo PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->  
+<bookinfo>
+
+    <releaseinfo>Version <?eval ${project.version}?></releaseinfo>
+
+    <productname>Apache uimaFIT</productname>
+
+    <authorgroup>
+      <corpauthor>Written and maintained by the Apache UIMA Development Community</corpauthor>
+    </authorgroup>
+
+ <!--
+    <mediaobject>
+      <imageobject>
+        <imagedata fileref="images/UIMAlogoLarge.png"/>
+      </imageobject>
+    </mediaobject>
+ -->
+    <legalnotice>
+      <para> </para>
+      <formalpara>
+        <title>License and Disclaimer</title>
+
+        <para>The ASF licenses this documentation
+           to you under the Apache License, Version 2.0 (the
+           "License"); you may not use this documentation except in compliance
+           with the License.  You may obtain a copy of the License at
+         
+         <blockquote>
+           <ulink url="http://www.apache.org/licenses/LICENSE-2.0"/>
+         </blockquote>
+         
+           Unless required by applicable law or agreed to in writing,
+           this documentation and its contents are distributed under the License 
+           on an 
+           "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+           KIND, either express or implied.  See the License for the
+           specific language governing permissions and limitations
+           under the License.
+         </para>
+      </formalpara>
+      <para> </para>
+      <para> </para>
+      <formalpara>
+        <title>Trademarks</title>
+        <para>All terms mentioned in the text that are known to be trademarks or 
+        service marks have been appropriately capitalized.  Use of such terms
+        in this book should not be regarded as affecting the validity of the
+        trademark or service mark.
+        </para>
+      </formalpara>
+    </legalnotice>
+    <copyright>
+      <year><?eval ${project.inceptionYear}?></year>
+      <year><?eval ${project.properties.buildYear}?></year>
+      <holder>The Apache Software Foundation</holder>
+    </copyright>
+    <pubdate><?eval ${project.properties.buildMonth}?>, <?eval ${project.properties.buildYear}?></pubdate>
+  </bookinfo>
\ No newline at end of file

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/common_book_info.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/common_book_info.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.book.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.book.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.book.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.book.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<book lang="en">
+  <title>Apache uimaFIT Guide and Reference</title>
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="common_book_info.xml"/>
+    
+  <toc/>
+
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="tools.uimafit.introduction.xml"/>
+  
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="tools.uimafit.gettingstarted.xml"/>
+
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="tools.uimafit.pipelines.xml"/>
+
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="tools.uimafit.experiments.xml"/>
+  
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="tools.uimafit.configurationparameters.xml"/>
+
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="tools.uimafit.externalresources.xml"/>
+
+  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="tools.uimafit.typesystem.xml"/>
+</book>

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.book.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.book.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.configurationparameters.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.configurationparameters.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.configurationparameters.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.configurationparameters.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,156 @@
+<chapter id="ugr.tools.uimafit.configurationparameters">
+  <title>Configuration Parameters</title>
+  <para>uimaFIT defines the <classname>@ConfigurationParameter</classname> annotation which can be
+    used to annotate the fields of an analysis engine or collection reader. The purpose of this
+    annotation is twofold:<itemizedlist>
+      <listitem>
+        <para>injection of parameters from the UIMA context into fields</para>
+      </listitem>
+      <listitem>
+        <para>declaration of parameter metadata (mandatory, default value, description) which can be
+          used to generate XML descriptors</para>
+      </listitem>
+    </itemizedlist>In a regular UIMA component, parameters need to be manually extracted from the
+    UIMA context, typically requiring a type cast. </para>
+  <programlisting format="linespecific">class MyAnalysisEngine extends CasAnnotator_ImplBase {
+  public static final String PARAM_SOURCE_DIRECTORY = "sourceDirectory";
+  private File sourceDirectory;
+
+  public void initialize(UimaContext context) 
+      throws ResourceInitializationException {
+
+    sourceDirectory = new File((String) context.getConfigParameterValue(
+      PARAM_SOURCE_DIRECTORY));
+  }
+}</programlisting>
+  <para>The component has no way to declare a default value or to declare if a parameter is optional
+    or mandatory. In addition, any documentation needs to be maintained in JavaDoc and in the XML
+    descriptor for the component.</para>
+  <para>With uimaFIT, all this information can be declared in the component using the
+      <classname>@ConfigurationParameter</classname> annotation.<table frame="all">
+      <title><classname>@ConfigurationParameter</classname> annotation</title>
+      <tgroup cols="3">
+        <colspec colname="c1" colnum="1" colwidth="1.0*"/>
+        <colspec colname="c2" colnum="2" colwidth="1*"/>
+        <colspec colname="c3" colnum="3" colwidth="1.0*"/>
+        <thead>
+          <row>
+            <entry>Parameter</entry>
+            <entry>Description</entry>
+            <entry>Default</entry>
+          </row>
+        </thead>
+        <tbody>
+          <row>
+            <entry>name</entry>
+            <entry>parameter name</entry>
+            <entry>name of annotated field</entry>
+          </row>
+          <row>
+            <entry>description</entry>
+            <entry>description of the parameter</entry>
+            <entry/>
+          </row>
+          <row>
+            <entry>mandatory</entry>
+            <entry>whether a non-null value must be specified </entry>
+            <entry>true</entry>
+          </row>
+          <row>
+            <entry>defaultValue</entry>
+            <entry>the default value if no value is specified</entry>
+            <entry/>
+          </row>
+        </tbody>
+      </tgroup>
+    </table></para>
+  <programlisting>class MyAnalysisEngine 
+    extends org.uimafit.component.CasAnnotator_ImplBase {
+
+  /**
+   * Directory to read the data from.
+   */
+  public static final String PARAM_SOURCE_DIRECTORY = "sourceDirectory";
+  @ConfigurationParameter(name=PARAM_SOURCE_DIRECTORY, defaultValue=".")
+  private File sourceDirectory;
+}</programlisting>
+  <para>Note that it is no longer necessary to implement the <methodname>initialize()</methodname>
+    method. uimaFIT takes care of locating the parameter <parameter>sourceDirectory</parameter> in
+    the UIMA context. It recognizes that the <classname>File</classname> class has a
+      <classname>String</classname> constructor and uses that to instantiate a new
+      <classname>File</classname> object from the parameter. A parameter is mandatory unless
+    specified otherwise. If a mandatory parameter is not specified in the context, an exception is
+    thrown.</para>
+  <para>The <parameter>defaultValue</parameter> is used when generating a UIMA component
+    description from the class. It should be pointed out in particular, that uimaFIT does not make
+    use of the default value when injecting parameters into fields. For this reason, it is possible
+    to have a parameter that is mandatory but does have a default value. The default value is used
+    as a parameter value when a component description is generated via the uimaFIT factories unless
+    a parameter is specified in the factory call. If a component description is created manually
+    without specifying a value for a mandatory parameter, uimaFIT will generate an exception.</para>
+  <note>
+    <para>You can use the <emphasis>enhance</emphasis> goal of the uimaFIT Maven plugin to pick up
+      the parameter description from the JavaDoc and post it to the
+        <parameter>description</parameter> field of the
+        <classname>@ConfigurationParameter</classname> annotation. This should be preferred to
+      specifying the description explicitly as part of the annotation.</para>
+  </note>
+  <para>The parameter injection mechanism is implemented in the
+      <classname>ConfigurationParameterInitializer</classname> class. uimaFIT provides several base
+    classes that already come with an <methodname>initialize()</methodname> method using the
+    initializer:</para>
+  <itemizedlist>
+    <listitem>
+      <para><classname>CasAnnotator_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>CasCollectionReader_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>CasConsumer_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>CasFlowController_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>CasMultiplier_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>JCasAnnotator_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>JCasCollectionReader_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>JCasConsumer_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>JCasFlowController_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>JCasMultiplier_ImplBase</classname></para>
+    </listitem>
+    <listitem>
+      <para><classname>Resource_ImplBase</classname></para>
+    </listitem>
+  </itemizedlist>
+  <para>The <classname>ConfigurationParameterInitializer</classname> can also be used with shared
+    resources:</para>
+  <programlisting>class MySharedResourceObject implements SharedResourceObject {
+  public static final String PARAM_VALUE = "Value";
+  @ConfigurationParameter(name = PARAM_VALUE, mandatory = true)
+  private String value;
+
+  public void load(DataResource aData)
+      throws ResourceInitializationException {
+
+    ConfigurationParameterInitializer.initialize(this, aData);
+  }
+}</programlisting>
+  <para>Fields that can be annotated with the <classname>@ConfigurationParameter</classname>
+    annotation are any array or collection types of primitive types (<type>int</type>,
+      <type>boolean</type>, <type>float</type>, <type>double</type>), any enum types, any types that
+    define a constructor accepting a single <classname>String</classname> (e.g.
+      <classname>File</classname>), as well as fields of the types <classname>Pattern</classname>
+    and <classname>Locale</classname>.</para>
+</chapter>

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.configurationparameters.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.configurationparameters.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.experiments.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.experiments.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.experiments.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.experiments.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,42 @@
+<chapter id="ugr.tools.uimafit.experiments">
+  <title>Running Experiments</title>
+  <para>The <emphasis>uimafit-examples</emphasis> module contains a package
+      <package>org.uimafit.examples.experiment.pos</package> which demonstrates a very simple
+    experimental setup for testing a part-of-speech tagger. You may find this example more
+    accessible if you check out the code from subversion and build it in your own
+    environment.</para>
+  <para>The documentation for this example can be found in the code itself. Please refer to
+      <classname>RunExperiment</classname> as a starting point. The following is copied from the
+    javadoc comments of that file:</para>
+  <blockquote>
+    <para><classname>RunExperiment</classname> demonstrates a very common (though simplified)
+      experimental setup in which gold standard data is available for some task and you want to
+      evaluate how well your analysis engine works against that data. Here we are evaluating
+        <classname>BaselineTagger</classname> which is a (ridiculously) simple part-of-speech tagger
+      against the part-of-speech tags found in
+        <filename>src/main/resources/org/uimafit/examples/pos/sample.txt.pos</filename></para>
+  </blockquote>
+  <para>The basic strategy is as follows:</para>
+  <itemizedlist>
+    <listitem>
+      <para>post the data <emphasis>as is</emphasis> into the default view,</para>
+    </listitem>
+    <listitem>
+      <para>parse the gold-standard tokens and part-of-speech tags and put the results into another
+        view we will call <emphasis>GOLD_VIEW</emphasis>,</para>
+    </listitem>
+    <listitem>
+      <para>create another view called <emphasis>SYSTEM_VIEW</emphasis> and copy the text and
+          <classname>Token</classname> annotations from the <emphasis>GOLD_VIEW</emphasis> into this
+        view,</para>
+    </listitem>
+    <listitem>
+      <para>run the <classname>BaselineTagger</classname> on the <emphasis>SYSTEM_VIEW</emphasis>
+        over the copied <classname>Token</classname> annotations,</para>
+    </listitem>
+    <listitem>
+      <para>evaluate the part-of-speech tags found in the <emphasis>SYSTEM_VIEW</emphasis> with
+        those in the <emphasis>GOLD_VIEW</emphasis>.</para>
+    </listitem>
+  </itemizedlist>
+</chapter>

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.experiments.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.experiments.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.externalresources.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.externalresources.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.externalresources.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.externalresources.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,284 @@
+<chapter id="ugr.tools.uimafit.externalresources">
+  <title>External Resources</title>
+  <para>An analysis engine often uses some data model. This may be as simple as word frequency
+    counts or as complex as the model of a parser. Often these models can become quite large. If an
+    analysis engine is deployed multiple times in the same pipeline or runs on multiple CPU cores,
+    memory can be saved by using a shared instance of the data model. UIMA supports such a scenario
+    by so-called external resources. The following sections illustrate how external resources can
+    be used with uimaFIT.</para>
+  <para>First create a class for the shared data model. Usually this class would load its data from
+    some URI and then expose it via its methods. An example would be to load word frequency counts
+    and to provide a <methodname>getFrequency()</methodname> method. In our simple example we do not
+    load anything from the provided URI - we just offer a method to get the URI from which data would be
+    loaded.</para>
+  <programlisting>// Simple model that only stores the URI it was loaded from. Normally data
+// would be loaded from the URI instead and made accessible through methods
+// in this class. This simple example only allows to access the URI.
+public static final class SharedModel implements SharedResourceObject {
+  private String uri;
+
+  public void load(DataResource aData) 
+      throws ResourceInitializationException {
+
+    uri = aData.getUri().toString();
+  }
+
+  public String getUri() { return uri; }
+}</programlisting>
+  <section>
+    <title>Resource injection</title>
+    <section>
+      <title>Regular UIMA components</title>
+      <para>When an external resource is used in a regular UIMA component, it is usually fetched
+        from the context, cast and copied to a class member variable.</para>
+      <programlisting>class MyAnalysisEngine extends CasAnnotator_ImplBase {
+  final static String MODEL_KEY = "Model";
+  private SharedModel model;
+
+  public void initialize(UimaContext context) 
+      throws ResourceInitializationException {
+
+    model = (SharedModel) getContext().getResourceObject(MODEL_KEY);
+  }
+}</programlisting>
+      <para>uimaFIT can be used to inject external resources into such traditional components using
+        the <methodname>createDependencyAndBind()</methodname> method. To show that this works with
+        any off-the-shelf UIMA component, the following example uses uimaFIT to configure the
+        OpenNLP Tokenizer:</para>
+      <programlisting>// Create descriptor
+AnalysisEngineDescription tokenizer = createPrimitiveDescription(
+  Tokenizer.class,
+  UimaUtil.TOKEN_TYPE_PARAMETER, Token.class.getName(),
+  UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName());
+
+// Create the external resource dependency for the model and bind it
+createDependencyAndBind(tokenizer, UimaUtil.MODEL_PARAMETER,
+  TokenizerModelResourceImpl.class,
+  "http://opennlp.sourceforge.net/models-1.5/en-token.bin");</programlisting>
+    </section>
+    <section>
+      <title>uimaFIT-aware components</title>
+      <para>uimaFIT provides the <classname>@ExternalResource</classname> annotation to inject
+        external resources directly into class member variables.</para>
+      <table frame="all">
+        <title><classname>@ExternalResource</classname> annotation</title>
+        <tgroup cols="3">
+          <colspec colname="c1" colnum="1" colwidth="1.0*"/>
+          <colspec colname="c2" colnum="2" colwidth="1.0*"/>
+          <colspec colname="c3" colnum="3" colwidth="1.0*"/>
+          <thead>
+            <row>
+              <entry>Parameter</entry>
+              <entry>Description</entry>
+              <entry>Default</entry>
+            </row>
+          </thead>
+          <tbody>
+            <row>
+              <entry>key</entry>
+              <entry>Resource key</entry>
+              <entry>field name</entry>
+            </row>
+            <row>
+              <entry>api</entry>
+              <entry>Used when the external resource type is different from the field type, e.g.
+                when using an ExternalResourceLocator</entry>
+              <entry>field type</entry>
+            </row>
+            <row>
+              <entry>mandatory</entry>
+              <entry>Whether a value must be specified</entry>
+              <entry>true</entry>
+            </row>
+          </tbody>
+        </tgroup>
+      </table>
+      <programlisting>// Example annotator that uses the SharedModel. In the process() we only test
+// if the model was properly initialized by uimaFIT
+public static class Annotator 
+    extends org.uimafit.component.JCasAnnotator_ImplBase {
+
+  final static String MODEL_KEY = "Model";
+  @ExternalResource(key = MODEL_KEY)
+  private SharedModel model;
+
+  public void process(JCas aJCas) throws AnalysisEngineProcessException {
+    assertTrue(model.getUri().endsWith("gene_model_v02.bin"));
+    // Prints the instance ID to the console - this proves the same
+    // instance of the SharedModel is used in both Annotator instances.
+    System.out.println(model);
+  }
+}</programlisting>
+      <para>Note that it is no longer necessary to implement the
+          <methodname>initialize()</methodname> method. uimaFIT takes care of locating the external
+        resource <parameter>Model</parameter> in the UIMA context and assigns it to the field
+          <varname>model</varname>. If a mandatory resource is not present in the context, an
+        exception is thrown.</para>
+      <para>The resource injection mechanism is implemented in the
+          <classname>ExternalResourceInitializer</classname> class. uimaFIT provides several base
+        classes that already come with an <methodname>initialize()</methodname> method using the
+        initializer:</para>
+      <itemizedlist>
+        <listitem>
+          <para><classname>CasAnnotator_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>CasCollectionReader_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>CasConsumer_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>CasFlowController_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>CasMultiplier_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>JCasAnnotator_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>JCasCollectionReader_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>JCasConsumer_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>JCasFlowController_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>JCasMultiplier_ImplBase</classname></para>
+        </listitem>
+        <listitem>
+          <para><classname>Resource_ImplBase</classname></para>
+        </listitem>
+      </itemizedlist>
+      <para>When building a pipeline, external resources can be set on a component just like
+        configuration parameters. External resources and configuration parameters can be mixed and
+        appear in any order when creating a component description.</para>
+      <para>Note that in the following example, we create only one external resource description and
+        use it to configure two different analysis engines. Because we only use a single
+        description, also only a single instance of the external resource is created and shared
+        between the two engines.
+        <programlisting>ExternalResourceDescription extDesc = createExternalResourceDescription(
+  SharedModel.class, new File("somemodel.bin"));
+		
+// Binding external resource to each Annotator individually
+AnalysisEngineDescription aed1 = createPrimitiveDescription(
+  Annotator.class,
+  Annotator.MODEL_KEY, extDesc);
+
+AnalysisEngineDescription aed2 = createPrimitiveDescription(
+  Annotator.class,
+  Annotator.MODEL_KEY, extDesc);
+
+// Check the external resource was injected
+AnalysisEngineDescription aaed = createAggregateDescription(aed1, aed2);
+AnalysisEngine ae = createAggregate(aaed);
+ae.process(ae.newJCas());</programlisting></para>
+      <para>This example is given as a full JUnit-based example in the
+          <emphasis>uimaFIT-examples</emphasis> project.</para>
+    </section>
+    <section>
+      <title>Resources extending <classname>Resource_ImplBase</classname></title>
+      <para>One kind of resource extends <classname>Resource_ImplBase</classname>. These are the
+        easiest to handle, because uimaFIT's version of <classname>Resource_ImplBase</classname>
+        already implements the necessary logic. Just be sure to call
+          <methodname>super.initialize()</methodname> when overriding
+          <methodname>initialize()</methodname>. Also mind that external resources are not available
+        yet when <methodname>initialize()</methodname> is called. For any initialization logic that
+        requires resources, override and implement
+          <methodname>afterResourcesInitialized()</methodname>. Other than that, injection of
+        external resources works as usual.</para>
+      <programlisting>public static class ChainableResource extends Resource_ImplBase {
+  public final static String PARAM_CHAINED_RESOURCE = "chainedResource";
+  @ExternalResource(key = PARAM_CHAINED_RESOURCE)
+  private ChainableResource chainedResource;
+
+  public void afterResourcesInitialized() {
+    // init logic that requires external resources
+  }
+}</programlisting>
+    </section>
+    <section>
+      <title>Resources implementing <interfacename>SharedResourceObject</interfacename></title>
+      <para>The other kind of resources implement
+          <interfacename>SharedResourceObject</interfacename>. Since this is an interface, uimaFIT
+        cannot provide the initialization logic, so you have to implement a couple of things in the
+        resource:</para>
+      <itemizedlist>
+        <listitem>
+          <para>implement <interfacename>ExternalResourceAware</interfacename></para>
+        </listitem>
+        <listitem>
+          <para>declare a configuration parameter
+              <constant>ExternalResourceFactory.PARAM_RESOURCE_NAME</constant> and return its value
+            in <methodname>getResourceName()</methodname></para>
+        </listitem>
+        <listitem>
+          <para>invoke <methodname>ConfigurationParameterInitializer.initialize()</methodname> in
+            the <methodname>load()</methodname> method.</para>
+        </listitem>
+      </itemizedlist>
+      <para>Again, mind that external resources are not properly initialized until uimaFIT invokes
+          <methodname>afterResourcesInitialized()</methodname>.</para>
+      <programlisting>public class TestSharedResourceObject implements 
+    SharedResourceObject, ExternalResourceAware {
+
+  @ConfigurationParameter(name=ExternalResourceFactory.PARAM_RESOURCE_NAME)
+  private String resourceName;
+
+  public final static String PARAM_CHAINED_RESOURCE = "chainedResource";
+  @ExternalResource(key = PARAM_CHAINED_RESOURCE)
+  private ChainableResource chainedResource;
+
+  public String getResourceName() {
+    return resourceName;
+  }
+
+  public void load(DataResource aData) 
+      throws ResourceInitializationException {
+
+    ConfigurationParameterInitializer.initialize(this, aData);
+    // rest of the init logic that does not require external resources
+  }
+
+  public void afterResourcesInitialized() {
+   // init logic that requires external resources
+  }
+}</programlisting>
+    </section>
+    <section>
+      <title>Note on injecting resources into resources</title>
+      <para>Nested resources are only initialized if they are used in a pipeline which contains at
+        least one component that calls
+          <methodname>ConfigurationParameterInitializer.initialize()</methodname>. Any component
+        extending uimaFIT's component base classes qualifies. If you use nested resources in a
+        pipeline without any uimaFIT-aware components, you can just add uimaFIT's
+          <classname>NoopAnnotator</classname> to the pipeline.</para>
+    </section>
+  </section>
+  <section>
+    <title>Resource locators</title>
+    <para>Normally, in UIMA an external resource needs to implement either
+        <interfacename>SharedResourceObject</interfacename> or
+        <interfacename>Resource</interfacename>. In order to inject arbitrary objects, uimaFIT has
+      the concept of <interfacename>ExternalResourceLocator</interfacename>. When a resource
+      implements this interface, not the resource itself is injected, but the method
+        <methodname>getResource()</methodname> is called on the resource and the result is injected.
+      The following example illustrates how to inject an object from JNDI into a UIMA
+      component:</para>
+    <programlisting>class MyAnalysisEngine2 extends JCasAnnotator_ImplBase {
+  static final String RES_DICTIONARY = "dictionary";
+  @ExternalResource(key = RES_DICTIONARY)
+  Dictionary dictionary;
+}
+
+AnalysisEngineDescription desc = createPrimitiveDescription(
+  MyAnalysisEngine2.class);
+
+bindResource(desc, MyAnalysisEngine2.RES_DICTIONARY, 
+  JndiResourceLocator.class,
+  JndiResourceLocator.PARAM_NAME, "dictionaries/german");</programlisting>
+  </section>
+</chapter>

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.externalresources.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.externalresources.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.gettingstarted.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.gettingstarted.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.gettingstarted.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.gettingstarted.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,99 @@
+<chapter id="ugr.tools.uimafit.gettingstarted">
+  <title>Getting Started</title>
+
+  <para>This quick start tutorial demonstrates how to use uimaFIT to define and set a configuration
+    parameter in an analysis engine, run it, and generate a descriptor file for it. The complete
+    code for this example can be found in the <emphasis>uimaFIT-examples</emphasis> module.</para>
+
+  <section>
+    <title>A simple analysis engine implementation</title>
+    <para> Here is the complete analysis engine implementation for this example.</para>
+
+    <programlisting format="linespecific">public class GetStartedQuickAE 
+    extends org.uimafit.component.JCasAnnotator_ImplBase {
+  
+  public static final String PARAM_STRING = "stringParam";
+  @ConfigurationParameter(name = PARAM_STRING)
+  private String stringParam;
+  
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    System.out.println("Hello world!  Say 'hi' to " + stringParam);
+  }
+}</programlisting>
+    <para>The first thing to note is that the member variable <varname>stringParam</varname> is
+      annotated with <classname>@ConfigurationParameter</classname> which tells uimaFIT that this is
+      an analysis engine configuration parameter. It is best practice to create a public constant
+      for the parameter name, here <code>PARAM_STRING</code>. The second thing to note is that we
+      extend uimaFIT's version of the <classname>JCasAnnotator_ImplBase</classname>. The initialize
+      method of this super class calls:</para>
+    <programlisting format="linespecific">ConfigurationParameterInitializer.initializeConfigurationParameters(
+  Object, UimaContext) </programlisting>
+    <para>which populates the configuration parameters with the appropriate contents of the
+        <interfacename>UimaContext</interfacename>. If you do not want to extend uimaFIT's
+        <classname>JCasAnnotator_ImplBase</classname>, then you can call this method directly in the
+        <methodname>initialize</methodname> method of your analysis engine or any class that
+      implements <interfacename>Initializable</interfacename>. You can call this method for an
+      instance of any class that has configuration parameters.</para>
+  </section>
+
+  <section>
+    <title>Running the analysis engine</title>
+    <para>The following lines of code demonstrate how to instantiate and run the analysis engine
+      from a main method:</para>
+    <programlisting>JCas jCas = JCasFactory.createJCas();
+  
+AnalysisEngine analysisEngine = AnalysisEngineFactory.createPrimitive(
+  GetStartedQuickAE.class,
+  GetStartedQuickAE.PARAM_STRING, "uimaFIT");
+  
+analysisEngine.process(jCas);  </programlisting>
+
+    <para>In a more involved example, we would probably instantiate a collection reader and run this
+      analysis engine over a collection of documents. Here, it suffices to simply create a
+        <interfacename>JCas</interfacename>. Line 3 instantiates the analysis engine using
+        <classname>AnalysisEngineFactory</classname> and sets the string parameter named
+        <parameter>stringParam</parameter> to the value <literal>uimaFIT</literal>. Running this
+      simple program sends the following output to the console: </para>
+
+    <programlisting>Hello world!  Say 'hi' to uimaFIT  </programlisting>
+
+    <para>Normally you would be using a type system with your analysis components. When using
+      uimaFIT, it is easiest to keep your type system descriptors in your source folders and make
+      them known to uimaFIT. To do so, create a file
+        <filename>META-INF/org.uimafit/types.txt</filename> in a source folder and add references to
+      all your type descriptors to the file, one per line. You can also use wildcards. For example: </para>
+
+    <programlisting>classpath*:org/uimafit/examples/type/Token.xml
+classpath*:org/uimafit/examples/type/Sentence.xml
+classpath*:org/uimafit/examples/tutorial/type/*.xml </programlisting>
+  </section>
+
+  <section>
+    <title>Generate a descriptor file</title>
+
+    <para>The following lines of code demonstrate how a descriptor file can be generated using the
+      class definition:</para>
+
+    <programlisting>AnalysisEngineDescription analysisEngineDescription =
+  AnalysisEngineFactory.createPrimitiveDescription(
+    GetStartedQuickAE.class,
+    GetStartedQuickAE.PARAM_STRING, "uimaFIT");
+
+analysisEngineDescription.toXML(
+  new FileOutputStream("GetStartedQuickAE.xml"));</programlisting>
+
+    <para>If you open the resulting descriptor file you will see that the configuration parameter
+        <parameter>stringParam</parameter> is defined with the value set to
+        <literal>uimaFIT</literal>. We could now instantiate an analysis engine using this
+      descriptor file with a line of code like this:</para>
+
+    <programlisting>AnalysisEngineFactory.createAnalysisEngine("GetStartedQuickAE");</programlisting>
+
+    <para>But, of course, we really wouldn't want to do that now that we can instantiate analysis
+      engines using the class definition as was done above!</para>
+
+    <para>This chapter, of course, did not demonstrate every feature of uimaFIT which provides
+      support for annotating external resources, creating aggregate engines, running pipelines,
+      testing components, among others.</para>
+  </section>
+</chapter>

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.gettingstarted.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.gettingstarted.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.introduction.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.introduction.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.introduction.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.introduction.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,142 @@
+<chapter id="ugr.tools.uimafit.introduction">
+  <title>Introduction</title>
+  <para>While uimaFIT provides many features for a UIMA developer, there are two overarching themes
+    that most features fall under. These two sides of uimaFIT are, while complementary, largely
+    independent of each other. One of the beauties of uimaFIT is that a developer that uses one side
+    of uimaFIT extensively is not required to use the other side at all. </para>
+  <section>
+    <title>Simplify Component Implementation</title>
+    <para>The first broad theme of uimaFIT provides features that <emphasis>simplify component
+        implementation</emphasis>. Our favorite example of this is the
+        <classname>@ConfigurationParameter</classname> annotation which allows you to annotate a
+      member variable as a configuration parameter. This annotation in combination with the method
+        <methodname>ConfigurationParameterInitializer.initialize()</methodname> completely automates
+      the process of initializing member variables with values from the
+        <interfacename>UimaContext</interfacename> passed into your analysis engine's initialize
+      method. Similarly, the <classname>@ExternalResource</classname> annotation in
+      combination with the method <methodname>ExternalResourceInitializer.initialize()</methodname>
+      completely automates the binding of an external resource as defined in the
+        <interfacename>UimaContext</interfacename> to a member variable. Dispensing with manually
+      writing the code that performs these two tasks reduces effort, eliminates verbose and
+      potentially buggy boiler-plate code, and makes implementing a UIMA component more enjoyable.
+      Consider, for example, a member variable that is of type <classname>Locale</classname>. With
+      uimaFIT you can simply annotate the member variable with
+        <classname>@ConfigurationParameter</classname> and have your initialize method automatically
+      initialize the variable correctly with a string value in the
+        <interfacename>UimaContext</interfacename> such as <literal>en_US</literal>. </para>
+  </section>
+  <section>
+    <title>Simplify Component Instantiation</title>
+    <para>The second broad theme of uimaFIT provides features that <emphasis>simplify component
+        instantiation</emphasis>. Working with UIMA, have you ever said to yourself <quote>but I
+        just want to tag some text!?</quote> What does it take to <quote>just tag some text?</quote>
+      Here's a list of things you must do with the traditional approach:</para>
+    <itemizedlist>
+      <listitem>
+        <para>wrap your tagger as a UIMA analysis engine</para>
+      </listitem>
+      <listitem>
+        <para>write a descriptor file for your analysis engine</para>
+      </listitem>
+      <listitem>
+        <para>write a CAS consumer that produces the desired output</para>
+      </listitem>
+      <listitem>
+        <para>write another descriptor file for the CAS consumer</para>
+      </listitem>
+      <listitem>
+        <para>write a descriptor file for a collection reader</para>
+      </listitem>
+      <listitem>
+        <para>write a descriptor file that describes a pipeline</para>
+      </listitem>
+      <listitem>
+        <para>invoke the Collection Processing Manager with your pipeline descriptor file</para>
+      </listitem>
+    </itemizedlist>
+    <section>
+      <title>From a class</title>
+      <para>Each of these steps has its own pitfalls and can be rather time consuming. This is a
+        rather unsatisfying answer to our simple desire to just tag some text. With uimaFIT you can
+        literally eliminate all of these steps. </para>
+      <para>Here's a simple snippet of Java code that illustrates <quote>tagging some text</quote>
+        with uimaFIT:</para>
+      <programlisting>JCas jCas = JCasFactory.createJCas();
+
+jCas.setDocumentText("some text");
+
+AnalysisEngine tokenizer = createPrimitive(MyTokenizer.class);
+
+AnalysisEngine tagger = createPrimitive(MyTagger.class);
+
+runPipeline(jCas, tokenizer, tagger);
+
+for(Token token : iterate(jCas, Token.class)){
+    System.out.println(token.getTag());
+}</programlisting>
+      <para>This code assumes several static method imports (e.g.
+          <methodname>createPrimitive()</methodname>) provided by uimaFIT for brevity. And while the
+        terseness of this code won't make a Python programmer blush - it is certainly much easier
+        than the seven steps outlined above! </para>
+    </section>
+    <section>
+      <title>From an XML descriptor</title>
+      <para>uimaFIT provides mechanisms to instantiate and run UIMA components programmatically with
+        or without descriptor files. For example, if you have a descriptor file for your analysis
+        engine defined by <classname>MyTagger</classname> (as shown above), then you can instead
+        instantiate the analysis engine with:</para>
+      <programlisting>AnalysisEngine tagger = createAnalysisEngine("mypackage.MyTagger");</programlisting>
+      <para>This will find the descriptor file <filename>mypackage/MyTagger.xml</filename> by name.
+        Similarly, you can find a descriptor file by location with
+          <methodname>createAnalysisEngineFromPath()</methodname>. However, if you want to dispense
+        with XML descriptor files altogether (and you probably do), you can use the method
+          <methodname>createPrimitive()</methodname> as shown above. One of the driving motivations
+        for creating the second side of uimaFIT is our frustration with descriptor files and our
+        desire to eliminate them. Descriptor files are difficult to maintain because they are
+        generally tightly coupled with java code, they decay without warning, they are wearisome to
+        test, and they proliferate, among other reasons.</para>
+    </section>
+  </section>
+  <section>
+    <title>Is this cheating?</title>
+    <para>One question that is often raised by new uimaFIT users is whether or not it breaks the
+        <emphasis>UIMA way</emphasis>. That is, does adopting uimaFIT lead me down a path of
+      creating UIMA components and systems that are incompatible with the traditional UIMA approach?
+      The answer to this question is <emphasis>no</emphasis>. For starters, uimaFIT does not skirt
+      the UIMA mechanism of describing components - it only skips the XML part of it. For example,
+      when the method <methodname>createPrimitive()</methodname> is called (as shown above) an
+        <interfacename>AnalysisEngineDescription</interfacename> is created for the analysis engine.
+      This is the same object type that is instantiated when a descriptor file is used. So, instead
+      of parsing XML to instantiate an analysis engine description from XML, uimaFIT uses a factory
+      method to instantiate it from method parameters. One of the happy benefits of this approach is
+      that for a given <interfacename>AnalysisEngineDescription</interfacename> (which can be
+      obtained directly with <methodname>createPrimitiveDescription()</methodname>) you can generate
+      an XML descriptor file using <methodname>AnalysisEngineDescription.toXML()</methodname>. So,
+      uimaFIT actually provides a very simple and direct path for <emphasis>generating</emphasis>
+      XML descriptor files rather than manually creating and maintaining them! </para>
+    <para>It is also useful to clarify that if you only want to use one side or the other of
+      uimaFIT, then you are free to do so. This is possible precisely because uimaFIT does not
+      work around UIMA's mechanisms for describing components but rather uses them directly. For
+      example, if the only thing you want to use in uimaFIT is the
+        <classname>@ConfigurationParameter</classname>, then you can do so without worrying about
+      what effect this will have on your descriptor files. This is because your analysis engine will
+      be initialized with exactly the same <interfacename>UimaContext</interfacename> regardless of
+      whether you instantiate your analysis engine in the <emphasis>UIMA way</emphasis> or use one
+      of uimaFIT's factory methods. Similarly, a UIMA component does not need to be annotated with
+        <classname>@ConfigurationParameter</classname> for you to make use of the
+        <methodname>createPrimitive()</methodname> method. This is because when you pass
+      configuration parameter values in to the <methodname>createPrimitive()</methodname> method,
+      they are added to an <interfacename>AnalysisEngineDescription</interfacename> which is used by
+      UIMA to populate a <interfacename>UimaContext</interfacename> - just as it would if you used a
+      descriptor file. </para>
+  </section>
+  <section>
+    <title>Conclusion</title>
+    <para>Because uimaFIT can be used to simplify component implementation and instantiation it is
+      easy to assume that you can't do one without the other. This chapter has demonstrated that while
+      these two sides of uimaFIT complement each other, they are not coupled together and each can
+      be effectively used without the other. Similarly, by understanding how uimaFIT uses the UIMA
+      component description mechanisms directly, one can be assured that uimaFIT enables UIMA
+      development that is compatible and consistent with the UIMA standard and APIs. </para>
+  </section>
+</chapter>

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.introduction.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.introduction.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.pipelines.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.pipelines.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.pipelines.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.pipelines.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,40 @@
+<chapter id="ugr.tools.uimafit.pipelines">
+  <title>Pipelines</title>
+  <para>UIMA is a component-based architecture that allows you to compose various processing components
+    into a complex processing pipeline. A pipeline typically involves a <emphasis>collection
+      reader</emphasis> which ingests documents and <emphasis>analysis engines</emphasis> that do
+    the actual processing.</para>
+  <para>Normally, you would run a pipeline using a UIMA Collection Processing Engine or using UIMA
+    AS. uimaFIT offers a third alternative that is much simpler to use and well suited for embedding
+    UIMA pipelines into applications or for writing tests.</para>
+  <para>As uimaFIT does not supply any readers or processing components, we just assume that we have
+    written three components:</para>
+  <itemizedlist>
+    <listitem>
+      <para><classname>TextReader</classname> - reads text files from a directory</para>
+    </listitem>
+    <listitem>
+      <para><classname>Tokenizer</classname> - annotates tokens</para>
+    </listitem>
+    <listitem>
+      <para><classname>TokenFrequencyWriter</classname> - writes a list of tokens and their
+        frequency to a file</para>
+    </listitem>
+  </itemizedlist>
+  <para>We create descriptors for all components and run them as a pipeline:</para>
+  <programlisting>CollectionReaderDescription reader = 
+  CollectionReaderFactory.createDescription(
+    TextReader.class, 
+    TextReader.PARAM_INPUT, "/home/uimafit/documents");
+
+AnalysisEngineDescription tokenizer = 
+  AnalysisEngineFactory.createPrimitiveDescription(
+    Tokenizer.class);
+
+AnalysisEngineDescription tokenFrequencyWriter = 
+  AnalysisEngineFactory.createPrimitiveDescription(
+    TokenFrequencyWriter.class, 
+    TokenFrequencyWriter.PARAM_OUTPUT, "counts.txt");
+
+SimplePipeline.runPipeline(reader, tokenizer, tokenFrequencyWriter);</programlisting>
+</chapter>

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.pipelines.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.pipelines.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.testing.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.testing.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.testing.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.testing.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,87 @@
+<chapter id="ugr.tools.uimafit.testing">
+  <title>Testing UIMA components</title>
+  <para>Writing tests without uimaFIT can be a laborious process that results in fragile tests that
+    are very verbose and break easily when code is refactored. This chapter demonstrates how you can
+    write tests that are both concise and robust. Here's an outline of how you might create a test
+    for a UIMA component <emphasis>without</emphasis> uimaFIT:</para>
+  <orderedlist>
+    <listitem>
+      <para>write a descriptor file that configures your component appropriately for the test. This
+        requires a minimum of 30-50 lines of XML.</para>
+    </listitem>
+    <listitem>
+      <para>begin a test with 5-10 lines of code that instantiate, e.g., the analysis engine.</para>
+    </listitem>
+    <listitem>
+      <para>run the analysis engine against some text and test the contents of the CAS.</para>
+    </listitem>
+    <listitem>
+      <para>repeat steps 1-3 for your next test usually by copying the descriptor file, renaming it,
+        and changing e.g. configuration parameters.</para>
+    </listitem>
+  </orderedlist>
+  <para>If you have gone through the pain of creating tests like these and then decided you should
+    refactor your code, then you know how tedious it is to maintain them. </para>
+  <para>Instead of pasting variants of the setup code (see step 2) into other tests we began to
+    create a library of utility methods that we could call which helped shorten our code. We
+    extended these methods so that we could instantiate our components directly without a descriptor
+    file. These utility methods became the initial core of uimaFIT. </para>
+  <section>
+    <title>Examples</title>
+    <para>There are several examples that can be found in the <emphasis>uimafit-examples</emphasis>
+      module.</para>
+    <itemizedlist>
+      <listitem>
+        <para>There are a number of examples of unit tests in both the test suite for the uimafit
+          project and the uimafit-examples project. In particular, there are some well-documented
+          unit tests in the latter which can be found in
+            <classname>RoomNumberAnnotator1Test</classname></para>
+      </listitem>
+      <listitem>
+        <para>You can improve your testing strategy by introducing a <classname>TestBase</classname>
+          class such as the one found in <classname>ExamplesTestBase</classname>. This class is
+          intended as a super class for your other test classes and sets up a
+            <interfacename>JCas</interfacename> that is always ready to use along with a
+            <interfacename>TypeSystemDescription</interfacename> and a
+            <interfacename>TypePriorities</interfacename>. An example test that subclasses from
+            <classname>ExamplesTestBase</classname> is
+            <classname>RoomNumberAnnotator2Test</classname>.</para>
+      </listitem>
+      <listitem>
+        <para>Most analysis engines that you want to test will generally be downstream of many other
+          components that add annotations to the CAS. These annotations will likely need to be in
+          the CAS so that a downstream analysis engine will do something sensible. This poses a
+          problem for tests because it may be undesirable to set up and run an entire pipeline every
+          time you want to test a downstream analysis engine. Furthermore, such tests can become
+          fragile in the face of behavior changes to upstream components. For this reason, it can be
+          advantageous to serialize a CAS as an XMI file and use this as a starting point rather
+          than running an entire pipeline. An example of this approach can be found in
+            <classname>XmiTest</classname>. </para>
+      </listitem>
+    </itemizedlist>
+  </section>
+  <section>
+    <title>Tips &amp; Tricks</title>
+    <para>The package <package>org.apache.uima.fit.testing</package> provides some utility classes
+      that can be handy when writing tests for UIMA components. You may find the following
+      suggestions useful:</para>
+    <itemizedlist>
+      <listitem>
+        <para>add a <classname>TokenBuilder</classname> to your <classname>TestBase</classname>
+          class. An example of this can be found in <classname>ComponentTestBase</classname>. This
+          makes it easy to add tokens and sentences to the CAS you are testing which is a common
+          task for many tests.</para>
+      </listitem>
+      <listitem>
+        <para>use a <classname>JCasBuilder</classname> to add text and annotations incrementally to
+          a JCas instead of first setting the text and then adding all annotations. </para>
+      </listitem>
+      <listitem>
+        <para>use a <classname>CasDumpWriter</classname> to write the CAS contents in a human
+          readable format to a file or to the console. Compare this with a previously written and
+          manually verified file to see if changes in the component result in changes of the
+          component's output.</para>
+      </listitem>
+    </itemizedlist>
+  </section>
+</chapter>

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.testing.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.testing.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.typesystem.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.typesystem.xml?rev=1462983&view=auto
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.typesystem.xml (added)
+++ uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.typesystem.xml Sun Mar 31 14:56:50 2013
@@ -0,0 +1,130 @@
+<chapter id="ugr.tools.uimafit.typesystem">
+  <title>Type System Detection</title>
+  <para>UIMA requires that types that are used in the CAS are defined in XML files - so-called
+      <emphasis>type system descriptions</emphasis> (TSD). Whenever a UIMA component is created, it
+    must be associated with such a type system. While it is possible to manually load the type
+    system descriptors and pass them to each UIMA component and to each created CAS, it is quite
+    inconvenient to do so. For this reason, uimaFIT supports the automatic detection of such files
+    in the classpath. Thus it becomes possible for a UIMA component provider to have a component's
+    types automatically detected, and thus the component becomes immediately usable by adding it to
+    the classpath.</para>
+  <section>
+    <title>Making types auto-detectable</title>
+    <para>The provider of a type system should create a file
+        <filename>META-INF/org.apache.uima.fit/types.txt</filename> in the classpath. This file
+      should define the locations of the type system descriptions. Assume that a type
+        <classname>org.apache.uima.fit.type.Token</classname> is specified in the TSD
+        <filename>org/apache/uima/fit/type/Token.xml</filename>, then the file should have the
+      following contents:</para>
+    <programlisting>classpath*:org/apache/uima/fit/type/Token.xml</programlisting>
+    <para>To specify multiple TSDs, add additional lines to the file. If you have a large number of
+      TSDs, you may prefer to add a pattern. Assume that we have a large number of TSDs under
+        <filename>org/apache/uima/fit/type</filename>, we can use the following pattern which
+      recursively scans the package <package>org.apache.uima.fit.type</package> and all sub-packages
+      for XML files and tries to load them as TSDs.</para>
+    <programlisting>classpath*:org/apache/uima/fit/type/**/*.xml</programlisting>
+    <para>Try to design your package structure in a way that TSDs and JCas wrapper classes
+      generated from them are separate from the rest of your code.</para>
+    <para>If it is not possible or inconvenient to add the <filename>types.txt</filename> file,
+      patterns can also be specified using the system property
+        <parameter>org.apache.uima.fit.type.import_pattern</parameter>. Multiple patterns may be
+      specified separated by semicolon<footnote>
+        <para>The <literal>\</literal> in the example is used as a line-continuation indicator. It
+          and all spaces following it should be omitted.</para>
+      </footnote>:</para>
+    <programlisting>-Dorg.apache.uima.fit.type.import_pattern=\
+  classpath*:org/apache/uima/fit/type/**/*.xml</programlisting>
+  </section>
+  <section>
+    <title>Using type auto-detection </title>
+    <para>The auto-detected type system can be obtained from the
+        <classname>TypeSystemDescriptionFactory</classname>:</para>
+    <programlisting>TypeSystemDescription tsd = 
+  TypeSystemDescriptionFactory.createTypeSystemDescription();</programlisting>
+    <para>Popular factory methods also support auto-detection:</para>
+    <programlisting>AnalysisEngine ae = createPrimitive(MyEngine.class);</programlisting>
+  </section>
+  <section>
+    <title>Multiple <filename>META-INF/org.apache.uima.fit/types.txt</filename> files</title>
+    <para>uimaFIT supports multiple <filename>types.txt</filename> files in the classpath (e.g. in
+      different JARs). The <filename>types.txt</filename> files are located via Spring using the
+      classpath search pattern: </para>
+    <programlisting>TYPE_MANIFEST_PATTERN = "classpath*:META-INF/org.apache.uima.fit/types.txt" </programlisting>
+    <para>This resolves to a list of URLs pointing to ALL <filename>types.txt</filename> files. The
+      resolved URLs are unique and will point either to a specific point in the file system or into
+      a specific JAR. These URLs can be handled by the standard Java URL loading mechanism.
+      Example:</para>
+    <programlisting>jar:/path/to/syntax-types.jar!/META-INF/org.apache.uima.fit/types.txt 
+jar:/path/to/token-types.jar!/META-INF/org.apache.uima.fit/types.txt</programlisting>
+    <para>uimaFIT then reads all patterns from all of these URLs and uses these to search the
+      classpath again. The patterns now resolve to a list of URLs pointing to the individual type
+      system XML descriptors. All of these URLs are collected in a set to avoid duplicate loading
+      (for performance optimization - not strictly necessary because the UIMA type system merger can
+      handle compatible duplicates). Then the descriptors are loaded into memory and merged using
+      the standard UIMA type system merger
+        (<methodname>CasCreationUtils.mergeTypeSystems()</methodname>). Example:</para>
+    <programlisting>jar:/path/to/syntax-types.jar!/desc/types/Syntax.xml 
+jar:/path/to/token-types.jar!/org/foobar/typesystems/Tokens.xml </programlisting>
+    <para>Voilà, the result is a type system covering all types that could be found in the
+      classpath.</para>
+    <para>It is recommended <orderedlist>
+        <listitem>
+          <para>to put type system descriptors into packages resembling a namespace you "own" and to
+            use a package-scoped wildcard
+            search<programlisting>classpath*:org/apache/uima/fit/type/**/*.xml</programlisting></para>
+        </listitem>
+        <listitem>
+          <para>or when putting descriptors into a "well-known" package like
+              <package>desc.type</package>, that the <filename>types.txt</filename> file should
+            explicitly list all type system descriptors instead of using a wildcard
+            search<programlisting>classpath*:desc/type/Token.xml 
+classpath*:desc/type/Syntax.xml </programlisting></para>
+        </listitem>
+      </orderedlist>Method 1 should be preferred. Both methods can be mixed. </para>
+  </section>
+  <section>
+    <title>Performance note and caching</title>
+    <para>Currently uimaFIT evaluates the patterns for TSDs once and caches the locations, but not
+      the actual merged type system description. A rescan can be forced using
+        <methodname>TypeSystemDescriptionFactory.forceTypeDescriptorsScan()</methodname>. This may
+      change in future.</para>
+  </section>
+  <section>
+    <title>Potential problems</title>
+    <para>The mechanism works fine. However, there are specific issues with Java in general that one
+      should be aware of.</para>
+    <section>
+      <title>m2eclipse fails to copy descriptors to <filename>target/classes</filename></title>
+      <para>There seems to be a bug in some older versions of m2eclipse that causes resources not
+        always to be copied to <filename>target/classes</filename>. If UIMA complains about type
+        definitions missing at runtime, try to <emphasis>clean/rebuild</emphasis> your project and
+        carefully check the m2eclipse console in the console view for error messages that might
+        cause m2eclipse to abort.</para>
+    </section>
+    <section>
+      <title>Class version conflicts</title>
+      <para>A problem can occur if you end up having multiple incompatible versions of the same type
+        system in the classpath. This is a general problem and not related to the auto-detection
+        feature. It is the same as when you have incompatible versions of a particular class (e.g.
+          <interfacename>JCas</interfacename> wrapper or some third-party-library) in the classpath.
+        The behavior of the Java Classloader is undefined in that case. The detection will do its
+        best to try and load everything it can find, but the UIMA type system merger may barf or you
+        may end up with undefined behavior at runtime because one of the class versions is used at
+        random. </para>
+    </section>
+    <section>
+      <title>Classes and resources in the default package</title>
+      <para>It is bad practice to place classes into the default (unnamed) package. In fact it is
+        not possible to import classes from the default package in another class. Similarly it is a
+        bad idea to put resources at the root of the classpath. The Spring documentation on
+        resources <ulink
+          url="http://static.springsource.org/spring/docs/3.0.x/reference/resources.html#resources-app-ctx-wildcards-in-resource-paths"
+          >explains this in detail</ulink>.</para>
+      <para>For this reason the <filename>types.txt</filename> resides in
+          <filename>/META-INF/org.apache.uima.fit</filename> and it is suggested that type system
+        descriptors reside either in a proper package like
+          <filename>/org/foobar/typesystems/XXX.xml</filename> or in
+          <filename>/desc/types/XXX.xml</filename>. </para>
+    </section>
+  </section>
+</chapter>

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.typesystem.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: uima/sandbox/uimafit/trunk/uimafit-docbook/src/docbook/tools.uimafit.typesystem.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml