You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2012/09/05 15:40:58 UTC
svn commit: r1381180 [1/2] - in /uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook: ./ language/

Author: pkluegl
Date: Wed Sep  5 13:40:58 2012
New Revision: 1381180

URL: http://svn.apache.org/viewvc?rev=1381180&view=rev
Log:
UIMA-2285
- restructured project
- added some documentation for expressions and declarations

Added:
    uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/
    uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.actions.xml
    uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.conditions.xml
    uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.declarations.xml
    uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.expressions.xml
    uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.quantifier.xml
Removed:
    uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/tools.textmarker.actions.xml
    uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/tools.textmarker.conditions.xml
Modified:
    uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/tools.textmarker.language.xml

Added: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.actions.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.actions.xml?rev=1381180&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.actions.xml (added)
+++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.actions.xml Wed Sep  5 13:40:58 2012
@@ -0,0 +1,1136 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+<!ENTITY imgroot "images/tools/tools.textmarker/" >
+<!ENTITY % uimaents SYSTEM "../../target/docbook-shared/entities.ent" >  
+%uimaents;
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+
+<section id="ugr.tools.tm.language.actions">
+	<title>Actions</title>
+
+	<section id="ugr.tools.tm.language.actions.add">
+		<title>ADD</title>
+		<para>
+			The ADD action adds all the elements of the passed
+			TextMarkerExpressions to a given list. For example, these expressions
+			could be a string, an integer variable or a list itself. For a
+			complete overview of TextMarker expressions see
+			<xref linkend='ugr.tools.tm.language.expressions' />
+			.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[ADD(ListVariable,(TextMarkerExpression)+)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->ADD(list, var)};]]></programlisting>
+			</para>
+			<para>
+				In this example, the variable 'var' is added to the list
+				'list'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.assign">
+		<title>ASSIGN</title>
+		<para>
+			The ASSIGN action assigns the value of the passed expression to
+			a variable of the same type.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[ASSIGN(BooleanVariable,BooleanExpression)]]></programlisting>
+			</para>
+			<para>
+				<programlisting><![CDATA[ASSIGN(NumberVariable,NumberExpression)]]></programlisting>
+			</para>
+			<para>
+				<programlisting><![CDATA[ASSIGN(StringVariable,StringExpression)]]></programlisting>
+			</para>
+			<para>
+				<programlisting><![CDATA[ASSIGN(TypeVariable,TypeExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->ASSIGN(amount, (amount/2))};]]></programlisting>
+			</para>
+			<para>
+				In this example, the value of the variable 'amount' is halved.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.call">
+		<title>CALL</title>
+		<para>
+			The CALL action initiates the execution of a different script
+			file or script block. Currently only complete script files are
+			supported.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[CALL(DifferentFile)]]></programlisting>
+			</para>
+			<para>
+				<programlisting><![CDATA[CALL(Block)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->CALL(NamedEntities)};]]></programlisting>
+			</para>
+			<para>
+				Here, a script 'NamedEntities' for named entity recognition is
+				executed.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.clear">
+		<title>CLEAR</title>
+		<para>
+			The CLEAR action removes all elements of the given list.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[CLEAR(ListVariable)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->CLEAR(SomeList)};]]></programlisting>
+			</para>
+			<para>
+				This rule clears the list 'SomeList'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.color">
+		<title>COLOR</title>
+		<para>
+			The COLOR action sets the color of an annotation type in the
+			modified view if the rule is fired. The background color is passed as
+			the second parameter. The font color can be changed by passing a
+			further color as third parameter. By default annotations are not
+			automatically selected when opening the modified view. This can be
+			changed for the matched annotations by passing true as fourth
+			parameter. The supported colors are: black, silver, gray,
+			white, maroon, red, purple, fuchsia, green, lime, olive, yellow,
+			navy, blue, aqua, lightblue, lightgreen, orange, pink, salmon, cyan,
+			violet, tan, brown, white, mediumpurple.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[COLOR(TypeExpression,StringExpression(, StringExpression
+				  (, BooleanExpression)?)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->COLOR(Headline, "red", "green", true)};]]></programlisting>
+			</para>
+			<para>
+				This rule colors all Headline annotations in the modified view.
+				Thereby background color is set to red, font color is set to green
+				and all 'Headline' annotations are selected when opening the
+				modified view.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.configure">
+		<title>CONFIGURE</title>
+		<para>
+			The CONFIGURE action can be used to configure the analysis
+			engine of the given namespace (first parameter). The parameters that
+			should be configured with corresponding values are passed as
+			name-value
+			pairs.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[CONFIGURE(StringExpression(,StringExpression = Expression)+)]]></programlisting>
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.create">
+		<title>CREATE</title>
+		<para>
+			The CREATE action is similar to the MARK action. It also
+			annotates the matched text fragments with a type annotation, but
+			additionally assigns values to a chosen subset of the type's feature
+			elements.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[CREATE(TypeExpression(,NumberExpression)*(,StringExpression = Expression)+)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{COUNT(ANY,0,10000,cnt)->CREATE(Headline,"size" = cnt)};]]></programlisting>
+			</para>
+			<para>
+				This rule counts the number of tokens of type ANY in a
+				Paragraph annotation and assigns the counted value to the int
+				variable 'cnt'. If the counted number is between 0 and 10000, a
+				Headline annotation is created for this Paragraph. Moreover the
+				feature named 'size' of Headline is set to the value of 'cnt'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.del">
+		<title>DEL</title>
+		<para>
+			The DEL action deletes the matched text fragments in the
+			modified
+			view.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[DEL]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Name{->DEL};]]></programlisting>
+			</para>
+			<para>
+				This rule deletes all text fragments that are annotated with a
+				Name annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.dynamicanchoring">
+		<title>DYNAMICANCHORING</title>
+		<para>
+			The DYNAMICANCHORING action turns dynamic anchoring on or off
+			(first parameter) and assigns the anchoring parameters penalty
+			(second parameter) and factor (third parameter).
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[DYNAMICANCHORING(BooleanExpression(,NumberExpression(,NumberExpression)?)?)]]></programlisting>
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.exec">
+		<title>EXEC</title>
+		<para>
+			The EXEC action initiates the execution of a different script
+			file or analysis engine on the complete input document independent of
+			the matched text and the current filtering settings. If the argument
+			refers to another script file, a new view on the document is created:
+			the complete text of the original CAS and with the default filtering
+			settings of the TextMarker analysis engine.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[EXEC(DifferentFile)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[ENGINE NamedEntities;
+              Document{->EXEC(NamedEntities)};]]></programlisting>
+			</para>
+			<para>
+				Here, an analysis engine for named entity recognition is
+				executed once on the complete document.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.fill">
+		<title>FILL</title>
+		<para>
+			The FILL action fills a chosen subset of the given type's
+			feature elements.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[FILL(TypeExpression(,StringExpression = Expression)+)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Headline{COUNT(ANY,0,10000,tokenCount)
+				  ->FILL(Headline,"size" = tokenCount)};]]></programlisting>
+			</para>
+			<para>
+				Here, the number of tokens within a Headline annotation is
+				counted and stored in variable 'tokenCount'. If the number of tokens
+				is within the interval [0;10000], the FILL action fills the
+				Headline's feature 'size' with the value of 'tokenCount'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.filtertype">
+		<title>FILTERTYPE</title>
+		<para>
+			This action filters the given types of annotations. They are now
+			ignored by rules. For more information on how rules work see
+			<xref linkend='ugr.tools.tm.language.inference' />
+			. Expressions are not yet supported. This action is complementary to
+			RETAINTYPE (see
+			<xref linkend='ugr.tools.tm.language.actions.retaintype' />
+			).
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[FILTERTYPE((TypeExpression(,TypeExpression)*))?]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->FILTERTYPE(SW)};]]></programlisting>
+			</para>
+			<para>
+				This rule filters all lowercase words in the input
+				document. This means they are ignored by all subsequent rules.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.gather">
+		<title>GATHER</title>
+		<para>
+			This action creates a complex structure, an annotation with
+			features. The optionally passed indexes (NumberExpressions after the
+			TypeExpression) can be used to create an annotation that spans the
+			matched information of several rule elements. The features are
+			collected using the indexes of the rule elements of the complete
+			rule.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[GATHER(TypeExpression(,NumberExpression)*
+				  (,StringExpression = NumberExpression)+)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[DECLARE Annotation A;
+        DECLARE Annotation B;
+        DECLARE Annotation C(Annotation a, Annotation b);
+        W{REGEXP("A")->MARK(A)};
+        W{REGEXP("B")->MARK(B)};
+        A B{-> GATHER(C, 1, 2, "a" = 1, "b" = 2)};]]></programlisting>
+			</para>
+			<para>
+				Two annotations A and B are declared and annotated. The last
+				rule creates an annotation C spanning the elements A (index 1 since
+				it is the first rule element) and B (index 2) with its features 'a'
+				set to annotation A (again index 1) and 'b' set to annotation B
+				(again index 2).
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.get">
+		<title>GET</title>
+		<para>
+			The GET action retrieves an element of the given list dependent on a
+			given strategy.
+			<table frame='all'>
+				<title>Currently supported strategies</title>
+				<tgroup cols='2' align='left' colsep='0.5' rowsep='0.5'>
+					<thead>
+						<row>
+							<entry>Strategy</entry>
+							<entry>Functionality</entry>
+						</row>
+					</thead>
+					<tbody>
+						<row>
+							<entry>dominant</entry>
+							<entry>finds the most frequently occurring element</entry>
+						</row>
+					</tbody>
+				</tgroup>
+			</table>
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[GET(ListExpression, Variable, StringExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->GET(list, var, "dominant")};]]></programlisting>
+			</para>
+			<para>
+				In this example, the element of the list 'list' that occurs
+				most is stored in the variable 'var'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.getfeature">
+		<title>GETFEATURE</title>
+		<para>
+			The GETFEATURE action stores the value of the matched
+			annotation's feature (first parameter) in the given variable (second
+			parameter).
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[GETFEATURE(StringExpression, Variable)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->GETFEATURE("language", stringVar)};]]></programlisting>
+			</para>
+			<para>
+				In this example, variable 'stringVar' will contain the value of
+				the feature 'language'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.getlist">
+		<title>GETLIST</title>
+		<para>
+			This action retrieves a list of types dependent on a given strategy.
+			<table frame='all'>
+				<title>Currently supported strategies</title>
+				<tgroup cols='2' align='left' colsep='0.5' rowsep='0.5'>
+					<thead>
+						<row>
+							<entry>Strategy</entry>
+							<entry>Functionality</entry>
+						</row>
+					</thead>
+					<tbody>
+						<row>
+							<entry>Types</entry>
+							<entry>get all types within the matched annotation</entry>
+						</row>
+						<row>
+							<entry>Types:End</entry>
+							<entry>get all types that end at the same offset as the matched
+								annotation
+							</entry>
+						</row>
+						<row>
+							<entry>Types:Begin</entry>
+							<entry>get all types that start at the same offset as the
+								matched
+								annotation
+							</entry>
+						</row>
+					</tbody>
+				</tgroup>
+			</table>
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[GETLIST(ListVariable, StringExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->GETLIST(list, "Types")};]]></programlisting>
+			</para>
+			<para>
+				Here, a list of all types within the document is created and
+				assigned to list variable 'list'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.log">
+		<title>LOG</title>
+		<para>
+			The LOG action simply writes a log message.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[LOG(StringExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->LOG("processed")};]]></programlisting>
+			</para>
+			<para>
+				This rule writes a log message with the string "processed".
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.mark">
+		<title>MARK</title>
+		<para>
+			The MARK action is the most important action in the TextMarker
+			system. It creates a new annotation of the given type. The optionally
+			passed indexes (NumberExpressions after the TypeExpression) can be
+			used to create an annotation that spans the matched information of
+			several rule elements.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MARK(TypeExpression(,NumberExpression)*)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Freeline Paragraph{->MARK(ParagraphAfterFreeline,1,2)};]]></programlisting>
+			</para>
+			<para>
+				This rule matches on a free line followed by a Paragraph
+				annotation and annotates both in a single ParagraphAfterFreeline
+				annotation. The two numerical expressions at the end of the mark
+				action state that the matched text of the first and the second rule
+				elements are joined to create the boundaries of the new annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.markfast">
+		<title>MARKFAST</title>
+		<para>
+			The MARKFAST action creates annotations of the given type (first
+			parameter) if an element of the passed list (second parameter) occurs
+			within the window of the matched annotation. Thereby the created
+			annotation doesn't cover the whole matched annotation. Instead it
+			only covers the text of the found occurrence. The third parameter is
+			optional. It defines if the MARKFAST action should ignore the case,
+			whereby its default value is false. The optional fourth parameter
+			specifies a character threshold for ignoring the case. It is
+			only relevant if the ignore-case value is set to true. The last
+			parameter is set to true by default and specifies whether whitespaces
+			in the entries of the dictionary should be ignored. For more
+			information on lists see
+			<xref linkend='ugr.tools.tm.language.declarations.ressource' />
+			. In addition to external word lists, string list variables can be
+			used.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MARKFAST(TypeExpression,ListExpression(,BooleanExpression
+				  (,NumberExpression,(BooleanExpression)?)?)?)]]></programlisting>
+				<programlisting><![CDATA[MARKFAST(TypeExpression,StringListExpression(,BooleanExpression
+          (,NumberExpression,(BooleanExpression)?)?)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[WORDLIST FirstNameList = 'FirstNames.txt';
+        DECLARE FirstName;
+        Document{-> MARKFAST(FirstName, FirstNameList, true, 2)};]]></programlisting>
+			</para>
+			<para>
+				This rule annotates all first names listed in the list
+				'FirstNameList' within the document and ignores the case if the
+				length of the word
+				is greater than 2.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.marklast">
+		<title>MARKLAST</title>
+		<para>
+			The MARKLAST action annotates the last token of the matched
+			annotation with the given type.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MARKLAST(TypeExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->MARKLAST(Last)};]]></programlisting>
+			</para>
+			<para>
+				This rule annotates the last token of the document with the
+				annotation Last.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.markonce">
+		<title>MARKONCE</title>
+		<para>
+			The MARKONCE action has the same functionality as the MARK
+			action, but creates a new annotation only if it does not yet exist.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MARKONCE(NumberExpression,TypeExpression(,NumberExpression)*)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Freeline Paragraph{->MARKONCE(ParagraphAfterFreeline,1,2)};]]></programlisting>
+			</para>
+			<para>
+				This rule matches on a free line followed by a Paragraph and
+				annotates both in a single ParagraphAfterFreeline annotation if it
+				is not already annotated with ParagraphAfterFreeline annotation. The
+				two numerical expressions at the end of the MARKONCE action state
+				that the matched text of the first and the second rule elements are
+				joined to create the boundaries of the new annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.markscore">
+		<title>MARKSCORE</title>
+		<para>
+			The MARKSCORE action is similar to the MARK action. It also creates a
+			new annotation of the given type, but only if it does not yet exist.
+			The optionally passed indexes (parameters after the TypeExpression)
+			can be used to create an annotation that spans the matched
+			information of several rule elements. Additionally a score value
+			(first parameter) is added to the heuristic score value of the
+			annotation. For more information on heuristic scores see
+			<xref linkend='ugr.tools.tm.language.score' />
+			.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MARKSCORE(NumberExpression,TypeExpression(,NumberExpression)*)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Freeline Paragraph{->MARKSCORE(10,ParagraphAfterFreeline,1,2)};]]></programlisting>
+			</para>
+			<para>
+				This rule matches on a free line followed by a paragraph and
+				annotates both in a single ParagraphAfterFreeline annotation. The
+				two number expressions at the end of the mark action indicate that
+				the matched text of the first and the second rule elements are
+				joined to create the boundaries of the new annotation. Additionally
+				the score '10' is added to the heuristic threshold of this
+				annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.marktable">
+		<title>MARKTABLE</title>
+		<para>
+			The MARKTABLE action creates annotations of the given type (first
+			parameter) if an element of the given column (second parameter) of a
+			passed table (third parameter) occurs within the window of the
+			matched annotation. Thereby the created annotation doesn't cover the
+			whole matched annotation. Instead it only covers the text of the
+			found occurrence. Optionally the MARKTABLE action is able to assign
+			entries of the given table to features of the created annotation.
+			For
+			more information on tables see
+			<xref linkend='ugr.tools.tm.language.declarations.ressource' />
+			. Additionally several configuration parameters are possible. (See
+			example.)
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MARKTABLE(TypeExpression, NumberExpression, TableExpression 
+				  (,BooleanExpression, NumberExpression, StringExpression, NumberExpression)? 
+				  (,StringExpression = NumberExpression)+)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[WORDTABLE TestTable = 'TestTable.csv';
+        DECLARE Annotation Struct(STRING first);
+        Document{-> MARKTABLE(Struct, 1, TestTable, true, 4, ".,-", 2, "first" = 2)};]]></programlisting>
+			</para>
+			<para>
+				In this example, the whole document is searched for all
+				occurrences of the entries of the first column of the given table
+				'TestTable'. For each occurrence an annotation of the type Struct is
+				created and its feature 'first' is filled with the entry of the
+				second column. Moreover the case of the word is ignored if the
+				length of the word exceeds 4. Additionally the chars '.', ',' and
+				'-' are ignored, but at maximum two of them.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.matchedtext">
+		<title>MATCHEDTEXT</title>
+		<para>
+			The MATCHEDTEXT action saves the text of the matched annotation
+			in a passed String variable. The optionally passed indexes can be
+			used to match the text of several rule elements.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MATCHEDTEXT(StringVariable(,NumberExpression)*)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Headline Paragraph{->MATCHEDTEXT(stringVariable,1,2)};]]></programlisting>
+			</para>
+			<para>
+				The text covered by the Headline (rule element 1) and the
+				Paragraph (rule element 2) annotation is saved in variable
+				'stringVariable'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.merge">
+		<title>MERGE</title>
+		<para>
+			The MERGE action merges a number of given lists. The first
+			parameter defines if the merge is done as intersection (false) or as
+			union (true). The second parameter is the list variable that will
+			contain the result.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MERGE(BooleanExpression, ListVariable, ListExpression, (ListExpression)+)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->MERGE(false, listVar, list1, list2, list3)};]]></programlisting>
+			</para>
+			<para>
+				The elements that occur in all three lists will be placed in
+				the list 'listVar'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.remove">
+		<title>REMOVE</title>
+		<para>
+			The REMOVE action removes lists or single values from a given
+			list.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[REMOVE(ListVariable,(Argument)+)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->REMOVE(list, var)};]]></programlisting>
+			</para>
+			<para>
+				In this example, the variable 'var' is removed from the list
+				'list'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.removeduplicate">
+		<title>REMOVEDUPLICATE</title>
+		<para>
+			This action removes all duplicates within a given list.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[REMOVEDUPLICATE(ListVariable)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->REMOVEDUPLICATE(list)};]]></programlisting>
+			</para>
+			<para>
+				Here, all duplicates in list 'list' are removed.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.replace">
+		<title>REPLACE</title>
+		<para>
+			The REPLACE action replaces the text of all matched annotations with
+			the given StringExpression. It remembers the modification for the
+			matched annotations and shows them in the modified view (see
+			<xref linkend='ugr.tools.tm.views.browser' />
+			).
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[REPLACE(StringExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[FirstName{->REPLACE("first name")};]]></programlisting>
+			</para>
+			<para>
+				This rule replaces all first names with the string 'first
+				name'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.retaintype">
+		<title>RETAINTYPE</title>
+		<para>
+			The RETAINTYPE action retains the given types. This means that they
+			are no longer ignored by rules. This action is complementary to
+			FILTERTYPE (see
+			<xref linkend='ugr.tools.tm.language.actions.filtertype' />
+			).
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[RETAINTYPE((TypeExpression(,TypeExpression)*))?]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->RETAINTYPE(SPACE)};]]></programlisting>
+			</para>
+			<para>
+				All spaces are retained and can be matched by rules.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.setfeature">
+		<title>SETFEATURE</title>
+		<para>
+			The SETFEATURE action sets the value of a feature of the
+			matched
+			complex structure.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[SETFEATURE(StringExpression,Expression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->SETFEATURE("language","en")};]]></programlisting>
+			</para>
+			<para>
+				Here, the feature 'language' of the input document is set to
+				English.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.transfer">
+		<title>TRANSFER</title>
+		<para>
+			The TRANSFER action creates a new feature structure and adds all
+			compatible features of the matched annotation.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[TRANSFER(TypeExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->TRANSFER(LanguageStorage)};]]></programlisting>
+			</para>
+			<para>
+				Here, a new feature structure LanguageStorage is created and
+				the compatible features of the Document annotation are copied. E.g.,
+				if LanguageStorage defines a feature named 'language', then the
+				feature value of the Document annotation is copied.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.trie">
+		<title>TRIE</title>
+		<para>
+			The TRIE action uses an external multi tree word list to
+			annotate the matched annotation and provides several configuration
+			parameters.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[TRIE((String = Type)+,ListExpression,BooleanExpression,NumberExpression,
+				  BooleanExpression,NumberExpression,StringExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{->TRIE("FirstNames.txt" = FirstName, "Companies.txt" = Company, 
+				  'Dictionary.mtwl', true, 4, false, 0, ".,-/")};]]></programlisting>
+			</para>
+			<para>
+				Here, the dictionary 'Dictionary.mtwl' that contains word lists
+				for first names and companies is used to annotate the document. The
+				words previously contained in the file 'FirstNames.txt' are
+				annotated with the type FirstName and the words in the file
+				'Companies.txt' with the type Company. The case of the word is
+				ignored if the length of the word exceeds 4. The edit distance is
+				deactivated. The cost of an edit operation can currently not be
+				configured by an argument. The last argument additionally defines
+				several chars that will be ignored.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.unmark">
+		<title>UNMARK</title>
+		<para>
+			The UNMARK action removes the annotation of the given type
+			overlapping the matched annotation.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[UNMARK(TypeExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Headline{->UNMARK(Headline)};]]></programlisting>
+			</para>
+			<para>
+				Here, the headline annotation is removed.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.actions.unmarkall">
+		<title>UNMARKALL</title>
+		<para>
+			The UNMARKALL action removes all the annotations of the given
+			type and all of its descendants overlapping the matched annotation,
+			unless the annotation is of at least one type in the passed list.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[UNMARKALL(TypeExpression, TypeListExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Annotation{->UNMARKALL(Annotation, {Headline})};]]></programlisting>
+			</para>
+			<para>
+				Here, all annotations but headlines are removed.
+			</para>
+		</section>
+	</section>
+
+</section>
\ No newline at end of file

Added: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.conditions.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.conditions.xml?rev=1381180&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.conditions.xml (added)
+++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.conditions.xml Wed Sep  5 13:40:58 2012
@@ -0,0 +1,933 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+<!ENTITY imgroot "images/tools/tools.textmarker/" >
+<!ENTITY % uimaents SYSTEM "../../target/docbook-shared/entities.ent" >  
+%uimaents;
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+
+<section id="ugr.tools.tm.language.conditions">
+	<title>Conditions</title>
+
+	<section id="ugr.tools.tm.language.conditions.after">
+		<title>AFTER</title>
+		<para>
+			The AFTER condition evaluates true if the matched annotation
+			starts after the beginning of an arbitrary annotation of the passed
+			type. If a list of types is passed, this has to be true for at least
+			one of them.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[AFTER(Type|TypeListExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[CW{AFTER(SW)};]]></programlisting>
+			</para>
+			<para>
+				Here, the rule matches on a capitalized word if there is any
+				small written word previously.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.and">
+		<title>AND</title>
+		<para>
+			The AND condition is a composed condition and evaluates true if
+			all contained conditions evaluate true.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[AND(Condition1,...,ConditionN)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{AND(PARTOF(Headline),CONTAINS(Keyword))
+				  ->MARK(ImportantHeadline)};]]></programlisting>
+			</para>
+			<para>
+				In this example a Paragraph is annotated with an
+				ImportantHeadline annotation if it is part of a Headline and
+				contains a Keyword annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.before">
+		<title>BEFORE</title>
+		<para>
+			The BEFORE condition evaluates true if the matched annotation
+			starts before the beginning of an arbitrary annotation of the passed
+			type. If a list of types is passed, this has to be true for at least
+			one of them.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[BEFORE(Type|TypeListExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[CW{BEFORE(SW)};]]></programlisting>
+			</para>
+			<para>
+				Here, the rule matches on a capitalized word if there is any
+				small written word afterwards.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.contains">
+		<title>CONTAINS</title>
+		<para>
+			The CONTAINS condition evaluates true on a matched annotation
+			if
+			the frequency of the passed type lies within an optionally passed
+			interval. The limits of the passed interval are per default
+			interpreted as absolute numeral values. By passing a further boolean
+			parameter set to true the limits are interpreted as percentage
+			values.
+			If no interval parameters are passed at all the condition
+			checks
+			whether the matched annotation contains at least one
+			occurrence of the
+			passed type.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[CONTAINS(Type(,NumberExpression,NumberExpression(,BooleanExpression)?)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{CONTAINS(Keyword)->MARK(KeywordParagraph)};]]></programlisting>
+			</para>
+			<para>
+				A Paragraph is annotated with a KeywordParagraph annotation if
+				it contains a Keyword annotation.
+			</para>
+			<para>
+				<programlisting><![CDATA[Paragraph{CONTAINS(Keyword,2,4)->MARK(KeywordParagraph)};]]></programlisting>
+			</para>
+			<para>
+				A Paragraph is annotated with a KeywordParagraph annotation if
+				it contains between two and four Keyword annotations.
+			</para>
+			<para>
+				<programlisting><![CDATA[Paragraph{CONTAINS(Keyword,50,100,true)->MARK(KeywordParagraph)};]]></programlisting>
+			</para>
+			<para>
+				A Paragraph is annotated with a KeywordParagraph annotation if it
+				contains between 50% and 100% Keyword annotations. This is
+				calculated based on the tokens of the Paragraph. If the Paragraph
+				contains six basic annotations (see
+				<xref linkend='ugr.tools.tm.language.seeding' />
+				), two of them are part of one Keyword annotation and one basic
+				annotation is also annotated with a Keyword annotation, then the
+				percentage of the contained Keywords is 50%.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.contextcount">
+		<title>CONTEXTCOUNT</title>
+		<para>
+			The CONTEXTCOUNT condition numbers all occurrences of the
+			matched type within the context of a passed type's annotation
+			consecutively, thus assigning an index to each occurrence.
+			Additionally it stores the index of the matched annotation in a
+			numerical variable if one is passed. The condition evaluates true if
+			the index of the matched annotation is within a passed interval. If
+			no interval is passed, the condition always evaluates true.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[CONTEXTCOUNT(Type(,NumberExpression,NumberExpression)?(,Variable)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Keyword{CONTEXTCOUNT(Paragraph,2,3,var)
+				  ->MARK(SecondOrThirdKeywordInParagraph)};]]></programlisting>
+			</para>
+			<para>
+				Here, the position of the matched Keyword annotation within a
+				Paragraph annotation is calculated and stored in the variable 'var'.
+				If the counted value lies within the interval [2,3] the matched
+				Keyword is annotated with the SecondOrThirdKeywordInParagraph
+				annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.count">
+		<title>COUNT</title>
+		<para>
+			The COUNT condition can be used in two different ways. In the
+			first case (see first definition), it counts the number of
+			annotations of the passed type within the window of the matched
+			annotation and stores the amount in a numerical variable if such a
+			variable is passed. The condition evaluates true if the counted
+			amount is within a specified interval. If no interval is passed, the
+			condition always evaluates true. In the second case (see second
+			definition), it counts the number of occurrences of the passed
+			VariableExpression (second parameter) within the passed list (first
+			parameter) and stores the amount in a numerical variable if such a
+			variable is passed. Again the condition evaluates true if the counted
+			amount is within a specified interval. If no interval is passed, the
+			condition always evaluates true.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[COUNT(Type(,NumberExpression,NumberExpression)?(,NumberVariable)?)]]></programlisting>
+			</para>
+			<para>
+				<programlisting><![CDATA[COUNT(ListExpression,VariableExpression
+				  (,NumberExpression,NumberExpression)?(,NumberVariable)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{COUNT(Keyword,1,10,var)->MARK(KeywordParagraph)};]]></programlisting>
+			</para>
+			<para>
+				Here, the amount of Keyword annotations within a Paragraph is
+				calculated and stored in the variable 'var'. If one to ten Keywords
+				were counted, the Paragraph is marked with a KeywordParagraph
+				annotation.
+			</para>
+			<para>
+				<programlisting><![CDATA[Paragraph{COUNT(list,"author",5,7,var)};]]></programlisting>
+			</para>
+			<para>
+				Here, the number of occurrences of STRING "author" within the
+				STRINGLIST 'list' is counted and stored in the variable 'var'. If
+				"author" occurs five to seven times within 'list', the condition
+				evaluates true.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.currentcount">
+		<title>CURRENTCOUNT</title>
+		<para>
+			The CURRENTCOUNT condition numbers all occurrences of the matched
+			type within the whole document consecutively, thus assigning an index
+			to each occurrence. Additionally it stores the index of the matched
+			annotation in a numerical variable if one is passed. The condition
+			evaluates true if the index of the matched annotation is within a
+			specified interval. If no interval is passed, the condition always
+			evaluates true.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[CURRENTCOUNT(Type(,NumberExpression,NumberExpression)?(,Variable)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{CURRENTCOUNT(Keyword,3,3,var)->MARK(ParagraphWithThirdKeyword)};]]></programlisting>
+			</para>
+			<para>
+				Here, the Paragraph which contains the third Keyword of the
+				whole document is annotated with the ParagraphWithThirdKeyword
+				annotation. The index is stored in the variable 'var'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.endswith">
+		<title>ENDSWITH</title>
+		<para>
+			The ENDSWITH condition evaluates true if an annotation of the
+			given type ends exactly at the same position as the matched
+			annotation. If a list of types is passed, this has to be true for at
+			least one of them.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[ENDSWITH(Type|TypeListExpression) ]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{ENDSWITH(SW)};]]></programlisting>
+			</para>
+			<para>
+				Here, the rule matches on a Paragraph annotation if it ends
+				with a small written word.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.feature">
+		<title>FEATURE</title>
+		<para>
+			The FEATURE condition compares a feature of the matched
+			annotation with the second argument.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[FEATURE(StringExpression,Expression) ]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{FEATURE("language",targetLanguage)};]]></programlisting>
+			</para>
+			<para>
+				This rule matches if the feature named 'language' of the
+				document annotation equals the value of the variable
+				'targetLanguage'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.if">
+		<title>IF</title>
+		<para>
+			The IF condition evaluates true if the contained boolean
+			expression does.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[IF(BooleanExpression) ]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{IF(keywordAmount > 5)->MARK(KeywordParagraph)};]]></programlisting>
+			</para>
+			<para>
+				A Paragraph annotation is annotated with a KeywordParagraph
+				annotation if the value of the variable 'keywordAmount' is greater
+				than five.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.inlist">
+		<title>INLIST</title>
+		<para>
+			The INLIST condition is fulfilled if the matched annotation is listed
+			in a given word or string list. The (relative) edit distance
+			is
+			currently disabled.
+			<!-- ATTENTION: it seems the edit distance is still disabled? what does 
+				this mean? -->
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[INLIST(WordList(,NumberExpression,(BooleanExpression)?)?) ]]></programlisting>
+			</para>
+			<para>
+				<programlisting><![CDATA[INLIST(StringList(,NumberExpression,(BooleanExpression)?)?) ]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Keyword{INLIST(specialKeywords.txt)->MARK(SpecialKeyword)};]]></programlisting>
+			</para>
+			<para>
+				A Keyword is annotated with the type SpecialKeyword if the text
+				of the Keyword annotation is listed in the word list
+				'specialKeywords.txt'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.is">
+		<title>IS</title>
+		<para>
+			The IS condition evaluates true if there is an annotation of the
+			given type with the same beginning and ending offsets as the
+			matched
+			annotation. If a list of types is given, the condition
+			evaluates true
+			if at least one of them fulfills the former condition.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[IS(Type|TypeListExpression) ]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Author{IS(Englishman)->MARK(EnglishAuthor)};]]></programlisting>
+			</para>
+			<para>
+				If an Author annotation is also annotated with an Englishman
+				annotation, it is annotated with an EnglishAuthor annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.last">
+		<title>LAST</title>
+		<para>
+			The LAST condition evaluates true if the type of the last token
+			within the window of the matched annotation is of the given type.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[LAST(TypeExpression) ]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{LAST(CW)};]]></programlisting>
+			</para>
+			<para>
+				This rule fires if the last token of the document is a
+				capitalized word.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.mofn">
+		<title>MOFN</title>
+		<para>
+			The MOFN condition is a composed condition. It evaluates true if
+			the number of contained conditions evaluating true is within a given
+			interval.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MOFN(NumberExpression,NumberExpression,Condition1,...,ConditionN) ]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{MOFN(1,1,PARTOF(Headline),CONTAINS(Keyword))
+				  ->MARK(HeadlineXORKeywords)};]]></programlisting>
+			</para>
+			<para>
+				A Paragraph is marked as a HeadlineXORKeywords if the matched
+				text is either part of a Headline annotation or contains Keyword
+				annotations.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.near">
+		<title>NEAR</title>
+		<para>
+			The NEAR condition is fulfilled if the distance of the matched
+			annotation to an annotation of the given type is within a given
+			interval. The direction is defined by a boolean parameter, whose
+			default value is true, therefore searching forward. By default this
+			condition works on an unfiltered index. An optional fifth boolean
+			parameter can be set to true to get the condition being evaluated on
+			a filtered index.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[NEAR(TypeExpression,NumberExpression,NumberExpression
+				  (,BooleanExpression(,BooleanExpression)?)?) ]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{NEAR(Headline,0,10,false)->MARK(NoHeadline)};]]></programlisting>
+			</para>
+			<para>
+				A Paragraph that starts at most ten tokens after a Headline
+				annotation is annotated with the NoHeadline annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.not">
+		<title>NOT</title>
+		<para>
+			The NOT condition negates the result of its contained
+			condition.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA["-"Condition]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{-PARTOF(Headline)->MARK(Headline)};]]></programlisting>
+			</para>
+			<para>
+				A Paragraph that is not part of a Headline annotation so far is
+				annotated with a Headline annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.or">
+		<title>OR</title>
+		<para>
+			The OR condition is a composed condition and evaluates true if
+			at least one contained condition evaluates true.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[OR(Condition1,...,ConditionN)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{OR(PARTOF(Headline),CONTAINS(Keyword))->MARK(ImportantParagraph)};]]></programlisting>
+			</para>
+			<para>
+				In this example a Paragraph is annotated with the
+				ImportantParagraph annotation if it is a Headline or contains
+				Keyword annotations.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.parse">
+		<title>PARSE</title>
+		<para>
+			The PARSE condition is fulfilled if the text covered by the
+			matched annotation can be transformed into a value of the given
+			variable's type. If this is possible, the parsed value is
+			additionally assigned to the passed variable.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[PARSE(variable)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[NUM{PARSE(var)};]]></programlisting>
+			</para>
+			<para>
+				If the variable 'var' is of an appropriate numeric type, the
+				value of NUM is parsed and subsequently stored in 'var'.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.partof">
+		<title>PARTOF</title>
+		<para>
+			The PARTOF condition is fulfilled if the matched annotation is
+			part of an annotation of the given type. However it is not necessary
+			that the matched annotation is smaller than the annotation of the
+			given type. Use the (much slower) PARTOFNEQ condition instead if this
+			is needed. If a type list is given, the condition evaluates true if
+			the former described condition for a single type is fulfilled for at
+			least one of the types in the list.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[PARTOF(Type|TypeListExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{PARTOF(Headline) -> MARK(ImportantParagraph)};]]></programlisting>
+			</para>
+			<para>
+				A Paragraph is an ImportantParagraph if the matched text is
+				part of a Headline annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.partofneq">
+		<title>PARTOFNEQ</title>
+		<para>
+			The PARTOFNEQ condition is fulfilled if the matched annotation
+			is part of (smaller than and inside of) an annotation of the given
+			type. If also annotations of the same size should be acceptable, use
+			the PARTOF condition. If a type list is given, the condition
+			evaluates true if the former described condition is fulfilled for at
+			least one of the types in the list.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[PARTOFNEQ(Type|TypeListExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[W{PARTOFNEQ(Headline) -> MARK(ImportantWord)};]]></programlisting>
+			</para>
+			<para>
+				A word is an ImportantWord if it is part of a headline.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.position">
+		<title>POSITION</title>
+		<para>
+			The POSITION condition is fulfilled if the matched type is the
+			k-th occurrence of this type within the window of an annotation of the
+			passed type, whereby k is defined by the value of the passed
+			NumberExpression. If the additional boolean parameter is set to false,
+			then k counts the occurrences of the minimal annotations.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[POSITION(Type,NumberExpression(,BooleanExpression)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Keyword{POSITION(Paragraph,2)->MARK(SecondKeyword)};]]></programlisting>
+			</para>
+			<para>
+				The second Keyword in a Paragraph is annotated with the type
+				SecondKeyword.
+			</para>
+			<para>
+				<programlisting><![CDATA[Keyword{POSITION(Paragraph,2,false)->MARK(SecondKeyword)};]]></programlisting>
+			</para>
+			<para>
+				A Keyword in a Paragraph is annotated with the type
+				SecondKeyword, if it starts at the same offset as the second
+				(visible) TextMarkerBasic annotation, which normally corresponds to
+				the tokens.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.regexp">
+		<title>REGEXP</title>
+		<para>
+			The REGEXP condition is fulfilled if the given pattern matches on the
+			matched annotation. However, if a string variable is given as the
+			first
+			argument, then the pattern is evaluated on the value of the
+			variable.
+			For more details on the syntax of regular
+			expressions, have a
+			look at
+			the
+			<ulink
+				url="http://docs.oracle.com/javase/1.4.2/docs/api/java/util/regex/Pattern.html">Java API</ulink>.
+			By default the REGEXP condition is case-sensitive. To change this,
+			add an optional boolean parameter set to true.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[REGEXP((StringVariable,)? StringExpression(,BooleanExpression)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Keyword{REGEXP("..")->MARK(SmallKeyword)};]]></programlisting>
+			</para>
+			<para>
+				A Keyword that only consists of two chars is annotated with a
+				SmallKeyword annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.score">
+		<title>SCORE</title>
+		<para>
+			The SCORE condition evaluates the heuristic score of the matched
+			annotation. This score is set or changed by the MARK action.
+			The
+			condition is fulfilled if the score of the matched annotation is
+			in a
+			given interval. Optionally the score can be stored in a
+			variable.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[SCORE(NumberExpression,NumberExpression(,Variable)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[MaybeHeadline{SCORE(40,100)->MARK(Headline)};]]></programlisting>
+			</para>
+			<para>
+				An annotation of the type MaybeHeadline is annotated with
+				Headline if its score is between 40 and 100.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.size">
+		<title>SIZE</title>
+		<para>
+			The SIZE condition counts the number of elements in the given
+			list. By default this condition always evaluates true. If an interval
+			is passed, it evaluates true if the counted number of list elements
+			is within the interval. The counted number can be stored in an
+			optionally passed numerical variable.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[SIZE(ListExpression(,NumberExpression,NumberExpression)?(,Variable)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Document{SIZE(list,4,10,var)};]]></programlisting>
+			</para>
+			<para>
+				This rule fires if the given list contains between 4 and 10
+				elements. Additionally, the exact amount is stored in the variable
+				var.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.startswith">
+		<title>STARTSWITH</title>
+		<para>
+			The STARTSWITH condition evaluates true if an annotation of the
+			given type starts exactly at the same position as the matched
+			annotation. If a type list is given, the condition evaluates true if
+			the former is true for at least one of the given types in the list.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[STARTSWITH(Type|TypeListExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{STARTSWITH(SW)};]]></programlisting>
+			</para>
+			<para>
+				Here, the rule matches on a Paragraph annotation if it starts
+				with a small written word.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.totalcount">
+		<title>TOTALCOUNT</title>
+		<para>
+			The TOTALCOUNT condition counts the annotations of the passed
+			type within the whole document and stores the amount in an optionally
+			passed numerical variable. The condition evaluates true if the
+			amount
+			is within the passed interval. If no interval is passed, the
+			condition always evaluates true.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[TOTALCOUNT(Type(,NumberExpression,NumberExpression(,Variable)?)?)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{TOTALCOUNT(Keyword,1,10,var)->MARK(KeywordParagraph)};]]></programlisting>
+			</para>
+			<para>
+				Here, the amount of Keyword annotations within the whole
+				document is calculated and stored in the variable 'var'. If one to
+				ten Keywords were counted, the Paragraph is marked with a
+				KeywordParagraph annotation.
+			</para>
+		</section>
+	</section>
+
+	<section id="ugr.tools.tm.language.conditions.vote">
+		<title>VOTE</title>
+		<para>
+			The VOTE condition counts the annotations of the given two types
+			within the window of the matched annotation and evaluates true
+			if it
+			found more annotations of the first type.
+		</para>
+		<section>
+			<title>
+				<emphasis role="bold">Definition:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[VOTE(TypeExpression,TypeExpression)]]></programlisting>
+			</para>
+		</section>
+		<section>
+			<title>
+				<emphasis role="bold">Example:</emphasis>
+			</title>
+			<para>
+				<programlisting><![CDATA[Paragraph{VOTE(FirstName,LastName)};]]></programlisting>
+			</para>
+			<para>
+				Here, this rule fires if a paragraph contains more firstnames
+				than lastnames.
+			</para>
+		</section>
+	</section>
+
+</section>
\ No newline at end of file

Added: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.declarations.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.declarations.xml?rev=1381180&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.declarations.xml (added)
+++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/language/tools.textmarker.language.declarations.xml Wed Sep  5 13:40:58 2012
@@ -0,0 +1,269 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+<!ENTITY imgroot "images/tools/tools.textmarker/" >
+<!ENTITY % uimaents SYSTEM "../../target/docbook-shared/entities.ent" >  
+%uimaents;
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+
+<section id="ugr.tools.tm.language.declarations">
+	<title>Declarations</title>
+	<para>
+		There are three different kinds of declarations in the
+		TextMarker
+		system:
+		Declarations of types with optional feature
+		definitions,
+		declarations of variables and declarations for importing
+		external
+		resources, further TextMarker scripts and UIMA components
+		such as type
+		systems and analysis engines.
+	</para>
+	<section id="ugr.tools.tm.language.declarations.type">
+		<title>Type</title>
+		<para>
+			Type declarations define new kinds of annotation types and
+			optionally
+			their features.
+			<section>
+				<title>
+					<emphasis role="bold">Definition:</emphasis>
+				</title>
+				<para>
+					<programlisting><![CDATA[typeDeclaration    -> "DECLARE" (AnnotationType)? 
+					     Identifier ("," Identifier )*
+					  | "DECLARE" AnnotationType Identifier ( "(" featureDeclaration ")" )?
+featureDeclaration -> ( (AnnotationType | "STRING" | "INT" | "DOUBLE" | 
+					  "BOOLEAN") Identifier)+]]></programlisting>
+				</para>
+			</section>
+			<section>
+				<title>
+					<emphasis role="bold">Example:</emphasis>
+				</title>
+				<para>
+					<programlisting><![CDATA[DECLARE SimpleType1, SimpleType2; // <- two new types with the parent 
+                                  // type "Annotation"
+DECLARE ParentType NewType (SomeType feature1, INT feature2); // <- defines 
+      // a new type "NewType" with parent type "ParentType" and two features]]></programlisting>
+				</para>
+				<para>
+					Attention: Types with features need
+					a parent type in their
+					declaration. If no
+					special parent type is
+					requested, just use type
+					Annotation as
+					default parent
+					type.
+				</para>
+			</section>
+		</para>
+	</section>
+	<section id="ugr.tools.tm.language.declarations.variable">
+		<title>Variable</title>
+		<para>
+			Variable declarations define new variables. There are 12 kinds of
+			variables:
+			<itemizedlist mark='opencircle'>
+				<listitem>
+					<para>
+						Type variable: A variable that represents an annotation
+						type.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Type list variable: A variable that represents a list of
+						annotation
+						types.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Integer variable: A variable that represents an integer.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Integer list variable: A variable that represents a list of
+						integers.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Float variable: A variable that represents a
+						floating-point
+						number in single precision.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Float list variable: A variable that represents a list of
+						floating-point numbers in single precision.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Double variable: A variable that represents a
+						floating-point
+						number in double precision.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Double list variable: A variable that represents a list
+						of
+						floating-point numbers in double precision.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						String variable: A variable that represents a string.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						String list variable: A variable that represents a list of strings.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Boolean
+						variable: A variable that represents a boolean.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Boolean list variable: A variable that represents a list of
+						booleans.
+					</para>
+				</listitem>
+			</itemizedlist>
+			<section>
+				<title>
+					<emphasis role="bold">Example:</emphasis>
+				</title>
+				<para>
+					<programlisting><![CDATA[TYPE newTypeVariable;
+TYPELIST newTypeList;
+INT newIntegerVariable;
+INTLIST newIntList;
+FLOAT newFloatVariable;
+FLOATLIST newFloatList;
+DOUBLE newDoubleVariable;
+DOUBLELIST newDoubleList;
+STRING newStringVariable;
+STRINGLIST newStringList;
+BOOLEAN newBooleanVariable;
+BOOLEANLIST newBooleanList;]]></programlisting>
+				</para>
+			</section>
+		</para>
+	</section>
+	<section id="ugr.tools.tm.language.declarations.ressource">
+		<title>Resources</title>
+		<para>
+			There are two kinds of resource declarations that make external
+			resources available in the TextMarker system:
+			<itemizedlist mark='opencircle'>
+				<listitem>
+					<para>
+						List: A list
+						represents a normal text file with an entry per
+						line
+						or a compiled
+						tree of a word list.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Table: A table represents a comma-separated
+						file.
+					</para>
+				</listitem>
+			</itemizedlist>
+			<section>
+				<title>
+					<emphasis role="bold">Example:</emphasis>
+				</title>
+				<para>
+					<programlisting><![CDATA[WORDLIST listName = 'someWordList.txt';
+WORDTABLE tableName = 'someTable.csv';]]></programlisting>
+				</para>
+			</section>
+		</para>
+	</section>
+	<section id="ugr.tools.tm.language.declarations.scripts">
+		<title>Scripts</title>
+		<para>
+			Additional scripts can be imported and reused with the CALL action.
+			The types of the imported rules are then also available, so that it
+			is
+			not necessary to import the Type System of the additional rule
+			script.
+			<section>
+				<title>
+					<emphasis role="bold">Example:</emphasis>
+				</title>
+				<para>
+					<programlisting><![CDATA[SCRIPT my.package.AnotherScript; // "AnotherScript.tm" in the "my.package" 
+								 //package
+Document{->CALL(AnotherScript)}; // <- rule executes "AnotherScript.tm"]]></programlisting>
+				</para>
+			</section>
+		</para>
+	</section>
+	<section id="ugr.tools.tm.language.declarations.components">
+		<title>Components</title>
+		<para>
+			There are two kinds of UIMA components that can be imported in a
+			TextMarker script:
+			<itemizedlist mark='opencircle'>
+				<listitem>
+					<para>
+						Type System: includes the types defined in an
+						external type
+						system.
+					</para>
+				</listitem>
+				<listitem>
+					<para>
+						Analysis Engine: makes an external analysis
+						engine available.
+						The
+						type system needed for the analysis engine has
+						to be imported
+						separately. Please mind the filtering setting when
+						calling an
+						external analysis engine.
+					</para>
+				</listitem>
+			</itemizedlist>
+			<section>
+				<title>
+					<emphasis role="bold">Example:</emphasis>
+				</title>
+				<para>
+					<programlisting><![CDATA[ENGINE my.package.ExternalEngine; // <- "ExternalEngine.xml" in the 
+ // "my.package" package (in the descriptor folder)
+TYPESYSTEM my.package.ExternalTypeSystem; // <- "ExternalTypeSystem.xml" 
+ // in the "my.package" package (in the descriptor folder)
+Document{->RETAINTYPE(SPACE,BREAK),CALL(ExternalEngine)}; 
+ // calls ExternalEngine, but retains white spaces]]></programlisting>
+				</para>
+			</section>
+		</para>
+	</section>
+</section>
\ No newline at end of file