You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2012/10/15 18:22:24 UTC

svn commit: r1398363 [3/3] - in /uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook: ./ images/tools/tm/ images/tools/tm/workbench/ images/tools/tm/workbench/explain/ images/tools/tm/workbench/overview/ images/tools/tm/workbench/projects...

Added: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.projects.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.projects.xml?rev=1398363&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.projects.xml (added)
+++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.projects.xml Mon Oct 15 16:22:23 2012
@@ -0,0 +1,201 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+<!ENTITY imgroot "images/tools/tm/workbench/" >
+<!ENTITY % uimaents SYSTEM "../../target/docbook-shared/entities.ent" >  
+%uimaents;
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+
+
+<section id="section.ugr.tools.tm.workbench.projects">
+	<title>TextMarker Projects</title>
+	<para>
+		TextMarker projects used within the TextMarker workbench need to have
+		a certain folder
+		structure. The parts of this folder structure are
+		explained in
+		<xref linkend='table.ugr.tools.tm.workbench.create_project.folder_strucutre' />
+		. To create a TextMarker project it is recommended to use the provided
+		wizard, explained in
+		<xref linkend='section.ugr.tools.tm.workbench.projects.create_projects' />
+		. If this wizard is used, the required folder structure is
+		automatically created.
+	</para>
+
+	<para>
+		<table id="table.ugr.tools.tm.workbench.create_project.folder_strucutre"
+			frame="all">
+			<title>Project folder structure</title>
+			<tgroup cols="2" colsep="1" rowsep="1">
+				<colspec colname="c1" colwidth="1*" />
+				<colspec colname="c2" colwidth="4*" />
+				<thead>
+					<row>
+						<entry align="center">Folder</entry>
+						<entry align="center">Description</entry>
+					</row>
+				</thead>
+				<tbody>
+					<row>
+						<entry>script</entry>
+						<entry>
+							Source folder for TextMarker scripts and packages.
+						</entry>
+					</row>
+					<row>
+						<entry>descriptor</entry>
+						<entry>
+							Build folder for UIMA components. Analysis engines and type
+							systems
+							are created automatically from the related script files.
+						</entry>
+					</row>
+					<row>
+						<entry>input</entry>
+						<entry>
+							Folder that contains the files that will be processed when
+							launching a
+							TextMarker script. Such input files could be plain
+							text,
+							HTML, xmiCAS files or others.
+						</entry>
+					</row>
+					<row>
+						<entry>output</entry>
+						<entry>
+							Folder that contains the resulting xmiCAS files. One xmiCAS
+							file is generated for each associated document in the input
+							folder.
+						</entry>
+					</row>
+					<row>
+						<entry>resources</entry>
+						<entry>
+							Default folder for word lists, dictionaries and tables.
+						</entry>
+					</row>
+					<row>
+						<entry>test</entry>
+						<entry>
+							Folder for test-driven development.
+						</entry>
+					</row>
+				</tbody>
+			</tgroup>
+		</table>
+	</para>
+	<para>
+		<xref linkend='figure.ugr.tools.tm.workbench.projects.test_project' />
+		shows a project, newly created with the wizard.
+
+		<figure id="figure.ugr.tools.tm.workbench.projects.test_project">
+			<title>A newly created TextMarker project</title>
+			<mediaobject>
+				<imageobject role="html">
+					<imagedata width="300px" format="PNG" align="center"
+						fileref="&imgroot;projects/test_project.PNG" />
+				</imageobject>
+				<imageobject role="fo">
+					<imagedata width="3.5in" format="PNG" align="center"
+						fileref="&imgroot;projects/test_project.PNG" />
+				</imageobject>
+				<textobject>
+					<phrase>
+						A newly created TextMarker project.
+					</phrase>
+				</textobject>
+			</mediaobject>
+		</figure>
+	</para>
+
+	<section id="section.ugr.tools.tm.workbench.projects.create_projects">
+		<title>TextMarker create project wizard</title>
+		<para>
+			To create a new TextMarker project, switch to the TextMarker perspective
+			and click
+			<quote>File &rarr; New &rarr; TextMarker Project</quote>
+			. This opens the corresponding wizard.
+		</para>
+
+		<para>
+			<xref
+				linkend='figure.ugr.tools.tm.workbench.projects.create_projects.wizard1' />
+			shows the start page of the wizard.
+			<figure
+				id="figure.ugr.tools.tm.workbench.projects.create_projects.wizard1">
+				<title>Wizard start page</title>
+				<mediaobject>
+					<imageobject role="html">
+						<imagedata width="450px" format="PNG" align="center"
+							fileref="&imgroot;projects/wizard1.PNG" />
+					</imageobject>
+					<imageobject role="fo">
+						<imagedata width="4.5in" format="PNG" align="center"
+							fileref="&imgroot;projects/wizard1.PNG" />
+					</imageobject>
+					<textobject>
+						<phrase>
+							Wizard start page.
+						</phrase>
+					</textobject>
+				</mediaobject>
+			</figure>
+		</para>
+		<para>
+			To create a simple TextMarker project just enter a project name for
+			your project and click
+			<quote>Finish</quote>
+			. This will create all you need to start.
+		</para>
+		<para>
+			Other possible settings on this page are the desired location of
+			the project,
+			the interpreter to use and the working set you wish to
+			work on, all of which are self-explanatory.
+		</para>
+		<para>
+			On the second page of the wizard you can mainly configure the
+			needed build path. This is necessary if you like to use external
+			source
+			folders or if the project to create will be dependent on other
+			projects or if external libraries have to be found. Add the desired
+			configuration in the related tab.
+		</para>
+		<para>
+			<xref
+				linkend='figure.ugr.tools.tm.workbench.projects.create_projects.wizard2' />
+			shows the second page of the wizard.
+			<figure
+				id="figure.ugr.tools.tm.workbench.projects.create_projects.wizard2">
+				<title>Wizard second page</title>
+				<mediaobject>
+					<imageobject role="html">
+						<imagedata width="450px" format="PNG" align="center"
+							fileref="&imgroot;projects/wizard2.PNG" />
+					</imageobject>
+					<imageobject role="fo">
+						<imagedata width="4.5in" format="PNG" align="center"
+							fileref="&imgroot;projects/wizard2.PNG" />
+					</imageobject>
+					<textobject>
+						<phrase>
+							Wizard second page.
+						</phrase>
+					</textobject>
+				</mediaobject>
+			</figure>
+		</para>
+
+	</section>
+
+</section>
\ No newline at end of file

Added: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.query.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.query.xml?rev=1398363&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.query.xml (added)
+++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.query.xml Mon Oct 15 16:22:23 2012
@@ -0,0 +1,148 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+<!ENTITY imgroot "images/tools/tm/workbench/" >
+<!ENTITY % uimaents SYSTEM "../../target/docbook-shared/entities.ent" >  
+%uimaents;
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+
+<section id="section.ugr.tools.tm.workbench.tm_query">
+	<title>Query View</title>
+	<para>
+		With the Query View the TextMarker language can be used to write
+		queries on a set of
+		documents. A query is simply a set of TextMarker
+		rules. Each query returns a list of all text
+		passages the query
+		applies to. For example, if you have a set of annotated documents
+		containing a
+		number of Author annotations, you could use the Query
+		View to get a list of all the author names
+		associated with these
+		annotations.
+	</para>
+	<para>
+		<figure id="figure.ugr.tools.tm.workbench.tm_query.query_view">
+			<title>
+				The Query View.
+				<emphasis role="bold">(1)</emphasis>
+				Start Button;
+				<emphasis role="bold">(2)</emphasis>
+				Export Button
+			</title>
+			<mediaobject>
+				<imageobject>
+					<imagedata scale="80" format="PNG"
+						fileref="&imgroot;query/query_numbers_tm2.png" />
+				</imageobject>
+				<textobject>
+					<phrase>Query View</phrase>
+				</textobject>
+			</mediaobject>
+		</figure>
+	</para>
+	<para>
+		Use the Query view as follows:
+		<orderedlist numeration="arabic">
+			<listitem>
+				<para>
+					The field
+					<quote>Query Data</quote>
+					specifies the folder containing the
+					documents on which the
+					query should be executed. You can
+					either
+					click on the button next to the field to specify
+					the folder by
+					browsing through the
+					file system, or you can drag and drop a
+					folder directly
+					into the field. If the checkbox is
+					activated, all
+					subfolders are included.
+				</para>
+			</listitem>
+			<listitem>
+				<para>
+					The field
+					<quote>Type System</quote>
+					has to contain a type system or a
+					TextMarker script that
+					specifies all types that are used
+					in
+					the
+					query. You can either click on the button next to
+					the field to
+					specify the type system
+					by browsing through the file system, or
+					you can drag
+					and drop a type system directly into
+					the field.
+				</para>
+			</listitem>
+			<listitem>
+				<para>
+					The query in form of one or more TextMarker rules is
+					specified in
+					the
+					text field in the
+					middle of the view. The
+					screenshot shows ...
+				</para>
+			</listitem>
+			<listitem>
+				<para>
+					After pressing the start button the query is started. The
+					results are subsequently
+					displayed in the bottom text field.
+				</para>
+			</listitem>
+		</orderedlist>
+	</para>
+	<para>
+		The resulting list consists of all text passages the query
+		applied to. Above the text
+		field,
+		information about the entire number of
+		matches and the
+		number of different documents
+		the query
+		applied to is given. Each item in
+		the list
+		shows both the matched text passage and
+		in brackets the
+		document related to the text
+		passage. By double-clicking on one
+		of
+		the listed
+		items, the related
+		document is opened in the editor and the matched text passage is
+		selected. If the related
+		document is already open you can jump to another matched text
+		passage within the same
+		document with just one click on the listed item. Of course
+		this text passage is then selected. By
+		clicking on the export button a list of
+		all matched
+		text passages
+		is shown in a
+		separate window.
+		For further usage, e.g. as a list
+		of authors in
+		another TextMarker
+		project, copy the content of
+		this
+		window to another text
+		file.
+	</para>
+</section>
\ No newline at end of file

Added: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.testing.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.testing.xml?rev=1398363&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.testing.xml (added)
+++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.testing.xml Mon Oct 15 16:22:23 2012
@@ -0,0 +1,284 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+<!ENTITY imgroot "images/tools/tm/workbench/" >
+<!ENTITY % uimaents SYSTEM "../../target/docbook-shared/entities.ent" >  
+%uimaents;
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
+  See the NOTICE file distributed with this work for additional information regarding copyright ownership. 
+  The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not 
+  use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
+  Unless required by applicable law or agreed to in writing, software distributed under the License is 
+  distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+  See the License for the specific language governing permissions and limitations under the License. -->
+
+<section id="ugr.tools.tm.testing">
+  <title>Testing</title>
+  <para> The TextMarker Software comes bundled with its own testing environment, that allows you
+    to test and evaluate TextMarker scripts. It provides full back end testing capabilities and
+    allows you to examine test results in detail. As a product of the testing operation a new
+    document file will be created and detailed information on how well the script performed in the
+    test will be added to this document.
+  </para>
+  <section id="ugr.tools.tm.testing.overview">
+    <title>Overview</title>
+    <para>
+      The testing procedure compares a previously annotated gold standard file with the result of
+      the selected TextMarker script using an evaluator. The evaluators compare the offsets of
+      annotations in both documents and, depending on the evaluator, mark a result document with
+      true positive, false positive or false negative annotations. Afterwards the f1-score is
+      calculated for the whole set of tests, each test file and each type in the test file. The
+      testing environment contains the following parts :
+      <itemizedlist>
+        <listitem>
+          <para>Main view</para>
+        </listitem>
+        <listitem>
+          <para>Result views : true positive, false positive, false negative view
+          </para>
+        </listitem>
+        <listitem>
+          <para>Preference page</para>
+        </listitem>
+      </itemizedlist>
+      <screenshot>
+        <mediaobject>
+          <imageobject>
+            <imagedata scale="80" format="PNG" fileref="&imgroot;Screenshot_main.png" />
+          </imageobject>
+          <textobject>
+            <phrase>Eclipse with open TextMarker and testing environment.
+            </phrase>
+          </textobject>
+        </mediaobject>
+      </screenshot>
+      All control elements that are needed for the interaction with the testing environment are
+      located in the main view. This is also where test files can be selected and where information
+      on how well the script performed is displayed. During the testing process a result CAS file is
+      produced that will contain new annotation types like true positives (tp), false positives
+      (fp) and false negatives (fn). While displaying the result .xmi file in the script editor,
+      additional views allow easy navigation through the new annotations. Additional tree views,
+      like the true positive view, display the corresponding annotations in a hierarchic
+      structure. This allows an easy tracing of the results inside the testing document. A
+      preference page allows customization of the behavior of the testing plug-in.
+    </para>
+    <section id="ugr.tools.tm.testing.overview.main">
+      <title>Main View</title>
+      <para>
+        The following picture shows a close-up view of the testing environment's main view.
+        The toolbar contains all buttons needed to operate the plug-ins. The first line shows the
+        name of the script that is going to be tested and a combo-box, where the view, that should
+        be tested, is selected. On the right follow fields that will show some basic information
+        of the results of the test-run. Below and on the left the test-list is located. This list
+        contains the different test-files. Right beside it, you will find a table with statistical
+        information. It shows a total tp, fp and fn information, as well as precision, recall and
+        f1-score of every test-file and for every type in each file.
+        <screenshot>
+          <mediaobject>
+            <imageobject>
+              <imagedata scale="80" format="PNG" fileref="&imgroot;Screenshot_testing_desc_3_resize.png" />
+            </imageobject>
+            <textobject>
+              <phrase>The main view of the testing environment.</phrase>
+            </textobject>
+          </mediaobject>
+        </screenshot>
+      </para>
+    </section>
+    <section id="ugr.tools.tm.testing.overview.result">
+      <title>Result Views</title>
+      <para>
+        These views add additional information to the CAS View, once a result file is opened. Each
+        view displays one of the following annotation types in a hierarchic tree structure: true
+        positives, false positives and false negatives. Adding a check mark to one of the
+        annotations in a result view will highlight the annotation in the CAS Editor.
+        <screenshot>
+          <mediaobject>
+            <imageobject>
+              <imagedata scale="80" format="PNG" fileref="&imgroot;Screenshot_result.png" />
+            </imageobject>
+            <textobject>
+              <phrase>The result views of the testing environment.</phrase>
+            </textobject>
+          </mediaobject>
+        </screenshot>
+      </para>
+    </section>
+    <section id="ugr.tools.tm.testing.overview.preferences">
+      <title>Preference Page</title>
+      <para>
+        The preference page offers a few options that will modify the plug-in's general behavior.
+        For example, the preloading of previously collected result data can be turned off, should
+        it produce too long a loading time. An important option in the preference page is the
+        evaluator you can select. By default the "exact evaluator" is selected, which compares the
+        offsets of the annotations, that are contained in the file produced by the selected
+        script, with the annotations in the test file. Other evaluators will compare annotations
+        in a different way.
+        <screenshot>
+          <mediaobject>
+            <imageobject>
+              <imagedata scale="80" format="PNG" fileref="&imgroot;Screenshot_preferences.png" />
+            </imageobject>
+            <textobject>
+              <phrase>The preference page of the testing environment.
+              </phrase>
+            </textobject>
+          </mediaobject>
+        </screenshot>
+      </para>
+    </section>
+    <section id="ugr.tools.tm.testing.overview.project">
+      <title>The TextMarker Project Structure</title>
+      <para>
+        The picture shows the TextMarker script explorer. Every TextMarker project contains a
+        folder called "test". This folder is the default location for the test-files. In the
+        folder each script-file has its own sub-folder with a relative path equal to the script's
+        package path in the "script" folder. This folder contains the test files. In every script's
+        test-folder you will also find a result folder with the results of the tests. Should you
+        use test-files from another location in the file-system, the results will be saved in the
+        "temp" sub-folder of the project's "test" folder. All files in the "temp" folder will be
+        deleted once Eclipse is closed.
+        <screenshot>
+          <mediaobject>
+            <imageobject>
+              <imagedata scale="80" format="PNG" fileref="&imgroot;folder_struc_sep_desc_cut.png" />
+            </imageobject>
+            <textobject>
+              <phrase>Script Explorer with the test folder expanded.</phrase>
+            </textobject>
+          </mediaobject>
+        </screenshot>
+      </para>
+    </section>
+  </section>
+  
+  <section id="ugr.tools.tm.testing.usage">
+    <title>Usage</title>
+    <para> This section will demonstrate how to use the testing environment. It will show the
+      basic actions needed to perform a test run.
+    </para>
+    <para> Preparing Eclipse: The testing environment provides its own perspective called
+      "TextMarker Testing". It will display the main view as well as the different result views on
+      the right hand side. It is encouraged to use this perspective, especially when working with
+      the testing environment for the first time.
+    </para>
+    <para> Selecting a script for testing: TextMarker will always test the script, that is
+      currently open in the script-editor. Should another editor be open, for example a
+      java-editor with some java class being displayed, you will see that the testing view is not
+      available.
+    </para>
+    <para> Creating a test file: A test-file is a previously annotated .xmi file that can be used
+      as a gold standard for the test. No additional tools are needed to create such a file,
+      since the TextMarker system already provides such tools.
+    </para>
+    <para> Selecting a test-file: Test files can be added to the test-list by simply dragging them
+      from the Script Explorer into the test-file list. Depending on the setting in the preference
+      page, test-files from a script's "test" folder might already be loaded into the list. A
+      different way to add test-files is to use the "Add files from folder" button. It can be used
+      to add all .xmi files from a selected folder. The "del" key can be used to remove files from
+      the test-list.
+    </para>
+    <para> Selecting a CAS View to test: TextMarker supports different views, that allow you to
+      operate on different levels in a document. The InitialView is selected as default, however
+      you can also switch the evaluation to another view by typing the view's name into the list or
+      selecting the view you wish to use from the list.
+    </para>
+    <para> Selecting the evaluator: The testing environment supports different evaluators that
+      allow a sophisticated analysis of the behavior of a TextMarker script. The evaluator can be
+      chosen in the testing environment's preference page. The preference page can be opened either
+      through the menu or by clicking the blue preference buttons in the testing view's toolbar. The
+      default evaluator is the "Exact CAS Evaluator" which compares the offsets of the annotations
+      between the test file and the file annotated by the tested script.
+    </para>
+    <para> Excluding Types: During a test-run it might be convenient to disable testing for
+      specific types like punctuation or tags. The "exclude types" button will open a dialog
+      where all types can be selected that should not be considered in the test.
+    </para>
+    <para> Running the test: A test-run can be started by clicking on the green start button in
+      the toolbar.
+    </para>
+    <para> Result Overview: The testing main view displays some information, on how well the
+      script did, after every test run. It will display an overall number of true positive, false
+      positive and false negative annotations of all result files as well as an overall f1-score.
+      Furthermore a table will be displayed that contains the overall statistics of the selected
+      test file as well as statistics for every single type in the test file. The information
+      displayed are true positives, false positives, false negatives, precision, recall and
+      f1-measure.
+    </para>
+    <para> The testing environment also supports the export of the overall data in form of a
+      comma-separated table. Clicking the export evaluation data will open a dialog window that
+      contains this table. The text in this table can be copied and easily imported into
+      OpenOffice.org or MS Excel.
+    </para>
+    <para>
+      Result Files: When running a test, the evaluator will create a new result .xmi file and will
+      add new true positive, false positive and false negative annotations. By clicking on a file
+      in the test-file list, you can open the corresponding result .xmi file in the TextMarker
+      script editor. When opening a result file in the script explorer, additional views will
+      open, that allow easy access and browsing of the additional debugging annotations.
+      <screenshot>
+        <mediaobject>
+          <imageobject>
+            <imagedata scale="80" format="PNG"
+              fileref="&imgroot;Screenshot_Result_TP_desc_close_cut.png" />
+          </imageobject>
+          <textobject>
+            <phrase>Open result file and selected true positive annotation in the true positive
+              view.
+            </phrase>
+          </textobject>
+        </mediaobject>
+      </screenshot>
+    </para>
+  </section>
+  <section id="ugr.tools.tm.testing.evaluators">
+    <title>Evaluators</title>
+    <para> When testing a CAS file, the system compares the offsets of the annotations of a
+      previously annotated gold standard file with the offsets of the annotations of the result
+      file the script produced. Responsible for comparing annotations in the two CAS files are
+      evaluators. These evaluators implement different methods and strategies for comparing the
+      annotations. Also, an extension point is provided that allows easy implementation of
+      new evaluators.
+    </para>
+    <para> Exact Match Evaluator: The Exact Match Evaluator compares the offsets of the
+      annotations in the result and the gold standard file. Any difference will be marked with
+      either a false positive or a false negative annotation.
+    </para>
+    <para> Partial Match Evaluator: The Partial Match Evaluator compares the offsets of the
+      annotations in the result and gold standard file. It will allow differences in the
+      beginning or the end of an annotation. For example "corresponding" and "corresponding " will
+      not be annotated as an error.
+    </para>
+    <para> Core Match Evaluator: The Core Match Evaluator accepts annotations that share a core
+      expression. In this context a core expression is at least four digits long and starts with a
+      capitalized letter. For example the two annotations "L404-123-421" and "L404-321-412" would
+      be considered a true positive match, because "L404" is considered a core expression that
+      is contained in both annotations.
+    </para>
+    <para> Word Accuracy Evaluator: Compares the labels of all words/numbers in an annotation,
+      whereas the label equals the type of the annotation. This has the consequence, for example,
+      that each word or number that is not part of the annotation is counted as a single false
+      negative. For example we have the sentence: "Christmas is on the 24.12 every year." The
+      script labels "Christmas is on the 12" as a single sentence, while the test file labels the
+      sentence correctly with a single sentence annotation. While, for example, the Exact CAS
+      Evaluator will only assign a single False Negative annotation, the Word Accuracy Evaluator will
+      mark every word or number as a single False Negative.
+    </para>
+    <para> Template Only Evaluator: This Evaluator compares the offsets of the annotations and the
+      features, that have been created by the script. For example the text "Alan Mathison Turing"
+      is marked with the author annotation and "author" contains 2 features: "FirstName" and
+      "LastName". If the script now creates an author annotation with only one feature, the
+      annotation will be marked as a false positive.
+    </para>
+    <para> Template on Word Level Evaluator: The Template On Word Evaluator compares the offsets
+      of the annotations. In addition it also compares the features and feature structures and the
+      values stored in the features. For example the annotation "author" might have features like
+      "FirstName" and "LastName". The author's name is "Alan Mathison Turing" and the script
+      correctly assigns the author annotation. The features assigned by the script are "FirstName :
+      Alan", "LastName : Mathison", while the correct feature values would be "FirstName Alan",
+      "LastName Turing". In this case the Template on Word Level Evaluator will mark the annotation as a
+      false positive, since the feature values differ.
+    </para>
+  </section>
+</section>
\ No newline at end of file

Added: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.textruler.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.textruler.xml?rev=1398363&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.textruler.xml (added)
+++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.textruler.xml Mon Oct 15 16:22:23 2012
@@ -0,0 +1,152 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+<!ENTITY imgroot "images/tools/tm/workbench/" >
+<!ENTITY % uimaents SYSTEM "../../target/docbook-shared/entities.ent" >  
+%uimaents;
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
+  See the NOTICE file distributed with this work for additional information regarding copyright ownership. 
+  The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not 
+  use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
+  Unless required by applicable law or agreed to in writing, software distributed under the License is 
+  distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+  See the License for the specific language governing permissions and limitations under the License. -->
+
+<section id="section.ugr.tools.tm.workbench.textruler">
+  <title>TextRuler</title>
+  <para> Using the knowledge engineering approach, a knowledge engineer normally writes
+    handcrafted rules to create a domain dependent information extraction application, often
+    supported by a gold standard. When starting the engineering process for the acquisition of the
+    extraction knowledge for a possibly new slot or, more generally, for new concepts, machine
+    learning methods are often able to offer support in an iterative engineering process. This
+    section gives a conceptual overview of the process model for the semi-automatic development of
+    rule-based information extraction applications.
+  </para>
+  <para> First, a suitable set of documents that contain the text fragments with interesting
+    patterns needs to be selected and annotated with the target concepts. Then, the knowledge
+    engineer chooses and configures the methods for automatic rule acquisition to the best of his
+    knowledge for the learning task: Lambda expressions based on tokens and linguistic features,
+    for example, differ in their application domain from wrappers that process generated HTML
+    pages.
+  </para>
+  <para> Furthermore, parameters like the window size defining relevant features need to be set to
+    an appropriate level. Before the annotated training documents form the input of the learning
+    task, they are enriched with features generated by the partial rule set of the developed
+    application. The results of the methods, that is, the learned rules, are proposed to the
+    knowledge engineer for the extraction of the target concept.
+  </para>
+  <para> The knowledge engineer has different options to proceed: If the quality, amount or
+    generality of the presented rules is not sufficient, then additional training documents need
+    to be annotated or additional rules have to be handcrafted to provide more features in general
+    or more appropriate features. Rules or rule sets of high quality can be modified, combined or
+    generalized and transferred to the rule set of the application in order to support the
+    extraction task of the target concept. In the case that the methods did not learn reasonable
+    rules at all, the knowledge engineer proceeds with writing handcrafted rules.
+  </para>
+  <para> Having gathered enough extraction knowledge for the current concept, the semi-automatic
+    process is iterated and the focus is moved to the next concept until the development of the
+    application is completed.
+  </para>
+  <section id="ugr.tools.tm.textruler.learner">
+    <title>Available Learners</title>
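+    <table frame="all">
+      <title>Overview</title>
+      <tgroup cols="5">
+        <thead>
+          <row><entry>Name</entry><entry>Strategy</entry><entry>Document</entry><entry>Slots</entry><entry>Status</entry></row>
+        </thead>
+        <tbody>
+          <row><entry>BWI (1)</entry><entry>Boosting, Top Down</entry><entry>Struct, Semi</entry><entry>Single, Boundary</entry><entry>Planning</entry></row>
+          <row><entry>LP2 (2)</entry><entry>Bottom Up Cover</entry><entry>All</entry><entry>Single, Boundary</entry><entry>Prototype</entry></row>
+          <row><entry>RAPIER (3)</entry><entry>Top Down/Bottom Up Compr.</entry><entry>Semi</entry><entry>Single</entry><entry>Experimental</entry></row>
+          <row><entry>WHISK (4)</entry><entry>Top Down Cover</entry><entry>All</entry><entry>Multi</entry><entry>Prototype</entry></row>
+          <row><entry>WIEN (5)</entry><entry>CSP</entry><entry>Struct</entry><entry>Multi, Rows</entry><entry>Prototype</entry></row>
+        </tbody>
+      </tgroup>
+    </table>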
+    <itemizedlist>
+      <listitem>
+        <para> Strategy: The strategies used by the learning methods are commonly coverage
+          algorithms.
+        </para>
+      </listitem>
+      <listitem>
+        <para> Document: The type of the document may be <quote>free</quote> like in newspapers,
+          <quote>semi</quote> or <quote>struct</quote> like HTML pages.
+        </para>
+      </listitem>
+      <listitem>
+        <para> Slots: The slots refer to a single annotation that represents the goal of the
+          learning task. Some rules are able to create several annotations at once in the same
+          context (multi-slot). However, only single slots are supported by the current
+          implementations.
+        </para>
+      </listitem>
+      <listitem>
+        <para> Status: The current status of the implementation in the TextRuler framework.
+        </para>
+      </listitem>
+    </itemizedlist>
+    <para> Publications
+    </para>
+    <para> (1) Dayne Freitag and Nicholas Kushmerick. Boosted Wrapper Induction. In AAAI/IAAI,
+      pages 577–583, 2000.
+    </para>
+    <para> (2) F. Ciravegna. (LP)2, Rule Induction for Information Extraction Using Linguistic
+      Constraints. Technical Report CS-03-07, Department of Computer Science, University of
+      Sheffield, Sheffield, 2003.
+    </para>
+    <para> (3) Mary Elaine Califf and Raymond J. Mooney. Bottom-up Relational Learning of Pattern
+      Matching Rules for Information Extraction. Journal of Machine Learning Research, 4:177–210,
+      2003.
+    </para>
+    <para> (4) Stephen Soderland, Claire Cardie, and Raymond Mooney. Learning Information
+      Extraction Rules for Semi-Structured and Free Text. In Machine Learning, volume 34, pages
+      233–272, 1999.
+    </para>
+    <para> (5) N. Kushmerick, D. Weld, and B. Doorenbos. Wrapper Induction for Information
+      Extraction. In Proc. International Joint Conference on Artificial Intelligence (IJCAI), 1997.
+    </para>
+    <para> BWI: Boosted Wrapper Induction (BWI) uses boosting techniques to improve the performance
+      of simple pattern matching single-slot boundary wrappers (boundary detectors). Two sets of
+      detectors are learned: the "fore" and the "aft" detectors. Weighted by their confidences and
+      combined with a slot length histogram derived from the training data they can classify a
+      given pair of boundaries within a document. BWI can be used for structured, semi-structured
+      and free text. The patterns are token-based with special wildcards for more general rules.
+    </para>
+    <para> Implementations: No implementations are available yet.
+    </para>
+    <para> Parameters: No parameters are available yet.
+    </para>
+    <para> LP2: This method operates on all three kinds of documents. It learns separate rules for
+      the beginning and the end of a single slot. So-called tagging rules insert boundary SGML
+      tags and additionally induced correction rules shift misplaced tags to their correct
+      positions in order to improve precision. The learning strategy is a bottom-up covering
+      algorithm. It starts by creating a specific seed instance with a window of w tokens to the
+      left and right of the target boundary and searches for the best generalization. Other
+      linguistic NLP-features can be used in order to generalize over the flat word sequence.
+    </para>
+    <para> Implementations: LP2 (naive), LP2 (optimized).
+    </para>
+    <para> Parameters: Context Window Size (to the left and right), Best Rules List Size, Minimum
+      Covered Positives per Rule, Maximum Error Threshold, Contextual Rules List Size.
+    </para>
+    <para> RAPIER: RAPIER induces single slot extraction rules for semi-structured documents. The
+      rules consist of three patterns: a pre-filler, a filler and a post-filler pattern. Each can
+      hold several constraints on tokens and their corresponding POS tag and semantic information.
+      The algorithm uses a bottom-up compression strategy, starting with a most specific seed rule
+      for each training instance. This initial rule base is compressed by randomly selecting rule
+      pairs and searching for the best generalization. Considering two rules, the least general
+      generalization (LGG) of the slot fillers is created and specialized by adding rule items to
+      the pre- and post-filler until the new rules operate well on the training set. The best of
+      the k rules (k-beam search) is added to the rule base and all empirically subsumed rules are
+      removed.
+    </para>
+    <para> Implementations: RAPIER.
+    </para>
+    <para> Parameters: Maximum Compression Fail Count, Internal Rules List Size, Rule Pairs for
+      Generalizing, Maximum 'No improvement' Count, Maximum Noise Threshold, Minimum Covered
+      Positives Per Rule, PosTag Root Type, Use All 3 GenSets at Specialization.
+    </para>
+    <para> WHISK: WHISK is a multi-slot method that operates on all three kinds of documents and
+      learns single- or multi-slot rules looking similar to regular expressions. The top-down
+      covering algorithm begins with the most general rule and specializes it by adding single
+      rule terms until the rule makes no errors on the training set. Domain specific classes or
+      linguistic information obtained by a syntactic analyzer can be used as additional features.
+      The exact definition of a rule term (e.g. a token) and of a problem instance (e.g. a whole
+      document or a single sentence) depends on the operating domain and document type.
+    </para>
+    <para> Implementations: WHISK (token), WHISK (generic).
+    </para>
+    <para> Parameters: Window Size, Maximum Error Threshold, PosTag Root Type.
+    </para>
+    <para> WIEN: WIEN is the only method listed here that operates on highly structured texts only.
+      It induces so-called wrappers that anchor the slots by their structured context around them.
+      The HLRT (head left right tail) wrapper class, for example, can determine and extract several
+      multi-slot-templates by first separating the important information block from unimportant
+      head and tail portions and then extracting multiple data rows from table-like data
+      structures from the remaining document. Inducing a wrapper is done by solving a CSP for all
+      possible pattern combinations from the training data.
+    </para>
+    <para> Implementations: WIEN.
+    </para>
+    <para> Parameters: No parameters are available.
+    </para>
+  </section>
+</section>
\ No newline at end of file

Added: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.tm_documentation.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.tm_documentation.xml?rev=1398363&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.tm_documentation.xml (added)
+++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.tm_documentation.xml Mon Oct 15 16:22:23 2012
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+<!ENTITY imgroot "images/tools/tm/workbench/" >
+<!ENTITY % uimaents SYSTEM "../../target/docbook-shared/entities.ent" >  
+%uimaents;
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
+  See the NOTICE file distributed with this work for additional information regarding copyright ownership. 
+  The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not 
+  use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
+  Unless required by applicable law or agreed to in writing, software distributed under the License is 
+  distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+  See the License for the specific language governing permissions and limitations under the License. -->
+
+<section id="section.ugr.tools.tm.workbench.tm_documentation">
+  <title>TextMarker Documentation</title>
+  <para>
+
+  </para>
+
+</section>
\ No newline at end of file

Added: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.tm_perspective.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.tm_perspective.xml?rev=1398363&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.tm_perspective.xml (added)
+++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/workbench/tools.textmarker.workbench.tm_perspective.xml Mon Oct 15 16:22:23 2012
@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+<!ENTITY imgroot "images/tools/tm/workbench/" >
+<!ENTITY % uimaents SYSTEM "../../target/docbook-shared/entities.ent" >  
+%uimaents;
+]>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+
+<section id="section.ugr.tools.tm.workbench.tm_perspective">
+	<title>TextMarker Perspective</title>
+	<para>
+		The TextMarker perspective is the main view to manage TextMarker
+		projects. There are several views associated with the TextMarker
+		perspective: Annotation Test, Annotation Browser, Selection,
+		TextRuler, TextMarker Query. Since Annotation Test, TextRuler and
+		TextMarker Query have a stand-alone functionality they are explained
+		in separate sections.
+	</para>
+
+	<para>
+		To make it possible to reproduce all of the examples used below,
+		switch to the TextMarker Explain perspective within your Eclipse
+		workbench.
+		Import the TextMarker example project and open the main
+		TextMarker script file 'Main.tm'. Now press the 'Run' button (green
+		arrow) and wait for the end of execution. Open the resulting xmiCAS
+		file
+		'Test1.txt.xmi', which you can find in the output folder.
+	</para>
+
+	<section
+		id="section.ugr.tools.tm.workbench.tm_perspective.annotation_browser">
+		<title>Annotation Browser</title>
+		<para>
+			The Annotation Browser can be used to view the annotations
+			created by the execution of a TextMarker project. If an xmiCAS file
+			is opened and active in the editor, the related annotations are shown
+			in this view.
+		</para>
+		<para>
+			The result of the execution of the TextMarker example project is
+			shown in
+			<xref
+				linkend='figure.ugr.tools.tm.workbench.tm_perspective.annotation_browser' />
+			.
+		</para>
+		<para>
+			<figure
+				id="figure.ugr.tools.tm.workbench.tm_perspective.annotation_browser">
+				<title> Annotation Browser view
+				</title>
+				<mediaobject>
+					<imageobject role="html">
+						<imagedata width="300px" format="PNG" align="center"
+							fileref="&imgroot;tm/annotation_browser.png" />
+					</imageobject>
+					<imageobject role="fo">
+						<imagedata width="3.0in" format="PNG" align="center"
+							fileref="&imgroot;tm/annotation_browser.png" />
+					</imageobject>
+					<textobject>
+						<phrase>
+							Annotation Browser view.
+						</phrase>
+					</textobject>
+				</mediaobject>
+			</figure>
+		</para>
+		<para>
+			Moreover, this view has two possible filters. Using the
+			<quote>Only types with...</quote>
+			-filter leads to a list containing only those types that contain
+			the
+			entered text. The
+			<quote>Only annotations with...</quote>
+			-filter leads to an analogous list.
+		</para>
+	</section>
+
+	<section id="section.ugr.tools.tm.workbench.tm_perspective.selection">
+		<title>Selection</title>
+		<para>
+			The Selection view is very similar to the Annotation Browser
+			view, but only shows annotations that affect a specific text passage.
+			To get such a list, click on any position in the opened xmiCAS
+			document or select a certain text passage.
+		</para>
+		<para>
+			E.g., if you select the text passage
+			<literal>2008</literal>
+			, the Selection view will be generated as shown in
+			<xref
+				linkend='figure.ugr.tools.tm.workbench.tm_perspective.selection' />
+			.
+		</para>
+		<para>
+			<figure id="figure.ugr.tools.tm.workbench.tm_perspective.selection">
+				<title> Selection view
+				</title>
+				<mediaobject>
+					<imageobject role="html">
+						<imagedata width="560px" format="PNG" align="center"
+							fileref="&imgroot;tm/selection.png" />
+					</imageobject>
+					<imageobject role="fo">
+						<imagedata width="5.5in" format="PNG" align="center"
+							fileref="&imgroot;tm/selection.png" />
+					</imageobject>
+					<textobject>
+						<phrase>
+							Selection view.
+						</phrase>
+					</textobject>
+				</mediaobject>
+			</figure>
+		</para>
+		<para>
+			The Selection view has the same filtering possibilities as
+			described in Annotation Browser view.
+		</para>
+	</section>
+
+</section>
\ No newline at end of file