You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by mb...@apache.org on 2007/05/29 12:51:58 UTC

svn commit: r542483 - in /incubator/uima/uimaj/trunk/uimaj-examples/src/main: data/Apache_UIMA.txt descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml descriptors/analysis_engine/UIMA_Analysis_Example.xml

Author: mbaessler
Date: Tue May 29 03:51:57 2007
New Revision: 542483

URL: http://svn.apache.org/viewvc?view=rev&rev=542483
Log:
UIMA-418

add EmailAddress Regex analysis engine (based on Regex annotator)

JIRA ticket https://issues.apache.org/jira/browse/UIMA-418

Added:
    incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt
    incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml
    incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml

Added: incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt?view=auto&rev=542483
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt (added)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt Tue May 29 03:51:57 2007
@@ -0,0 +1,38 @@
+Welcome to Apache UIMA (Unstructured Information Management Architecture), an incubator project of the Apache Software Foundation (ASF). 
+Our goal is a thriving community of users and developers of UIMA frameworks, supporting components for analysing unstructured content such as text, audio and video.
+
+What is UIMA?
+
+Unstructured Information Management applications are software systems that analyze large volumes of unstructured information in order to discover knowledge that is relevant to an end user. 
+UIMA is a framework and SDK for developing such applications. An example UIM application might ingest plain text and identify entities, such as persons, places, organizations; or relations, such as works-for or located-at. 
+UIMA enables such an application to be decomposed into components, for example "language identification" -> "language specific segmentation" -> "sentence boundary detection" -> "entity detection (person/place names etc.)". 
+Each component must implement interfaces defined by the framework and must provide self-describing metadata via XML descriptor files. The framework manages these components and the data flow between them. Components are written in Java or C++; the data that flows between components is designed for efficient mapping between these languages. 
+UIMA additionally provides capabilities to wrap components as network services, and can scale to very large volumes by replicating processing pipelines over a cluster of networked nodes.
+
+Apache UIMA is an Apache-licensed open source implementation of the UIMA specification (that specification is, in turn, being developed concurrently by a technical committee within OASIS, a standards organization). 
+We invite and encourage you to participate in both the implementation and specification efforts.
+
+UIMA is a component framework for analysing unstructured content such as text, audio and video. 
+It comprises an SDK and tooling for composing and running analytic components written in Java and C++, with some support for Perl, Python and TCL. 
+
+
+The Apache UIMA mailing lists are:
+
+Users - uima-user@incubator.apache.org
+Developers - uima-dev@incubator.apache.org
+Commits - uima-commits@incubator.apache.org
+
+
+The initial UIMA project committers are:
+
+Michael Baessler
+Edward Epstein
+Thilo Goetz
+Adam Lally
+Marshall Schor
+
+
+The UIMA project Mentors are:
+
+Ken Coar (ASF member and Vice President)
+Sam Ruby (ASF member)
\ No newline at end of file

Added: incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml?view=auto&rev=542483
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml (added)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml Tue May 29 03:51:57 2007
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.uima.examples.cas.RegExAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>Simple Email Address Recognizer using Regular Expressions</name>
+    <description>Detects email addresses using a simple regular expression.</description>
+    <configurationParameters>
+      <configurationParameter>
+        <name>Patterns</name>
+        <description>Regular expression patterns to match.  The language is that supported by Java 1.4.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>TypeNames</name>
+        <description>Names of CAS Types to create for the patterns found.  The indexes of this array
+correspond to the indexes of the Patterns or PatternFiles arrays.  If a match is found for
+Patterns[i], it will result in an annotation of type
+TypeNames[i].</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ContainingAnnotationTypes</name>
+        <description>Names of CAS Input Types within which annotations should be created.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>AnnotateEntireContainingAnnotation</name>
+        <description>When the ContainingAnnotationTypes parameter is specified, a value of true for this
+	parameter will cause the entire containing annotation to be used as the span of the new
+	annotation, rather than just the span of the regular expression match.  This can be used
+	to "classify" previously created annotations according to whether or not they contain
+	text matching a regular expression.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair><!-- Email pattern: local part, '@', domain labels, then a literal dot and a 2-4 letter TLD; the escaped dot prevents matches such as "foo@bar" -->
+        <name>Patterns</name>
+        <value>
+          <array>
+            <string>[a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z]{2,4}</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>TypeNames</name>
+        <value>
+          <array>
+            <string>example.EmailAddress</string>
+          </array>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <types>
+        <typeDescription>
+          <name>example.EmailAddress</name>
+          <description>Email Address</description>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+        </typeDescription>
+      </types>
+    </typeSystemDescription>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+         <type>example.EmailAddress</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+	<operationalProperties>
+		<modifiesCas>true</modifiesCas>
+		<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+		<outputsNewCASes>false</outputsNewCASes>
+	</operationalProperties>
+  </analysisEngineMetaData>
+</analysisEngineDescription>

Added: incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml?view=auto&rev=542483
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml (added)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml Tue May 29 03:51:57 2007
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>false</primitive>
+  <delegateAnalysisEngineSpecifiers>
+    <delegateAnalysisEngine key="TokenAndSentence">
+      <import location="SimpleTokenAndSentenceAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="NamesAndPersonTitles">
+      <import location="NamesAndPersonTitles_TAE.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="Email">
+      <import location="SimpleEmailRecognizer_RegEx_TAE.xml"/>
+    </delegateAnalysisEngine>
+  </delegateAnalysisEngineSpecifiers>
+  <analysisEngineMetaData>
+    <name>Aggregate TAE - Tokenizer, Name Recognizer, Person Title and Email Address Annotator</name>
+    <description>Detects Tokens, Sentences, Names, Person Titles and Email Addresses</description>
+    <configurationParameters/>
+    <configurationParameterSettings/>
+    <flowConstraints><!-- Each node below is the key of a delegateAnalysisEngine declared in delegateAnalysisEngineSpecifiers; presumably run in the listed order (fixedFlow), TODO confirm -->
+      <fixedFlow>
+        <node>TokenAndSentence</node>
+        <node>NamesAndPersonTitles</node>
+        <node>Email</node>
+      </fixedFlow>
+    </flowConstraints>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.tokenizer.Sentence</type>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.tokenizer.Token</type>
+          <type>example.Name</type>
+          <type allAnnotatorFeatures="true">example.PersonTitle</type>
+          <type>example.EmailAddress</type>
+        </outputs>
+        <languagesSupported>
+          <language>en</language>
+        </languagesSupported>
+      </capability>
+    </capabilities>
+	<operationalProperties>
+		<modifiesCas>true</modifiesCas>
+		<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+		<outputsNewCASes>false</outputsNewCASes>
+	</operationalProperties>
+  </analysisEngineMetaData>
+</analysisEngineDescription>