You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by mb...@apache.org on 2007/05/29 12:51:58 UTC
svn commit: r542483 - in
/incubator/uima/uimaj/trunk/uimaj-examples/src/main: data/Apache_UIMA.txt
descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml
descriptors/analysis_engine/UIMA_Analysis_Example.xml
Author: mbaessler
Date: Tue May 29 03:51:57 2007
New Revision: 542483
URL: http://svn.apache.org/viewvc?view=rev&rev=542483
Log:
UIMA-418
add EmailAddress Regex analysis engine (based on Regex annotator)
JIRA ticket https://issues.apache.org/jira/browse/UIMA-418
Added:
incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt
incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml
incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml
Added: incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt?view=auto&rev=542483
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt (added)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/data/Apache_UIMA.txt Tue May 29 03:51:57 2007
@@ -0,0 +1,38 @@
+Welcome to Apache UIMA (Unstructured Information Management Architecture), an incubator project of the Apache Software Foundation (ASF).
+Our goal is a thriving community of users and developers of UIMA frameworks, supporting components for analysing unstructured content such as text, audio and video.
+
+What is UIMA?
+
+Unstructured Information Management applications are software systems that analyze large volumes of unstructured information in order to discover knowledge that is relevant to an end user.
+UIMA is a framework and SDK for developing such applications. An example UIM application might ingest plain text and identify entities, such as persons, places, organizations; or relations, such as works-for or located-at.
+UIMA enables such an application to be decomposed into components, for example "language identification" -> "language specific segmentation" -> "sentence boundary detection" -> "entity detection (person/place names etc.)".
+Each component must implement interfaces defined by the framework and must provide self-describing metadata via XML descriptor files. The framework manages these components and the data flow between them. Components are written in Java or C++; the data that flows between components is designed for efficient mapping between these languages.
+UIMA additionally provides capabilities to wrap components as network services, and can scale to very large volumes by replicating processing pipelines over a cluster of networked nodes.
+
+Apache UIMA is an Apache-licensed open source implementation of the UIMA specification (that specification is, in turn, being developed concurrently by a technical committee within OASIS, a standards organization).
+We invite and encourage you to participate in both the implementation and specification efforts.
+
+UIMA is a component framework for analysing unstructured content such as text, audio and video.
+It comprises an SDK and tooling for composing and running analytic components written in Java and C++, with some support for Perl, Python and TCL.
+
+
+The Apache UIMA mailing lists are:
+
+Users - uima-user@incubator.apache.org
+Developers - uima-dev@incubator.apache.org
+Commits - uima-commits@incubator.apache.org
+
+
+The initial UIMA project committers are:
+
+Michael Baessler
+Edward Epstein
+Thilo Goetz
+Adam Lally
+Marshall Schor
+
+
+The UIMA project Mentors are:
+
+Ken Coar (ASF member and Vice President)
+Sam Ruby (ASF member)
\ No newline at end of file
Added: incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml?view=auto&rev=542483
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml (added)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/SimpleEmailRecognizer_RegEx_TAE.xml Tue May 29 03:51:57 2007
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+ <!--
+ ***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ***************************************************************
+ -->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive> <!-- primitive engine: implemented by the annotator class named on the next line -->
+ <annotatorImplementationName>org.apache.uima.examples.cas.RegExAnnotator</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>Simple Email Address Recognizer using Regular Expressions</name>
+ <description>Detects Email Addresses using a simple regular expression.</description>
+ <configurationParameters>
+ <configurationParameter>
+ <name>Patterns</name>
+ <description>Regular expression patterns to match. The language is that supported by Java 1.4.</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>TypeNames</name>
+ <description>Names of CAS Types to create for the patterns found. The indexes of this array
+correspond to the indexes of the Patterns array. If a match is found for
+Patterns[i], it will result in an annotation of type
+TypeNames[i].</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>ContainingAnnotationTypes</name>
+ <description>Names of CAS Input Types within which annotations should be created.</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>AnnotateEntireContainingAnnotation</name>
+ <description>When the ContainingAnnotationTypes parameter is specified, a value of true for this
+ parameter will cause the entire containing annotation to be used as the span of the new
+ annotation, rather than just the span of the regular expression match. This can be used
+ to "classify" previously created annotations according to whether or not they contain
+ text matching a regular expression.</description>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings> <!-- Patterns[0] is paired with TypeNames[0] below -->
+ <nameValuePair>
+ <name>Patterns</name>
+ <value>
+ <array>
+ <string>[a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z]{2,4}</string> <!-- local part, @, domain, a literal dot, then a 2 to 4 letter top-level domain -->
+ </array>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>TypeNames</name>
+ <value>
+ <array>
+ <string>example.EmailAddress</string>
+ </array>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <typeSystemDescription>
+ <types>
+ <typeDescription>
+ <name>example.EmailAddress</name>
+ <description>Email Address</description>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+ </types>
+ </typeSystemDescription>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs>
+ <type>example.EmailAddress</type>
+ </outputs>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+</analysisEngineDescription>
Added: incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml?view=auto&rev=542483
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml (added)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/descriptors/analysis_engine/UIMA_Analysis_Example.xml Tue May 29 03:51:57 2007
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+ <!--
+ ***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ***************************************************************
+ -->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>false</primitive> <!-- aggregate engine: composed of the delegate engines declared below -->
+ <delegateAnalysisEngineSpecifiers> <!-- each key is the name the fixedFlow nodes use to refer to that delegate -->
+ <delegateAnalysisEngine key="TokenAndSentence">
+ <import location="SimpleTokenAndSentenceAnnotator.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="NamesAndPersonTitles">
+ <import location="NamesAndPersonTitles_TAE.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="Email">
+ <import location="SimpleEmailRecognizer_RegEx_TAE.xml"/>
+ </delegateAnalysisEngine>
+ </delegateAnalysisEngineSpecifiers>
+ <analysisEngineMetaData>
+ <name>Aggregate TAE - Tokenizer, Name Recognizer, Person Title and Email Address Annotator</name>
+ <description>Detects Tokens, Sentences, Names, Person Titles and Email Addresses</description>
+ <configurationParameters/>
+ <configurationParameterSettings/>
+ <flowConstraints>
+ <fixedFlow> <!-- nodes name the delegate keys above, in this order: tokenizer, names and titles, email -->
+ <node>TokenAndSentence</node>
+ <node>NamesAndPersonTitles</node>
+ <node>Email</node>
+ </fixedFlow>
+ </flowConstraints>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs> <!-- annotation types this aggregate declares as its output -->
+ <type allAnnotatorFeatures="true">org.apache.uima.examples.tokenizer.Sentence</type>
+ <type allAnnotatorFeatures="true">org.apache.uima.examples.tokenizer.Token</type>
+ <type>example.Name</type> <!-- NOTE(review): no allAnnotatorFeatures here, unlike the neighbouring types; confirm this is intended -->
+ <type allAnnotatorFeatures="true">example.PersonTitle</type>
+ <type>example.EmailAddress</type>
+ </outputs>
+ <languagesSupported>
+ <language>en</language>
+ </languagesSupported>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+</analysisEngineDescription>