You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/09/27 14:35:09 UTC
svn commit: r1176357 -
/incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml
Author: joern
Date: Tue Sep 27 12:35:08 2011
New Revision: 1176357
URL: http://svn.apache.org/viewvc?rev=1176357&view=rev
Log:
OPENNLP-261 Added analysis engine XML file which is derived from Apache UIMA's example RegExAnnotator.xml.
Added:
incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml (with props)
Added: incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml?rev=1176357&view=auto
==============================================================================
--- incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml (added)
+++ incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml Tue Sep 27 12:35:08 2011
@@ -0,0 +1,123 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+ <!--
+ ***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ***************************************************************
+ -->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>org.apache.uima.examples.cas.RegExAnnotator</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>RegEx Annotator</name>
+ <description>Matches regular expressions in document text.</description>
+ <configurationParameters>
+ <configurationParameter>
+ <name>Patterns</name>
+ <description>Regular expression patterns to match. The language is that supported by Java 1.4.</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>TypeNames</name>
+ <description>Names of CAS Types to create for the patterns found. The indexes of this array
+correspond to the indexes of the Patterns or PatternFiles arrays. If a match is found for
+Patterns[i] or for any pattern in PatternFile[i], it will result in an annotation of type
+TypeNames[i].</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>ContainingAnnotationTypes</name>
+ <description>Names of CAS Input Types within which annotations should be created.</description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>AnnotateEntireContainingAnnotation</name>
+ <description>When the ContainingAnnotationTypes parameter is specified, a value of true for this
+ parameter will cause the entire containing annotation to be used as the span of the new
+ annotation, rather than just the span of the regular expression match. This can be used
+ to "classify" previously created annotations according to whether or not they contain
+ text matching a regular expression.</description>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>Patterns</name>
+ <value>
+ <array>
+ <string>President|Governor|Senator|Representative|Gov\.|Sen\.|Rep\.</string>
+ </array>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>TypeNames</name>
+ <value>
+ <array>
+ <string>GovernmentTitle</string>
+ </array>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <typeSystemDescription>
+ <types>
+ <typeDescription>
+ <name>GovernmentTitle</name>
+ <description>The title of a government official.</description>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+ </types>
+ </typeSystemDescription>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs>
+ <type>GovernmentTitle</type>
+ </outputs>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+ <externalResourceDependencies>
+ <externalResourceDependency>
+ <key>PatternFile</key>
+ <description>An optional external file containing regular expressions to match. File format is as follows:
+ - Lines starting with # or whitespace are ignored
+ - Lines starting with % indicate an annotation type.
+ - All other lines are regular expressions. The language is that supported by Java 1.4.
+ If a regular expression is matched, it will be annotated with the last annotation type
+ declared (the nearest preceding line starting with %).
+ </description>
+ <optional>true</optional>
+ </externalResourceDependency>
+ </externalResourceDependencies>
+</analysisEngineDescription>
Propchange: incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain