You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/09/27 14:35:09 UTC

svn commit: r1176357 - /incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml

Author: joern
Date: Tue Sep 27 12:35:08 2011
New Revision: 1176357

URL: http://svn.apache.org/viewvc?rev=1176357&view=rev
Log:
OPENNLP-261 Added analysis engine xml file which is derived from Apache UIMAs example RegExAnnotator.xml.

Added:
    incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml   (with props)

Added: incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml?rev=1176357&view=auto
==============================================================================
--- incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml (added)
+++ incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml Tue Sep 27 12:35:08 2011
@@ -0,0 +1,123 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.uima.examples.cas.RegExAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>RegEx Annotator</name>
+    <description>Matches regular expressions in document text.</description>
+    <configurationParameters>
+      <configurationParameter>
+        <name>Patterns</name>
+        <description>Regular expression patterns to match.  The language is that supported by Java 1.4.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>TypeNames</name>
+        <description>Names of CAS Types to create for the patterns found.  The indexes of this array
+correspond to the indexes of the Patterns or PatternFiles arrays.  If a match is found for
+Patterns[i] or for any pattern in PatternFile[i], it will result in an annotation of type
+TypeNames[i].</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ContainingAnnotationTypes</name>
+        <description>Names of CAS Input Types within which annotations should be created.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>AnnotateEntireContainingAnnotation</name>
+        <description>When the ContainingAnnotationTypes parameter is specified, a value of true for this
+	parameter will cause the entire containing annotation to be used as the span of the new
+	annotation, rather than just the span of the regular expression match.  This can be used
+	to "classify" previously created annotations according to whether or not they contain
+	text matching a regular expression.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>Patterns</name>
+        <value>
+          <array>
+            <string>President|Governor|Senator|Representative|Gov\.|Sen\.|Rep\.</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>TypeNames</name>
+        <value>
+          <array>
+            <string>GovernmentTitle</string>
+          </array>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <types>
+        <typeDescription>
+          <name>GovernmentTitle</name>
+          <description>The title of a government official.</description>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+        </typeDescription>
+      </types>
+    </typeSystemDescription>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type>GovernmentTitle</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+	<operationalProperties>
+		<modifiesCas>true</modifiesCas>
+		<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+		<outputsNewCASes>false</outputsNewCASes>
+	</operationalProperties>
+  </analysisEngineMetaData>
+  <externalResourceDependencies>
+    <externalResourceDependency>
+      <key>PatternFile</key>
+      <description>An optional external file containing regular expressions to match. File format is as follows: 
+		  - Lines starting with # or whitespace are ignored 
+		  - Lines starting with % indicate an annotation type. 
+		  - All other lines are regular expressions. The language is that supported by Java 1.4. 
+		    If a regular expression is matched, it will be annotated with the last annotation type 
+		    declared (the nearest preceding line starting with %).
+	  </description>
+      <optional>true</optional>
+    </externalResourceDependency>
+  </externalResourceDependencies>
+</analysisEngineDescription>

Propchange: incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain