You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/09/27 14:44:50 UTC
svn commit: r1176362 -
/incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml
Author: joern
Date: Tue Sep 27 12:44:50 2011
New Revision: 1176362
URL: http://svn.apache.org/viewvc?rev=1176362&view=rev
Log:
OPENNLP-261 Adapted descriptor to work with CSCasWriter.
Modified:
incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml
Modified: incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml?rev=1176362&r1=1176361&r2=1176362&view=diff
==============================================================================
--- incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml (original)
+++ incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml Tue Sep 27 12:44:50 2011
@@ -24,80 +24,38 @@
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>true</primitive>
- <annotatorImplementationName>org.apache.uima.examples.cas.RegExAnnotator</annotatorImplementationName>
+ <annotatorImplementationName>org.apache.opennlp.corpus_server.connector.CSCasWriter</annotatorImplementationName>
<analysisEngineMetaData>
- <name>RegEx Annotator</name>
- <description>Matches regular expressions in document text.</description>
+ <name>Corpus Server Cas Writer</name>
+ <description>Writes a CAS (back) to the Corpus Server.</description>
<configurationParameters>
- <configurationParameter>
- <name>Patterns</name>
- <description>Regular expression patterns to match. The language is that supported by Java 1.4.</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>TypeNames</name>
- <description>Names of CAS Types to create for the patterns found. The indexes of this array
-correspond to the indexes of the Patterns or PatternFiles arrays. If a match is found for
-Patterns[i] or for any pattern in PatternFile[i], it will result in an annotation of type
-TypeNames[i].</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>true</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>ContainingAnnotationTypes</name>
- <description>Names of CAS Input Types within which annotations should be created.</description>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
- <configurationParameter>
- <name>AnnotateEntireContainingAnnotation</name>
- <description>When the ContainingAnnoationTypes parameter is specified, a value of true for this
- parameter will cause the entire containing annotation to be used as the span of the new
- annotation, rather than just the span of the regular expression match. This can be used
- to "classify" previously created annotations according to whether or not they contain
- text matching a regular expression.</description>
- <type>Boolean</type>
- <multiValued>false</multiValued>
- <mandatory>false</mandatory>
- </configurationParameter>
+ <configurationParameter>
+ <name>CorpusAddress</name>
+ <description>URL of the corpus on the Corpus Server to which the CAS is written</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
</configurationParameters>
<configurationParameterSettings>
- <nameValuePair>
- <name>Patterns</name>
- <value>
- <array>
- <string>President|Governor|Senator|Representative|Gov\.|Sen\.|Rep\.</string>
- </array>
- </value>
- </nameValuePair>
- <nameValuePair>
- <name>TypeNames</name>
- <value>
- <array>
- <string>GovernmentTitle</string>
- </array>
- </value>
- </nameValuePair>
+ <nameValuePair>
+ <name>CorpusAddress</name>
+ <value>
+ <string>http://localhost:8080/corpus-server/rest/corpora/wikinews</string>
+ </value>
+ </nameValuePair>
</configurationParameterSettings>
- <typeSystemDescription>
- <types>
- <typeDescription>
- <name>GovernmentTitle</name>
- <description>The title of a government official.</description>
- <supertypeName>uima.tcas.Annotation</supertypeName>
- </typeDescription>
- </types>
- </typeSystemDescription>
+
+ <!-- TODO: Can the type system (TS) be imported via HTTP? If not, the user must download it manually. -->
+ <typeSystemDescription>
+ <imports>
+ <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+ </imports>
+ </typeSystemDescription>
<capabilities>
<capability>
<inputs/>
- <outputs>
- <type>GovernmentTitle</type>
- </outputs>
+ <outputs/>
<languagesSupported/>
</capability>
</capabilities>
@@ -107,17 +65,4 @@ TypeNames[i].</description>
<outputsNewCASes>false</outputsNewCASes>
</operationalProperties>
</analysisEngineMetaData>
- <externalResourceDependencies>
- <externalResourceDependency>
- <key>PatternFile</key>
- <description>An optional external file containing regular expressions to match. File format is as follows:
- - Lines starting with # or whitepsace are ignored
- - Lines starting with % indicate an annotation type.
- - All other lines are regular expressions. The language is that supported by Java 1.4.
- If a regular expression is matched, it will be annotated with the last annotation type
- declared (the nearest preceding line starting with %).
- </description>
- <optional>true</optional>
- </externalResourceDependency>
- </externalResourceDependencies>
</analysisEngineDescription>