You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/09/27 14:44:50 UTC

svn commit: r1176362 - /incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml

Author: joern
Date: Tue Sep 27 12:44:50 2011
New Revision: 1176362

URL: http://svn.apache.org/viewvc?rev=1176362&view=rev
Log:
OPENNLP-261 Adapted descriptor to work with CSCasWriter.

Modified:
    incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml

Modified: incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml?rev=1176362&r1=1176361&r2=1176362&view=diff
==============================================================================
--- incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml (original)
+++ incubator/opennlp/sandbox/corpus-server-connector/desc/CSCasWriter.xml Tue Sep 27 12:44:50 2011
@@ -24,80 +24,38 @@
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
   <primitive>true</primitive>
-  <annotatorImplementationName>org.apache.uima.examples.cas.RegExAnnotator</annotatorImplementationName>
+  <annotatorImplementationName>org.apache.opennlp.corpus_server.connector.CSCasWriter</annotatorImplementationName>
   <analysisEngineMetaData>
-    <name>RegEx Annotator</name>
-    <description>Matches regular expressions in document text.</description>
+    <name>Corpus Server Cas Writer</name>
+    <description>Writes a CAS (back) to the Corpus Server.</description>
     <configurationParameters>
-      <configurationParameter>
-        <name>Patterns</name>
-        <description>Regular expression patterns to match.  The language is that supported by Java 1.4.</description>
-        <type>String</type>
-        <multiValued>true</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>TypeNames</name>
-        <description>Names of CAS Types to create for the patterns found.  The indexes of this array
-correspond to the indexes of the Patterns or PatternFiles arrays.  If a match is found for
-Patterns[i] or for any pattern in PatternFile[i], it will result in an annotation of type
-TypeNames[i].</description>
-        <type>String</type>
-        <multiValued>true</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>ContainingAnnotationTypes</name>
-        <description>Names of CAS Input Types within which annotations should be created.</description>
-        <type>String</type>
-        <multiValued>true</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>AnnotateEntireContainingAnnotation</name>
-        <description>When the ContainingAnnoationTypes parameter is specified, a value of true for this
-	parameter will cause the entire containing annotation to be used as the span of the new
-	annotation, rather than just the span of the regular expression match.  This can be used
-	to "classify" previously created annotations according to whether or not they contain
-	text matching a regular expression.</description>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
+		<configurationParameter>
+			<name>CorpusAddress</name>
+			<description>URL of the corpus on the Corpus Server to which the CAS is written</description>
+			<type>String</type>
+			<multiValued>false</multiValued>
+			<mandatory>true</mandatory>
+		</configurationParameter>
     </configurationParameters>
     <configurationParameterSettings>
-      <nameValuePair>
-        <name>Patterns</name>
-        <value>
-          <array>
-            <string>President|Governor|Senator|Representative|Gov\.|Sen\.|Rep\.</string>
-          </array>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>TypeNames</name>
-        <value>
-          <array>
-            <string>GovernmentTitle</string>
-          </array>
-        </value>
-      </nameValuePair>
+		<nameValuePair>
+			<name>CorpusAddress</name>
+			<value>
+				<string>http://localhost:8080/corpus-server/rest/corpora/wikinews</string>
+			</value>
+		</nameValuePair>
     </configurationParameterSettings>
-    <typeSystemDescription>
-      <types>
-        <typeDescription>
-          <name>GovernmentTitle</name>
-          <description>The title of a government official.</description>
-          <supertypeName>uima.tcas.Annotation</supertypeName>
-        </typeDescription>
-      </types>
-    </typeSystemDescription>
+    
+	<!-- TODO: Can the type system (TS) be imported via HTTP? Otherwise the user must download it manually. -->
+	<typeSystemDescription>
+		<imports>
+			<import name="org.apache.uima.examples.SourceDocumentInformation"/>
+		</imports>
+	</typeSystemDescription>
     <capabilities>
       <capability>
         <inputs/>
-        <outputs>
-          <type>GovernmentTitle</type>
-        </outputs>
+        <outputs/>
         <languagesSupported/>
       </capability>
     </capabilities>
@@ -107,17 +65,4 @@ TypeNames[i].</description>
 		<outputsNewCASes>false</outputsNewCASes>
 	</operationalProperties>
   </analysisEngineMetaData>
-  <externalResourceDependencies>
-    <externalResourceDependency>
-      <key>PatternFile</key>
-      <description>An optional external file containing regular expressions to match. File format is as follows: 
-		  - Lines starting with # or whitepsace are ignored 
-		  - Lines starting with % indicate an annotation type. 
-		  - All other lines are regular expressions. The language is that supported by Java 1.4. 
-		    If a regular expression is matched, it will be annotated with the last annotation type 
-		    declared (the nearest preceding line starting with %).
-	  </description>
-      <optional>true</optional>
-    </externalResourceDependency>
-  </externalResourceDependencies>
 </analysisEngineDescription>