You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by mb...@apache.org on 2007/09/25 12:58:12 UTC

svn commit: r579196 - in /incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc: RegExAnnotator.xml concepts.xml

Author: mbaessler
Date: Tue Sep 25 03:58:04 2007
New Revision: 579196

URL: http://svn.apache.org/viewvc?rev=579196&view=rev
Log:
UIMA-539

Update the regex rules file (concepts.xml) and the annotator descriptor's type system to detect email addresses.

https://issues.apache.org/jira/browse/UIMA-539

Modified:
    incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/RegExAnnotator.xml
    incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/concepts.xml

Modified: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/RegExAnnotator.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/RegExAnnotator.xml?rev=579196&r1=579195&r2=579196&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/RegExAnnotator.xml (original)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/RegExAnnotator.xml Tue Sep 25 03:58:04 2007
@@ -52,38 +52,24 @@
     <typeSystemDescription>
       <types>
         <typeDescription>
-          <name>org.apache.uima.TestAnnot</name>
+          <name>org.apache.uima.EmailAddress</name>
           <description/>
           <supertypeName>uima.tcas.Annotation</supertypeName>
           <features>
             <featureDescription>
-              <name>testFeature</name>
+              <name>localPart</name>
               <description/>
               <rangeTypeName>uima.cas.String</rangeTypeName>
             </featureDescription>
             <featureDescription>
-              <name>testFeature1</name>
-              <description/>
-              <rangeTypeName>org.apache.uima.TestAnnot1</rangeTypeName>
-            </featureDescription>
-            <featureDescription>
-              <name>confidenceValue</name>
-              <description/>
-              <rangeTypeName>uima.cas.Float</rangeTypeName>
-            </featureDescription>
-            <featureDescription>
-              <name>ruleId</name>
+              <name>domainPart</name>
               <description/>
               <rangeTypeName>uima.cas.String</rangeTypeName>
             </featureDescription>
           </features>
         </typeDescription>
-        <typeDescription>
-          <name>org.apache.uima.TestAnnot1</name>
-          <description/>
-          <supertypeName>uima.tcas.Annotation</supertypeName>
-        </typeDescription>
-      </types>
+         
+       </types>
     </typeSystemDescription>
     <capabilities>
       <capability>

Modified: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/concepts.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/concepts.xml?rev=579196&r1=579195&r2=579196&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/concepts.xml (original)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/desc/concepts.xml Tue Sep 25 03:58:04 2007
@@ -19,59 +19,18 @@
  -->
 <conceptSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="concept.xsd">
 	
-    <concept name="carConcept">
+    <concept name="emailAddressDetection">
     	<rules>
-			<rule ruleId="ID1" regEx="TestRegex" matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation" confidence="1.0">
-				<matchTypeFilter>
-					<feature name="language">en</feature>
-				</matchTypeFilter>
-				<updateMatchTypeAnnotation>
-			 		<setFeature name="language" type="String">$0</setFeature>
-			 	</updateMatchTypeAnnotation>	
-			 	<ruleExceptions>	
-	  				<exception matchType="uima.tcas.DocumentAnnotation">Exception</exception>
-  				</ruleExceptions>
-
-			</rule>
-      		<rule regEx="TestRegexSubrule1" matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation" confidence="0.7">
-				<matchTypeFilter>
-					<feature name="language">en</feature>
-				</matchTypeFilter>
-			</rule>
-			<rule regEx="TestRegexSubrule2" matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation" confidence="0.3">
-				<matchTypeFilter>
-					<feature name="language">en</feature>
-				</matchTypeFilter>
-				
-			</rule>		
+	   		<rule regEx="([a-zA-Z0-9!#$%*+'/=?^_-`{|}~.\x26]+)@([a-zA-Z0-9._-]+[a-zA-Z]{2,4})" matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"/>		
 	    </rules>
 	    <createAnnotations>	
-			<annotation id="testannot" type="org.apache.uima.TestAnnot">
-				<begin group="0"/>
-				<end group="0"/>
-				<setFeature name="testFeature" type="String">$0</setFeature>		
-				<setFeature name="testFeature1" type="Reference">testannot1</setFeature>
-				<setFeature name="confidenceValue" type="Confidence"/>
-				<setFeature name="ruleId" type="RuleId"/>
-		 	</annotation>
-		 			 	
-		 	<annotation id="testannot1" type="org.apache.uima.TestAnnot1">
-				<begin group="0"/>
-				<end group="0"/>
-		 	</annotation>
-	 	</createAnnotations>
-    </concept>
-    
-    <concept name="simpleConcept">
-    	<rules>
-			<rule regEx="RestRegex" matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"/>
- 	    </rules>
-	    <createAnnotations>
-		 	<annotation id="testannotation" type="org.apache.uima.TestAnnot1">
+			<annotation id="emailAnnot" type="org.apache.uima.EmailAddress">
 				<begin group="0"/>
 				<end group="0"/>
+				<setFeature name="localPart" type="String">$1</setFeature>		
+				<setFeature name="domainPart" type="String">$2</setFeature>
 		 	</annotation>
 	 	</createAnnotations>
     </concept>
-    
+        
 </conceptSet>