You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by tw...@apache.org on 2009/01/14 17:55:39 UTC

svn commit: r734444 - in /incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src: main/java/org/apache/uima/annotator/regex/impl/ test/java/org/apache/uima/annotator/regex/ test/resources/emptyRegex/

Author: twgoetz
Date: Wed Jan 14 08:55:21 2009
New Revision: 734444

URL: http://svn.apache.org/viewvc?rev=734444&view=rev
Log:
Jira UIMA-1264: fix infinite loop in regex annotator when matching empty string on match-all.

https://issues.apache.org/jira/browse/UIMA-1264

Added:
    incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java
    incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/
    incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml
    incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml
Modified:
    incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java

Modified: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java?rev=734444&r1=734443&r2=734444&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java (original)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java Wed Jan 14 08:55:21 2009
@@ -371,8 +371,18 @@
                      }
                      // set start match position for the next match to the
                      // current end match position
-                     pos = matcher.end();
-
+                     if (matcher.end() == pos) {
+                       // Special case: matched the empty string.  If at the end of the input, need
+                       // to break.
+                       if (pos == matchValue.length()) {
+                         break;
+                       }
+                       // Otherwise increment search pos so as not to loop.
+                       ++pos;
+                     } else {
+                       // Default case: match was non-empty.
+                       pos = matcher.end();
+                     }
                   }
                }
                // MatchStrategy - MATCH_COMPLETE

Added: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java?rev=734444&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java (added)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java Wed Jan 14 08:55:21 2009
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.annotator.regex;
+
+import java.io.File;
+
+import junit.framework.TestCase;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.test.junit_extension.AnnotatorTester;
+import org.apache.uima.test.junit_extension.JUnitExtension;
+
+/**
+ * Regression test for UIMA-1264: runs the RegExAnnotator with a matchAll rule whose regex
+ * ("a*") can match the empty string; before the fix this configuration looped forever.
+ */
+public class TestEmptyRegex extends TestCase {
+
+  /**
+   * Runs the annotator over a short text using the emptyRegex descriptor. No output is
+   * asserted; the test passes when processing completes without an exception.
+   * 
+   * @throws Exception any failure raised while setting up or running the annotator
+   */
+  public void testEmptyRegex() throws Exception {
+
+    // build the tester from the emptyRegex descriptor and process one short English text
+    AnnotatorTester annotTester = new AnnotatorTester(JUnitExtension
+        .getFile("emptyRegex/RegExEmptyRegex.xml"));
+    CAS cas = annotTester.performTest("Test an empty regex.", "en");
+    // Disabled result check; it references the annotationCreation fixtures, not emptyRegex.
+//    // define result interested in
+//    String[] tofs = { "org.apache.uima.TestAnnot", "org.apache.uima.TestAnnot:testFeature",
+//        "org.apache.uima.TestAnnot:testFeature1", "org.apache.uima.TestAnnot1",
+//        "org.apache.uima.TestAnnot1:testFeature", "org.apache.uima.TestAnnot1:testFeature1",
+//        "org.apache.uima.TestAnnot2", "org.apache.uima.TestAnnot2:testFeature",
+//        "org.apache.uima.TestAnnot2:testFeature1", "org.apache.uima.TestAnnot3",
+//        "org.apache.uima.TestAnnot3:testFeature", "org.apache.uima.TestAnnot3:testFeature1",
+//        "org.apache.uima.TestAnnot4" };
+//
+//    // compare results
+//    File outputFile = new File(JUnitExtension.getFile("annotationCreation"),
+//        "createAnnot_testoutput.txt");
+//    AnnotatorTester.checkResult(cas, tofs, JUnitExtension
+//        .getFile("annotationCreation/createAnnotRef.txt"), outputFile);
+  }
+
+}

Added: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml?rev=734444&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml (added)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml Wed Jan 14 08:55:21 2009
@@ -0,0 +1,157 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ -->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.uima.annotator.regex.impl.RegExAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>RegExAnnotator</name>
+    <description>Matches regular expressions in document text.</description>
+    <configurationParameters>
+           	
+       <configurationParameter>
+          <name>ConceptFiles</name>
+          <description>list of concept files to configure the annotator
+          </description>
+          <type>String</type>
+          <multiValued>true</multiValued>
+          <mandatory>true</mandatory>
+       </configurationParameter>
+       
+    </configurationParameters>
+    
+    <configurationParameterSettings>
+
+       <nameValuePair>
+          <name>ConceptFiles</name>
+          <value>
+          	<array>
+          	   <string>emptyRegex/emptyRegex.xml</string> 
+          	</array>
+          </value>
+       </nameValuePair>
+    
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <types>
+        <typeDescription>
+          <name>org.apache.uima.TestAnnot</name>
+          <description/>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+          <features>
+            <featureDescription>
+              <name>testFeature</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+           <featureDescription>
+              <name>testFeature1</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+          </features>
+        </typeDescription>
+       <typeDescription>
+          <name>org.apache.uima.TestAnnot1</name>
+          <description/>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+          <features>
+            <featureDescription>
+              <name>testFeature</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+           <featureDescription>
+              <name>testFeature1</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+          </features>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.TestAnnot2</name>
+          <description/>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+          <features>
+            <featureDescription>
+              <name>testFeature</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+           <featureDescription>
+              <name>testFeature1</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+          </features>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.TestAnnot3</name>
+          <description/>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+          <features>
+            <featureDescription>
+              <name>testFeature</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+           <featureDescription>
+              <name>testFeature1</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+          </features>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.TestAnnot4</name>
+          <description/>
+          <supertypeName>uima.tcas.Annotation</supertypeName>
+          <features>
+          </features>
+        </typeDescription>
+        
+      </types>
+    </typeSystemDescription>
+        
+    <typePriorities>
+	  	<priorityList>
+    		<type>org.apache.uima.TestAnnot</type>
+    		<type>org.apache.uima.TestAnnot1</type>
+    		<type>org.apache.uima.TestAnnot2</type>
+    		<type>org.apache.uima.TestAnnot3</type>
+    		<type>org.apache.uima.TestAnnot4</type>
+  		</priorityList>
+	</typePriorities>
+    
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+
+</analysisEngineDescription>

Added: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml?rev=734444&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml (added)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml Wed Jan 14 08:55:21 2009
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+	<!--
+		Licensed to the Apache Software Foundation (ASF) under one or more
+		contributor license agreements. See the NOTICE file distributed with
+		this work for additional information regarding copyright ownership.
+		The ASF licenses this file to you under the Apache License, Version
+		2.0 (the "License"); you may not use this file except in compliance
+		with the License. You may obtain a copy of the License at
+		http://www.apache.org/licenses/LICENSE-2.0
+		Unless required by applicable law or agreed to in writing, software
+		distributed under the License is distributed on an "AS IS" BASIS,
+		WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+		implied. See the License for the specific language governing
+		permissions and limitations under the License.
+	-->
+<conceptSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xmlns="http://incubator.apache.org/uima/regex">
+	<concept name="annotationCreationTest">
+		<rules>
+			<rule ruleId="TestID" regEx="a*" matchStrategy="matchAll"
+				matchType="uima.tcas.DocumentAnnotation" />
+		</rules>
+		<createAnnotations>
+			<annotation id="Test1" type="org.apache.uima.TestAnnot">
+				<begin group="0" />
+				<end group="0" />
+			</annotation>
+		</createAnnotations>
+	</concept>
+</conceptSet>
\ No newline at end of file