You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by tw...@apache.org on 2009/01/14 17:55:39 UTC
svn commit: r734444 - in
/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src:
main/java/org/apache/uima/annotator/regex/impl/
test/java/org/apache/uima/annotator/regex/ test/resources/emptyRegex/
Author: twgoetz
Date: Wed Jan 14 08:55:21 2009
New Revision: 734444
URL: http://svn.apache.org/viewvc?rev=734444&view=rev
Log:
Jira UIMA-1264: fix infinite loop in regex annotator when matching empty string on match-all.
https://issues.apache.org/jira/browse/UIMA-1264
Added:
incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java
incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/
incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml
incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml
Modified:
incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java
Modified: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java?rev=734444&r1=734443&r2=734444&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java (original)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/main/java/org/apache/uima/annotator/regex/impl/RegExAnnotator.java Wed Jan 14 08:55:21 2009
@@ -371,8 +371,18 @@
}
// set start match position for the next match to the
// current end match position
- pos = matcher.end();
-
+ if (matcher.end() == pos) {
+ // Special case: matched the empty string. If at the end of the input, need
+ // to break.
+ if (pos == matchValue.length()) {
+ break;
+ }
+ // Otherwise increment search pos so as not to loop.
+ ++pos;
+ } else {
+ // Default case: match was non-empty.
+ pos = matcher.end();
+ }
}
}
// MatchStrategy - MATCH_COMPLETE
Added: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java?rev=734444&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java (added)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/java/org/apache/uima/annotator/regex/TestEmptyRegex.java Wed Jan 14 08:55:21 2009
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.annotator.regex;
+
+import java.io.File;
+
+import junit.framework.TestCase;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.test.junit_extension.AnnotatorTester;
+import org.apache.uima.test.junit_extension.JUnitExtension;
+
+/**
+ * Regression test for UIMA-1264: a rule whose regex can match the empty string
+ * (here "a*" with matchStrategy "matchAll") must not make the annotator loop forever.
+ */
+public class TestEmptyRegex extends TestCase {
+
+ /**
+ * Runs the regex annotator configured by emptyRegex/RegExEmptyRegex.xml on a short text.
+ * The result checks below are commented out, so the test passes once performTest returns.
+ *
+ * @throws Exception propagated from the AnnotatorTester setup or from performTest
+ */
+ public void testEmptyRegex() throws Exception {
+
+ // create annotation tester with the regex annotator specifier
+ AnnotatorTester annotTester = new AnnotatorTester(JUnitExtension
+ .getFile("emptyRegex/RegExEmptyRegex.xml"));
+ CAS cas = annotTester.performTest("Test an empty regex.", "en");
+
+// // define result interested in
+// String[] tofs = { "org.apache.uima.TestAnnot", "org.apache.uima.TestAnnot:testFeature",
+// "org.apache.uima.TestAnnot:testFeature1", "org.apache.uima.TestAnnot1",
+// "org.apache.uima.TestAnnot1:testFeature", "org.apache.uima.TestAnnot1:testFeature1",
+// "org.apache.uima.TestAnnot2", "org.apache.uima.TestAnnot2:testFeature",
+// "org.apache.uima.TestAnnot2:testFeature1", "org.apache.uima.TestAnnot3",
+// "org.apache.uima.TestAnnot3:testFeature", "org.apache.uima.TestAnnot3:testFeature1",
+// "org.apache.uima.TestAnnot4" };
+//
+// // compare results
+// File outputFile = new File(JUnitExtension.getFile("annotationCreation"),
+// "createAnnot_testoutput.txt");
+// AnnotatorTester.checkResult(cas, tofs, JUnitExtension
+// .getFile("annotationCreation/createAnnotRef.txt"), outputFile);
+ }
+
+}
Added: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml?rev=734444&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml (added)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/RegExEmptyRegex.xml Wed Jan 14 08:55:21 2009
@@ -0,0 +1,157 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ -->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>org.apache.uima.annotator.regex.impl.RegExAnnotator</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>RegExAnnotator</name>
+ <description>Matches regular expressions in document text.</description>
+ <configurationParameters>
+
+ <configurationParameter>
+ <name>ConceptFiles</name>
+ <description>list of concept files to configure the annotator
+ </description>
+ <type>String</type>
+ <multiValued>true</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+
+ </configurationParameters>
+
+ <configurationParameterSettings>
+
+ <nameValuePair>
+ <name>ConceptFiles</name>
+ <value>
+ <array>
+ <string>emptyRegex/emptyRegex.xml</string>
+ </array>
+ </value>
+ </nameValuePair>
+
+ </configurationParameterSettings>
+ <typeSystemDescription>
+ <types>
+ <typeDescription>
+ <name>org.apache.uima.TestAnnot</name>
+ <description/>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>testFeature</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ <featureDescription>
+ <name>testFeature1</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.TestAnnot1</name>
+ <description/>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>testFeature</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ <featureDescription>
+ <name>testFeature1</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.TestAnnot2</name>
+ <description/>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>testFeature</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ <featureDescription>
+ <name>testFeature1</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.TestAnnot3</name>
+ <description/>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>testFeature</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ <featureDescription>
+ <name>testFeature1</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.TestAnnot4</name>
+ <description/>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ </features>
+ </typeDescription>
+
+ </types>
+ </typeSystemDescription>
+
+ <typePriorities>
+ <priorityList>
+ <type>org.apache.uima.TestAnnot</type>
+ <type>org.apache.uima.TestAnnot1</type>
+ <type>org.apache.uima.TestAnnot2</type>
+ <type>org.apache.uima.TestAnnot3</type>
+ <type>org.apache.uima.TestAnnot4</type>
+ </priorityList>
+ </typePriorities>
+
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs/>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+
+</analysisEngineDescription>
Added: incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml?rev=734444&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml (added)
+++ incubator/uima/sandbox/trunk/RegularExpressionAnnotator/src/test/resources/emptyRegex/emptyRegex.xml Wed Jan 14 08:55:21 2009
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+ <!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to you under the Apache License, Version
+ 2.0 (the "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied. See the License for the specific language governing
+ permissions and limitations under the License.
+ -->
+<conceptSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns="http://incubator.apache.org/uima/regex">
+ <concept name="annotationCreationTest">
+ <rules>
+ <rule ruleId="TestID" regEx="a*" matchStrategy="matchAll"
+ matchType="uima.tcas.DocumentAnnotation" />
+ </rules>
+ <createAnnotations>
+ <annotation id="Test1" type="org.apache.uima.TestAnnot">
+ <begin group="0" />
+ <end group="0" />
+ </annotation>
+ </createAnnotations>
+ </concept>
+</conceptSet>
\ No newline at end of file