You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2012/01/12 17:32:48 UTC
svn commit: r1230625 - in /uima/sandbox/trunk/TextMarker:
uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/
uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/
uimaj-ep-textmarker-engine/src/main/java...
Author: pkluegl
Date: Thu Jan 12 16:32:47 2012
New Revision: 1230625
URL: http://svn.apache.org/viewvc?rev=1230625&view=rev
Log:
UIMA-2330
Added greedy filtering behavior for rule inference. Added unit tests for basic filtering functionality.
Added:
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt
Modified:
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java
Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java?rev=1230625&r1=1230624&r2=1230625&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java Thu Jan 12 16:32:47 2012
@@ -64,7 +64,7 @@ public class BasicTypeConstraint impleme
TextMarkerBasic tmb = (TextMarkerBasic) fs;
if (types != null) {
for (Type each : types) {
- result |= tmb.beginsWith(each) && tmb.endsWith(each);
+ result |= tmb.isPartOf(each)|| tmb.beginsWith(each) || tmb.endsWith(each);
if (result)
break;
}
Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java?rev=1230625&r1=1230624&r2=1230625&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java Thu Jan 12 16:32:47 2012
@@ -26,7 +26,6 @@ import java.util.List;
import java.util.TreeSet;
import org.apache.uima.cas.ConstraintFactory;
-import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FSMatchConstraint;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
@@ -51,7 +50,8 @@ public class TextMarkerTypeMatcher imple
public Collection<AnnotationFS> getMatchingAnnotations(TextMarkerStream stream,
TextMarkerBlock parent) {
-
+ // TODO what about the matching direction?
+ // TODO can this comparator ignore some annotations, e.g. ones with the same offsets and the same type?
Collection<AnnotationFS> result = new TreeSet<AnnotationFS>(comparator);
List<Type> types = getTypes(parent, stream);
for (Type type : types) {
@@ -64,19 +64,18 @@ public class TextMarkerTypeMatcher imple
result.add(stream.getDocumentAnnotation());
} else {
- FSIterator<AnnotationFS> iterator = stream.getFilter().createFilteredIterator(
- stream.getCas(), type);
-
- // AnnotationIndex<AnnotationFS> annotationIndex = stream.getCas().getAnnotationIndex(type);
- // stream.getCas().createFilteredIterator(annotationIndex.iterator(),
- // stream.getFilter().createFilteredIterator(null, stream, type));
- // FSMatchConstraint anchorConstraint = createAnchorConstraints(parent, stream);
- // FSIterator<AnnotationFS> iterator = stream.getFilteredBasicIterator(anchorConstraint);
- // iterator.moveToFirst();
- while (iterator.isValid()) {
- AnnotationFS annotation = iterator.get();
- result.add(annotation);
- iterator.moveToNext();
+ stream.moveToFirst();
+ while (stream.isValid()) {
+ TextMarkerBasic nextBasic = (TextMarkerBasic) stream.get();
+ List<Type> allTypes = stream.getCas().getTypeSystem().getProperlySubsumedTypes(type);
+ allTypes.add(type);
+ for (Type eachType : allTypes) {
+ Collection<AnnotationFS> beginAnchors = nextBasic.getBeginAnchors(eachType);
+ if (beginAnchors != null) {
+ result.addAll(beginAnchors);
+ }
+ }
+ stream.moveToNext();
}
}
}
@@ -91,12 +90,10 @@ public class TextMarkerTypeMatcher imple
stream.moveToNext();
if (stream.isValid()) {
TextMarkerBasic nextBasic = (TextMarkerBasic) stream.get();
- // TODO also child types!
List<Type> reTypes = ruleElement.getMatcher().getTypes(parent, stream);
Collection<AnnotationFS> anchors = new TreeSet<AnnotationFS>(new AnnotationComparator());
for (Type eachMatchType : reTypes) {
-
List<Type> types = stream.getCas().getTypeSystem().getProperlySubsumedTypes(eachMatchType);
types.add(eachMatchType);
for (Type eachType : types) {
Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java?rev=1230625&r1=1230624&r2=1230625&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java Thu Jan 12 16:32:47 2012
@@ -47,7 +47,7 @@ public class PlusGreedy implements RuleE
}
for (RuleElementMatch match : matches) {
allEmpty &= match.getTextsMatched().isEmpty();
- result &= match.getTextsMatched().isEmpty() || match.matched();
+ result &= match.matched();
}
if (!result && matches.size() > 1) {
matches.remove(matches.size() - 1);
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java?rev=1230625&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java Thu Jan 12 16:32:47 2012
@@ -0,0 +1,12 @@
+package org.apache.uima.textmarker;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+import org.junit.runners.Suite.SuiteClasses;
+
+@RunWith(Suite.class)
+@SuiteClasses({ DynamicAnchoringTest.class, DynamicAnchoringTest2.class, FilteringTest.class,
+ QuantifierTest1.class, QuantifierTest2.class, RuleInferenceTest.class })
+public class AllTests {
+
+}
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java?rev=1230625&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java Thu Jan 12 16:32:47 2012
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.textmarker;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.junit.Test;
+
+public class FilteringTest {
+
+ @Test
+ public void test() {
+ String name = this.getClass().getSimpleName();
+ String namespace = this.getClass().getPackage().getName().replaceAll("\\.", "/");
+ CAS cas = null;
+ try {
+ cas = TextMarkerTestUtils.process(namespace + "/" + name + ".tm", namespace + "/" + name
+ + ".txt", 50);
+ } catch (Exception e) {
+ e.printStackTrace();
+ assert (false);
+ }
+ Type t = null;
+ AnnotationIndex<AnnotationFS> ai = null;
+ FSIterator<AnnotationFS> iterator = null;
+
+ t = TextMarkerTestUtils.getTestType(cas, 1);
+ ai = cas.getAnnotationIndex(t);
+ assertEquals(0, ai.size());
+
+ t = TextMarkerTestUtils.getTestType(cas, 2);
+ ai = cas.getAnnotationIndex(t);
+ iterator = ai.iterator();
+ assertEquals(2, ai.size());
+ assertEquals("Peter, Jochen", iterator.next().getCoveredText());
+ assertEquals("Jochen, Flo", iterator.next().getCoveredText());
+
+ t = TextMarkerTestUtils.getTestType(cas, 3);
+ ai = cas.getAnnotationIndex(t);
+ iterator = ai.iterator();
+ assertEquals(0, ai.size());
+
+ t = TextMarkerTestUtils.getTestType(cas, 4);
+ ai = cas.getAnnotationIndex(t);
+ iterator = ai.iterator();
+ assertEquals(2, ai.size());
+ assertEquals(", Jochen, ", iterator.next().getCoveredText());
+ assertEquals(", ", iterator.next().getCoveredText());
+
+ t = TextMarkerTestUtils.getTestType(cas, 5);
+ ai = cas.getAnnotationIndex(t);
+ iterator = ai.iterator();
+ assertEquals(4, ai.size());
+ assertEquals("Peter, Jochen, Flo", iterator.next().getCoveredText());
+ assertEquals("Flo und", iterator.next().getCoveredText());
+ assertEquals("und Georg", iterator.next().getCoveredText());
+ assertEquals("Georg.", iterator.next().getCoveredText());
+
+
+ t = TextMarkerTestUtils.getTestType(cas, 15);
+ ai = cas.getAnnotationIndex(t);
+ iterator = ai.iterator();
+ assertEquals(1, ai.size());
+ assertEquals("Peter, Jochen", iterator.next().getCoveredText());
+
+ t = TextMarkerTestUtils.getTestType(cas, 16);
+ ai = cas.getAnnotationIndex(t);
+ iterator = ai.iterator();
+ assertEquals(1, ai.size());
+ assertEquals("Georg.", iterator.next().getCoveredText());
+
+ t = TextMarkerTestUtils.getTestType(cas, 17);
+ ai = cas.getAnnotationIndex(t);
+ iterator = ai.iterator();
+ assertEquals(1, ai.size());
+ assertEquals("Flo und Georg.", iterator.next().getCoveredText());
+
+ if (cas != null) {
+ cas.release();
+ }
+
+ }
+}
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm?rev=1230625&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm Thu Jan 12 16:32:47 2012
@@ -0,0 +1,28 @@
+PACKAGE org.apache.uima;
+
+DECLARE T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16,T17,T18,T19,T20,T21,T22,T23,T24,T25;
+
+
+Document{-> RETAINTYPE(SPACE)};
+
+W W{-> MARK(T1)};
+
+W PM SPACE W{-> MARK(T2,1,2,3,4)};
+
+Document{-> FILTERTYPE(W)};
+
+W{-> MARK(T3)};
+
+(PM SPACE)+{-> MARK(T4)};
+
+Document{-> RETAINTYPE(W)};
+
+Document{-> FILTERTYPE(T4)};
+
+W ANY{-> MARK(T5,1,2)};
+
+Document{-> FILTERTYPE, RETAINTYPE};
+
+(((CW PM) CW){-> MARK(T15)} COMMA)
+ (CW SW (CW PERIOD){-> MARK(T16)})
+ {-> MARK(T17)};
\ No newline at end of file
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt?rev=1230625&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt Thu Jan 12 16:32:47 2012
@@ -0,0 +1 @@
+Peter, Jochen, Flo und Georg.
Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java?rev=1230625&r1=1230624&r2=1230625&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java Thu Jan 12 16:32:47 2012
@@ -28,6 +28,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Stack;
+import java.util.TreeSet;
import org.apache.uima.UIMAFramework;
import org.apache.uima.resource.ResourceManager;
@@ -485,7 +486,7 @@ public class TextMarkerTypeChecker imple
System.err.println("ERROR: Failed to get BasicTypeSystem!! " + this.toString());
}
if (types == null) {
- types = new HashSet<String>();
+ types = new TreeSet<String>();
}
// traverse:
@@ -511,7 +512,7 @@ public class TextMarkerTypeChecker imple
}
public Set<String> getShortTypeNames(Set<String> types) {
- Set<String> result = new HashSet<String>();
+ Set<String> result = new TreeSet<String>();
for (String string : types) {
String[] nameSpace = string.split("[.]");
result.add(nameSpace[nameSpace.length - 1]);
@@ -539,7 +540,7 @@ public class TextMarkerTypeChecker imple
fileExtended += "/";
}
fileExtended = fileExtended.substring(0, fileExtended.length() - 1) + ".xml";
- Set<String> types = new HashSet<String>();
+ Set<String> types = new TreeSet<String>();
for (IFolder folder : folders) {
types.addAll(getTypes(folder, fileExtended));
}