You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2016/07/19 08:44:28 UTC
svn commit: r1753352 - in /uima/ruta/trunk/ruta-core/src:
main/java/org/apache/uima/ruta/RutaStream.java
main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
test/java/org/apache/uima/ruta/WildCard2Test.java
Author: pkluegl
Date: Tue Jul 19 08:44:28 2016
New Revision: 1753352
URL: http://svn.apache.org/viewvc?rev=1753352&view=rev
Log:
UIMA-5013
- added dummy anchor annotations at the document begin and end so that the wildcard can call continueMatch instead of startMatch
- added test
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java?rev=1753352&r1=1753351&r2=1753352&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java Tue Jul 19 08:44:28 2016
@@ -76,6 +76,7 @@ import org.apache.uima.ruta.rule.MatchCo
import org.apache.uima.ruta.rule.RuleElement;
import org.apache.uima.ruta.type.RutaAnnotation;
import org.apache.uima.ruta.type.RutaBasic;
+import org.apache.uima.ruta.type.RutaOptional;
import org.apache.uima.ruta.utils.RutaListUtils;
import org.apache.uima.ruta.utils.UIMAUtils;
import org.apache.uima.ruta.visitor.InferenceCrowd;
@@ -118,6 +119,11 @@ public class RutaStream extends FSIterat
private boolean onlyOnce = false;
+ private Annotation documentBeginAnchor;
+
+ private Annotation documentEndAnchor;
+
+
public RutaStream(CAS cas, Type basicType, FilterManager filter, boolean lowMemoryProfile,
boolean simpleGreedyForComposed, InferenceCrowd crowd) {
super();
@@ -134,6 +140,8 @@ public class RutaStream extends FSIterat
documentAnnotation = cas.getDocumentAnnotation();
documentAnnotationType = getCas().getDocumentAnnotation().getType();
basicIt.moveToFirst();
+ documentBeginAnchor = new RutaOptional(getJCas(), 0, 0);
+ documentEndAnchor = new RutaOptional(getJCas(), documentAnnotation.getEnd(), documentAnnotation.getEnd());
} else {
documentAnnotation = additionalWindow;
documentAnnotationType = filter.getWindowType();
@@ -1226,4 +1234,23 @@ public class RutaStream extends FSIterat
changeOffsets(annotation, annotation.getBegin(), end, modifikator);
}
+ public AnnotationFS getVeryFirstBeforeWindow(boolean direction) { // Returns a substitute anchor derived from getFirstBasicOfAll() (direction == true) or getLastBasicOfAll() (direction == false).
+ if(direction) { // direction == true: work from the begin offset of the first basic
+ RutaBasic firstBasicOfAll = getFirstBasicOfAll(); // NOTE(review): dereferenced on the next line without a null check - confirm this can never be null here
+ int begin = firstBasicOfAll.getBegin();
+ if(begin == 0) { // the first basic starts at offset 0
+ return documentBeginAnchor; // created with offsets (0, 0); NOTE(review): the visible constructor hunk assigns it in only one branch - confirm it is non-null when an additional window is used
+ } else {
+ return getEndAnchor(begin); // otherwise delegate to getEndAnchor with that begin offset
+ }
+ } else { // direction == false: mirror case, using the end offset of the last basic
+ RutaBasic lastBasicOfAll = getLastBasicOfAll(); // NOTE(review): same unchecked dereference as above - confirm non-null
+ int end = lastBasicOfAll.getEnd();
+ if(end == cas.getDocumentAnnotation().getEnd()) { // the last basic ends exactly at the end of the CAS document annotation
+ return documentEndAnchor; // created with both offsets set to the document annotation's end; same single-branch assignment caveat as documentBeginAnchor
+ } else {
+ return getBeginAnchor(end); // otherwise delegate to getBeginAnchor with that end offset
+ }
+ }
+ }
}
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java?rev=1753352&r1=1753351&r2=1753352&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java Tue Jul 19 08:44:28 2016
@@ -124,7 +124,8 @@ public class WildCardRuleElement extends
List<RuleMatch> result = new ArrayList<RuleMatch>();
AnnotationFS nextOne = annotation;
boolean doneHere = false;
- while (!doneHere && (nextOne = getNextPositionForComposed(cre, after, nextOne, stream)) != null) {
+ while (!doneHere
+ && (nextOne = getNextPositionForComposed(cre, after, nextOne, stream)) != null) {
int pointer = after ? nextOne.getBegin() : nextOne.getEnd();
RutaBasic anchor = stream.getAnchor(!after, pointer);
ComposedRuleElementMatch extendedContainerMatch = containerMatch.copy();
@@ -310,8 +311,8 @@ public class WildCardRuleElement extends
ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(
containerMatch, nextDepth);
if (coveredByWildCard == null) {
- result = nextElement.startMatch(ruleMatch, ruleApply, nextContainerMatch,
- nextElement, stream, crowd);
+ result = nextElement.startMatch(ruleMatch, ruleApply, nextContainerMatch, nextElement,
+ stream, crowd);
} else {
// TODO match and containermatch should be on the correct level!
result = nextElement.continueMatch(after, coveredByWildCard, ruleMatch, ruleApply,
@@ -336,7 +337,7 @@ public class WildCardRuleElement extends
while (!doneHere && iterator.isValid() && stream.isVisible(iterator.get())) {
AnnotationFS nextOne = iterator.get();
int pointer = after ? nextOne.getBegin() : nextOne.getEnd();
- RutaBasic anchor = stream.getAnchor(!after, pointer);
+ AnnotationFS anchor = stream.getAnchor(!after, pointer);
ComposedRuleElementMatch extendedContainerMatch = containerMatch.copy();
RuleMatch extendedMatch = ruleMatch.copy(extendedContainerMatch, after);
@@ -348,15 +349,14 @@ public class WildCardRuleElement extends
ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(
extendedContainerMatch, nextDepth);
if (anchor == null) {
- result = nextElement.startMatch(extendedMatch, ruleApply, nextContainerMatch,
- nextElement, stream, crowd);
- } else {
- // TODO match and containermatch should be on the correct level!
- result = nextElement.continueMatch(after, anchor, extendedMatch, ruleApply,
- nextContainerMatch, sideStepOrigin, nextElement, stream, crowd);
+ anchor = stream.getVeryFirstBeforeWindow(after);
}
+ // TODO match and containermatch should be on the correct level!
+ result = nextElement.continueMatch(after, anchor, extendedMatch, ruleApply,
+ nextContainerMatch, sideStepOrigin, nextElement, stream, crowd);
List<RuleElementMatch> nextList = nextContainerMatch.getInnerMatches().get(nextElement);
- if (nextList == null || nextList.isEmpty() || !nextList.get(nextList.size() - 1).matched()) {
+ if (nextList == null || nextList.isEmpty()
+ || !nextList.get(nextList.size() - 1).matched()) {
moveOn(after, iterator, stream);
} else {
doneHere = true;
@@ -456,8 +456,8 @@ public class WildCardRuleElement extends
} catch (CASException e) {
e.printStackTrace();
}
- RutaFrame window = new RutaFrame(jcas, stream.getDocumentAnnotation().getBegin(), stream
- .getDocumentAnnotation().getEnd());
+ RutaFrame window = new RutaFrame(jcas, stream.getDocumentAnnotation().getBegin(),
+ stream.getDocumentAnnotation().getEnd());
if (annotation == null) {
result = cas.getAnnotationIndex(type).subiterator(window);
} else {
@@ -511,8 +511,8 @@ public class WildCardRuleElement extends
while (!doneHere && (indexOf = document.indexOf(stringValue, pointer)) < document.length()) {
if (indexOf < 0) {
// can't match, the next next element will see it.
- ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(
- containerMatch, nextDepth);
+ ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(containerMatch,
+ nextDepth);
nextElement.continueMatch(after, annotation, ruleMatch, ruleApply, nextContainerMatch,
sideStepOrigin, null, stream, crowd);
doneHere = true;
@@ -529,8 +529,8 @@ public class WildCardRuleElement extends
ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(
extendedContainerMatch, nextDepth);
if (endAnchor == null) {
- result = nextElement.startMatch(extendedMatch, ruleApply, nextContainerMatch,
- nextElement, stream, crowd);
+ result = nextElement.startMatch(extendedMatch, ruleApply, nextContainerMatch, nextElement,
+ stream, crowd);
} else {
result = nextElement.continueMatch(after, endAnchor, extendedMatch, ruleApply,
nextContainerMatch, sideStepOrigin, nextElement, stream, crowd);
Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java?rev=1753352&r1=1753351&r2=1753352&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java Tue Jul 19 08:44:28 2016
@@ -26,60 +26,57 @@ import org.junit.Test;
public class WildCard2Test {
- @Test
- public void test() {
- String document =
- "Ogren, P.V., Wetzler, P.G., Bethard, S.: ClearTK: A UIMA Toolkit for Statistical Natural Language Processing. In: UIMA for NLP workshop at LREC 08. (2008)";
- document += "\n";
- document +=
- "Stephen Soderland, Claire Cardie, and Raymond Mooney. Learning Information Extraction Rules for Semi-Structured and Free Text. In Machine Learning, volume 34, pages 233–272, 1999.";
- String script = "";
- script += "RETAINTYPE(BREAK, SPACE);\n";
- script += "#{-> T6} BREAK #{-> T6};\n";
- script += "T6{-> TRIM(BREAK, SPACE)};\n";
- script += "CW{REGEXP(\".\")} PERIOD{->T7};\n";
- script += "RETAINTYPE;\n";
- script += "BLOCK(forEach) T6 {}{\n";
- script += "(# COLON){-> T1} (# PERIOD){-> T2} # \"(\" NUM{REGEXP(\"....\")-> T3} \")\";\n";
- script +=
- "(#{-CONTAINS(COLON)} PERIOD{-PARTOF(T7)}){-> T1} (# PERIOD){-> T2} # NUM{REGEXP(\"....\")-> T3};\n";
- script += "}\n";
-
- CAS cas = null;
- try {
- cas = RutaTestUtils.getCAS(document);
- Ruta.apply(cas, script);
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- RutaTestUtils.assertAnnotationsEquals(cas, 1, 2, "Ogren, P.V., Wetzler, P.G., Bethard, S.:",
- "Stephen Soderland, Claire Cardie, and Raymond Mooney.");
- RutaTestUtils.assertAnnotationsEquals(cas, 2, 2,
- "ClearTK: A UIMA Toolkit for Statistical Natural Language Processing.",
- "Learning Information Extraction Rules for Semi-Structured and Free Text.");
- RutaTestUtils.assertAnnotationsEquals(cas, 3, 2, "2008", "1999");
-
- cas.release();
- }
-
- @Test
- public void testOptional() {
- String document = "Cw 1 2 3";
- String script = "(CW #){-> T1} SW?;";
-
- CAS cas = null;
- try {
- cas = RutaTestUtils.getCAS(document);
- Ruta.apply(cas, script);
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "Cw 1 2 3");
-
- cas.release();
- }
+ @Test
+ public void test() {
+ String document = "Ogren, P.V., Wetzler, P.G., Bethard, S.: ClearTK: A UIMA Toolkit for Statistical Natural Language Processing. In: UIMA for NLP workshop at LREC 08. (2008)";
+ document += "\n";
+ document += "Stephen Soderland, Claire Cardie, and Raymond Mooney. Learning Information Extraction Rules for Semi-Structured and Free Text. In Machine Learning, volume 34, pages 233–272, 1999.";
+ String script = "";
+ script += "RETAINTYPE(BREAK, SPACE);\n";
+ script += "#{-> T6} BREAK #{-> T6};\n";
+ script += "T6{-> TRIM(BREAK, SPACE)};\n";
+ script += "CW{REGEXP(\".\")} PERIOD{->T7};\n";
+ script += "RETAINTYPE;\n";
+ script += "BLOCK(forEach) T6 {}{\n";
+ script += "(# COLON){-> T1} (# PERIOD){-> T2} # \"(\" NUM{REGEXP(\"....\")-> T3} \")\";\n";
+ script += "(#{-CONTAINS(COLON)} PERIOD{-PARTOF(T7)}){-> T1} (# PERIOD){-> T2} # NUM{REGEXP(\"....\")-> T3};\n";
+ script += "}\n";
+
+ CAS cas = null;
+ try {
+ cas = RutaTestUtils.getCAS(document);
+ Ruta.apply(cas, script);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 2, "Ogren, P.V., Wetzler, P.G., Bethard, S.:",
+ "Stephen Soderland, Claire Cardie, and Raymond Mooney.");
+ RutaTestUtils.assertAnnotationsEquals(cas, 2, 2,
+ "ClearTK: A UIMA Toolkit for Statistical Natural Language Processing.",
+ "Learning Information Extraction Rules for Semi-Structured and Free Text.");
+ RutaTestUtils.assertAnnotationsEquals(cas, 3, 2, "2008", "1999");
+
+ cas.release();
+ }
+
+ @Test
+ public void testOptional() {
+ String document = "Cw 1 2 3";
+ String script = "(CW #){-> T1} SW?;";
+
+ CAS cas = null;
+ try {
+ cas = RutaTestUtils.getCAS(document);
+ Ruta.apply(cas, script);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "Cw 1 2 3");
+
+ cas.release();
+ }
@Test
public void testLookaheadInGreedy() {
@@ -102,5 +99,21 @@ public class WildCard2Test {
cas.release();
}
+
+ @Test
+ public void testMatchAtDocumentBegin() throws Exception{ // Test added with UIMA-5013: wildcard rules whose match involves the very first token of the document or of a window.
+ String document = "First test. Some test. Last test.";
+ String script = "# CW{-> T1};"; // rule starts with a wildcard; the T1 assertion below expects only "First"
+ script += "CW{-> T2};"; // the T2 assertion below expects "First", "Some" and "Last"
+ script += "T1 # (T2 #){->T3};"; // the T3 assertion below expects "Some test. Last test."
+ script += "T3->{# CW{->T4};};"; // wildcard rule applied inside each T3; the T4 assertion below expects only "Some"
+ CAS cas = RutaTestUtils.getCAS(document);
+ Ruta.apply(cas, script);
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "First"); // presumably (cas, test type number, expected count, expected covered texts...) - verify against RutaTestUtils
+ RutaTestUtils.assertAnnotationsEquals(cas, 2, 3, "First", "Some", "Last");
+ RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "Some test. Last test.");
+ RutaTestUtils.assertAnnotationsEquals(cas, 4, 1, "Some");
+ cas.release(); // not reached if an assertion above throws, since there is no try/finally around it
+ }
}