You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2019/04/11 10:09:21 UTC
svn commit: r1857307 - in /uima/ruta/trunk/ruta-core/src:
main/jflex/org/apache/uima/ruta/seed/SeedLexer.flex
test/java/org/apache/uima/ruta/seed/DefaultSeederTest.java
Author: pkluegl
Date: Thu Apr 11 10:09:21 2019
New Revision: 1857307
URL: http://svn.apache.org/viewvc?rev=1857307&view=rev
Log:
UIMA-5994: improve fallback default for SPECIAL, added LS to BREAK
Modified:
uima/ruta/trunk/ruta-core/src/main/jflex/org/apache/uima/ruta/seed/SeedLexer.flex
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/seed/DefaultSeederTest.java
Modified: uima/ruta/trunk/ruta-core/src/main/jflex/org/apache/uima/ruta/seed/SeedLexer.flex
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/jflex/org/apache/uima/ruta/seed/SeedLexer.flex?rev=1857307&r1=1857306&r2=1857307&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/jflex/org/apache/uima/ruta/seed/SeedLexer.flex (original)
+++ uima/ruta/trunk/ruta-core/src/main/jflex/org/apache/uima/ruta/seed/SeedLexer.flex Thu Apr 11 10:09:21 2019
@@ -60,7 +60,7 @@ import org.apache.uima.ruta.type.SW;
ALPHA=[A-Za-z]
DIGIT=[0-9]
-BREAK=[\n\r\b\012\u000b]
+BREAK=[\n\r\b\012\u000b\u2028]
SPACE=[ \t]
%%
@@ -206,9 +206,18 @@ SPACE=[ \t]
return t;
}
- <<EOF>> {
+ <<EOF>> {
return null;
}
+ [^] {
+ SPECIAL t = new SPECIAL(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+
+ return t;
+ }
+
+
}
Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/seed/DefaultSeederTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/seed/DefaultSeederTest.java?rev=1857307&r1=1857306&r2=1857307&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/seed/DefaultSeederTest.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/seed/DefaultSeederTest.java Thu Apr 11 10:09:21 2019
@@ -162,4 +162,15 @@ public class DefaultSeederTest {
RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "\u000b");
}
+ @Test
+ public void testSpecialChars() throws Exception {
+
+ String document = "Some text \u2028Dr.";
+ String script = "RETAINTYPE(WS);\nBREAK{-> T1};";
+ CAS cas = RutaTestUtils.getCAS(document);
+ Ruta.apply(cas, script);
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "\u2028");
+ }
+
}