You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2014/11/04 13:32:11 UTC

svn commit: r1636562 - in /uima/ruta/trunk/ruta-core/src: main/java/org/apache/uima/ruta/engine/ main/resources/org/apache/uima/ruta/engine/ test/java/org/apache/uima/ruta/engine/ test/resources/org/apache/uima/ruta/engine/

Author: pkluegl
Date: Tue Nov  4 12:32:11 2014
New Revision: 1636562

URL: http://svn.apache.org/r1636562
Log:
UIMA-4085
- fixed and added test

Added:
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java   (with props)
    uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt   (with props)
Modified:
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java
    uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java?rev=1636562&r1=1636561&r2=1636562&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java Tue Nov  4 12:32:11 2014
@@ -36,6 +36,8 @@ public class PlainTextAnnotator extends 
   public static final String TYPE_LINE = "org.apache.uima.ruta.type.Line";
 
   public static final String TYPE_WSLINE = "org.apache.uima.ruta.type.WSLine";
+  
+  public static final String TYPE_EMPTYLINE = "org.apache.uima.ruta.type.EmptyLine";
 
   public static final String TYPE_PARAGRAPH = "org.apache.uima.ruta.type.Paragraph";
 
@@ -46,6 +48,7 @@ public class PlainTextAnnotator extends 
     BufferedReader br = new BufferedReader(new StringReader(documentText));
     Type lineType = cas.getTypeSystem().getType(TYPE_LINE);
     Type wsLineType = cas.getTypeSystem().getType(TYPE_WSLINE);
+    Type emptyLineType = cas.getTypeSystem().getType(TYPE_EMPTYLINE);
     Type paragraphType = cas.getTypeSystem().getType(TYPE_PARAGRAPH);
 
     int offsetTillNow = 0;
@@ -70,7 +73,13 @@ public class PlainTextAnnotator extends 
           paragraphBegin = offsetTillNow;
         }
 
-        if (wsLine && !emptyLine) {
+        if (wsLine && emptyLine) {
+          // an empty line has length 0, so do not create a zero-length annotation;
+          // instead let the EmptyLine annotation span the line break (nlLength characters)
+          AnnotationFS newEmptyLineFS = cas.createAnnotation(emptyLineType, offsetTillNow, offsetTillNow
+                  + nlLength);
+          cas.addFsToIndexes(newEmptyLineFS);
+        } else if (wsLine && !emptyLine) {
           AnnotationFS newWSLineFS = cas.createAnnotation(wsLineType, offsetTillNow, offsetTillNow
                   + eachLine.length());
           cas.addFsToIndexes(newWSLineFS);
@@ -89,6 +98,10 @@ public class PlainTextAnnotator extends 
           AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin,
                   offsetAfterLine);
           cas.addFsToIndexes(newParaFS);
+        } else if (offsetAfterLine == documentText.length()) {
+          AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin,
+                  offsetAfterLine);
+          cas.addFsToIndexes(newParaFS);
         }
         if (wsLine) {
           lastWasEmpty = true;

Modified: uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml?rev=1636562&r1=1636561&r2=1636562&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml (original)
+++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml Tue Nov  4 12:32:11 2014
@@ -1,4 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
+
 <!--
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
@@ -17,7 +18,6 @@
   specific language governing permissions and limitations
   under the License.
 -->
-
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>PlainTextTypeSystem</name>
   <description/>
@@ -37,6 +37,11 @@
     <typeDescription>
       <name>org.apache.uima.ruta.type.WSLine</name>
       <description/>
+      <supertypeName>org.apache.uima.ruta.type.EmptyLine</supertypeName>
+    </typeDescription>
+    <typeDescription>
+      <name>org.apache.uima.ruta.type.EmptyLine</name>
+      <description/>
       <supertypeName>org.apache.uima.ruta.type.AnyLine</supertypeName>
     </typeDescription>
     <typeDescription>

Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java?rev=1636562&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java (added)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java Tue Nov  4 12:32:11 2014
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.engine;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.net.URL;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.resource.ResourceSpecifier;
+import org.apache.uima.util.FileUtils;
+import org.apache.uima.util.XMLInputSource;
+import org.junit.Test;
+
+public class PlainTextAnnotatorTest {
+  // Runs the PlainTextAnnotator descriptor on a text fixture and checks per-type annotation counts.
+  @Test
+  public void test() throws Exception {
+    String namespace = this.getClass().getPackage().getName().replaceAll("\\.", "/");
+    String name = namespace + "/" + "PlainTextAnnotatorTest.txt"; // fixture path under this package
+    URL textURL = PlainTextAnnotatorTest.class.getClassLoader().getResource(name);
+    File textFile = new File(textURL.toURI());
+    String text = FileUtils.file2String(textFile, "UTF-8");
+    URL url = PlainTextAnnotator.class.getClassLoader().getResource("PlainTextAnnotator.xml");
+    if (url == null) { // not found at the classpath root: retry with the package-qualified path
+      url = HtmlAnnotator.class.getClassLoader().getResource( // NOTE(review): HtmlAnnotator looks like a copy-paste; confirm
+              "org/apache/uima/ruta/engine/PlainTextAnnotator.xml");
+    }
+    XMLInputSource in = new XMLInputSource(url);
+    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
+    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
+    CAS cas = ae.newCAS();
+    AnnotationIndex<AnnotationFS> ai = null;
+    // Feed the fixture text to the engine, then inspect the annotation index of each type.
+    cas.setDocumentText(text);
+    ae.process(cas);
+    // AnyLine: one annotation expected per line of the 18-line fixture.
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.AnyLine"));
+    assertEquals(18, ai.size());
+    // Line: presumably the 10 fixture lines that contain text.
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.Line"));
+    assertEquals(10, ai.size());
+    // EmptyLine is the supertype of WSLine, so this index also counts the 4 WSLine annotations.
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.EmptyLine"));
+    assertEquals(8, ai.size());
+    // WSLine: presumably the 4 fixture lines that consist only of spaces.
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.WSLine"));
+    assertEquals(4, ai.size());
+    // Paragraph: presumably the 4 text groups separated by empty or whitespace-only lines.
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.Paragraph"));
+    assertEquals(4, ai.size());
+
+    ae.destroy();
+    cas.release();
+  }
+}

Propchange: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt?rev=1636562&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt (added)
+++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt Tue Nov  4 12:32:11 2014
@@ -0,0 +1,18 @@
+1 some text
+2 some text
+3 some text
+
+ 
+  
+  
+8 some text
+9 some text
+10 some text
+ 
+
+13 some text
+14 some text
+15 some text
+
+
+18 end
\ No newline at end of file

Propchange: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt
------------------------------------------------------------------------------
    svn:eol-style = native