You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2014/11/04 13:32:11 UTC
svn commit: r1636562 - in /uima/ruta/trunk/ruta-core/src:
main/java/org/apache/uima/ruta/engine/
main/resources/org/apache/uima/ruta/engine/
test/java/org/apache/uima/ruta/engine/
test/resources/org/apache/uima/ruta/engine/
Author: pkluegl
Date: Tue Nov 4 12:32:11 2014
New Revision: 1636562
URL: http://svn.apache.org/r1636562
Log:
UIMA-4085
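- fixed and added test (empty lines now get an EmptyLine annotation that spans the line break; a Paragraph is now also created when a line ends at the end of the document text)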
Added:
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java (with props)
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt (with props)
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java
uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java?rev=1636562&r1=1636561&r2=1636562&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java Tue Nov 4 12:32:11 2014
@@ -36,6 +36,8 @@ public class PlainTextAnnotator extends
public static final String TYPE_LINE = "org.apache.uima.ruta.type.Line";
public static final String TYPE_WSLINE = "org.apache.uima.ruta.type.WSLine";
+
+ public static final String TYPE_EMPTYLINE = "org.apache.uima.ruta.type.EmptyLine";
public static final String TYPE_PARAGRAPH = "org.apache.uima.ruta.type.Paragraph";
@@ -46,6 +48,7 @@ public class PlainTextAnnotator extends
BufferedReader br = new BufferedReader(new StringReader(documentText));
Type lineType = cas.getTypeSystem().getType(TYPE_LINE);
Type wsLineType = cas.getTypeSystem().getType(TYPE_WSLINE);
+ Type emptyLineType = cas.getTypeSystem().getType(TYPE_EMPTYLINE);
Type paragraphType = cas.getTypeSystem().getType(TYPE_PARAGRAPH);
int offsetTillNow = 0;
@@ -70,7 +73,13 @@ public class PlainTextAnnotator extends
paragraphBegin = offsetTillNow;
}
- if (wsLine && !emptyLine) {
+ if (wsLine && emptyLine) {
+ // an empty line has no characters: do not create a zero-length annotation,
+ // instead let the annotation span the line break that follows (nlLength characters)
+ AnnotationFS newEmptyLineFS = cas.createAnnotation(emptyLineType, offsetTillNow, offsetTillNow
+ + nlLength);
+ cas.addFsToIndexes(newEmptyLineFS);
+ } else if (wsLine && !emptyLine) {
AnnotationFS newWSLineFS = cas.createAnnotation(wsLineType, offsetTillNow, offsetTillNow
+ eachLine.length());
cas.addFsToIndexes(newWSLineFS);
@@ -89,6 +98,10 @@ public class PlainTextAnnotator extends
AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin,
offsetAfterLine);
cas.addFsToIndexes(newParaFS);
+ } else if (offsetAfterLine == documentText.length()) {
+ AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin,
+ offsetAfterLine);
+ cas.addFsToIndexes(newParaFS);
}
if (wsLine) {
lastWasEmpty = true;
Modified: uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml?rev=1636562&r1=1636561&r2=1636562&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml (original)
+++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml Tue Nov 4 12:32:11 2014
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
@@ -17,7 +18,6 @@
specific language governing permissions and limitations
under the License.
-->
-
<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
<name>PlainTextTypeSystem</name>
<description/>
@@ -37,6 +37,11 @@
<typeDescription>
<name>org.apache.uima.ruta.type.WSLine</name>
<description/>
+ <supertypeName>org.apache.uima.ruta.type.EmptyLine</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.ruta.type.EmptyLine</name>
+ <description/>
<supertypeName>org.apache.uima.ruta.type.AnyLine</supertypeName>
</typeDescription>
<typeDescription>
Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java?rev=1636562&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java (added)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java Tue Nov 4 12:32:11 2014
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.engine;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.net.URL;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.resource.ResourceSpecifier;
+import org.apache.uima.util.FileUtils;
+import org.apache.uima.util.XMLInputSource;
+import org.junit.Test;
+
+public class PlainTextAnnotatorTest {
+
+ @Test
+ public void test() throws Exception {
+ String namespace = this.getClass().getPackage().getName().replaceAll("\\.", "/");
+ String name = namespace + "/" + "PlainTextAnnotatorTest.txt";
+ URL textURL = PlainTextAnnotatorTest.class.getClassLoader().getResource(name);
+ File textFile = new File(textURL.toURI());
+ String text = FileUtils.file2String(textFile, "UTF-8");
+ URL url = PlainTextAnnotator.class.getClassLoader().getResource("PlainTextAnnotator.xml");
+ if (url == null) {
+ url = HtmlAnnotator.class.getClassLoader().getResource(
+ "org/apache/uima/ruta/engine/PlainTextAnnotator.xml");
+ }
+ XMLInputSource in = new XMLInputSource(url);
+ ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
+ AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
+ CAS cas = ae.newCAS();
+ AnnotationIndex<AnnotationFS> ai = null;
+
+ cas.setDocumentText(text);
+ ae.process(cas);
+
+ ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.AnyLine"));
+ assertEquals(18, ai.size());
+
+ ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.Line"));
+ assertEquals(10, ai.size());
+
+ ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.EmptyLine"));
+ assertEquals(8, ai.size());
+
+ ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.WSLine"));
+ assertEquals(4, ai.size());
+
+ ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.Paragraph"));
+ assertEquals(4, ai.size());
+
+ ae.destroy();
+ cas.release();
+ }
+}
Propchange: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt?rev=1636562&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt (added)
+++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt Tue Nov 4 12:32:11 2014
@@ -0,0 +1,18 @@
+1 some text
+2 some text
+3 some text
+
+
+
+
+8 some text
+9 some text
+10 some text
+
+
+13 some text
+14 some text
+15 some text
+
+
+18 end
\ No newline at end of file
Propchange: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt
------------------------------------------------------------------------------
svn:eol-style = native