You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2013/03/20 15:27:04 UTC

svn commit: r1458851 - /uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java

Author: pkluegl
Date: Wed Mar 20 14:27:03 2013
New Revision: 1458851

URL: http://svn.apache.org/r1458851
Log:
UIMA-2760
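- fixed last-line problem: a paragraph annotation is now also created when the current line ends at the end of the document
- removed test code (the main method)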

Modified:
    uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java

Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java
URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java?rev=1458851&r1=1458850&r2=1458851&view=diff
==============================================================================
--- uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java (original)
+++ uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java Wed Mar 20 14:27:03 2013
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.net.URL;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -63,8 +64,8 @@ public class PlainTextAnnotator extends 
     String eachLine = null;
     try {
       while ((eachLine = br.readLine()) != null) {
-        boolean wsLine = "".equals(eachLine.trim());
-        boolean emptyLine = "".equals(eachLine);
+        boolean wsLine = StringUtils.isBlank(eachLine);
+        boolean emptyLine = StringUtils.isEmpty(eachLine);
         int offsetAfterLine = offsetTillNow + eachLine.length();
         int nlLength = 1;
         if (documentText.length() >= offsetAfterLine + 2) {
@@ -92,6 +93,9 @@ public class PlainTextAnnotator extends 
         if (wsLine && !lastWasEmpty && lastLineEnd != 0) {
           AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin, lastLineEnd);
           cas.addFsToIndexes(newParaFS);
+        } else if(offsetAfterLine + nlLength == documentText.length()) {
+          AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin, offsetAfterLine);
+          cas.addFsToIndexes(newParaFS);
         }
         if (wsLine) {
           lastWasEmpty = true;
@@ -104,24 +108,4 @@ public class PlainTextAnnotator extends 
 
   }
 
-  public static void main(String[] args) throws Exception {
-    URL url = TextMarkerEngine.class.getClassLoader().getResource("PlainTextAnnotator.xml");
-    if (url == null) {
-      url = PlainTextAnnotator.class.getClassLoader().getResource(
-              "org/apache/uima/textmarker/engine/PlainTextAnnotator.xml");
-    }
-    XMLInputSource in = new XMLInputSource(url);
-    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
-    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
-    CAS cas = ae.newCAS();
-    cas.setDocumentText(FileUtils.file2String(new File(
-            "D:/work/workspace-textmarker/Test/input/list1.txt"), "UTF-8"));
-    ae.process(cas);
-    AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex();
-    for (AnnotationFS annotationFS : annotationIndex) {
-      System.out.println(annotationFS.getType().getShortName() + " :  "
-              + annotationFS.getCoveredText());
-    }
-  }
-
 }