You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2013/03/20 15:27:04 UTC
svn commit: r1458851 -
/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java
Author: pkluegl
Date: Wed Mar 20 14:27:03 2013
New Revision: 1458851
URL: http://svn.apache.org/r1458851
Log:
UIMA-2760
- fixed last line problem
- removed test code
Modified:
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java
Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java
URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java?rev=1458851&r1=1458850&r2=1458851&view=diff
==============================================================================
--- uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java (original)
+++ uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/engine/PlainTextAnnotator.java Wed Mar 20 14:27:03 2013
@@ -25,6 +25,7 @@ import java.io.IOException;
import java.io.StringReader;
import java.net.URL;
+import org.apache.commons.lang3.StringUtils;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -63,8 +64,8 @@ public class PlainTextAnnotator extends
String eachLine = null;
try {
while ((eachLine = br.readLine()) != null) {
- boolean wsLine = "".equals(eachLine.trim());
- boolean emptyLine = "".equals(eachLine);
+ boolean wsLine = StringUtils.isBlank(eachLine);
+ boolean emptyLine = StringUtils.isEmpty(eachLine);
int offsetAfterLine = offsetTillNow + eachLine.length();
int nlLength = 1;
if (documentText.length() >= offsetAfterLine + 2) {
@@ -92,6 +93,9 @@ public class PlainTextAnnotator extends
if (wsLine && !lastWasEmpty && lastLineEnd != 0) {
AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin, lastLineEnd);
cas.addFsToIndexes(newParaFS);
+ } else if(offsetAfterLine + nlLength == documentText.length()) {
+ AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin, offsetAfterLine);
+ cas.addFsToIndexes(newParaFS);
}
if (wsLine) {
lastWasEmpty = true;
@@ -104,24 +108,4 @@ public class PlainTextAnnotator extends
}
- public static void main(String[] args) throws Exception {
- URL url = TextMarkerEngine.class.getClassLoader().getResource("PlainTextAnnotator.xml");
- if (url == null) {
- url = PlainTextAnnotator.class.getClassLoader().getResource(
- "org/apache/uima/textmarker/engine/PlainTextAnnotator.xml");
- }
- XMLInputSource in = new XMLInputSource(url);
- ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
- AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
- CAS cas = ae.newCAS();
- cas.setDocumentText(FileUtils.file2String(new File(
- "D:/work/workspace-textmarker/Test/input/list1.txt"), "UTF-8"));
- ae.process(cas);
- AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex();
- for (AnnotationFS annotationFS : annotationIndex) {
- System.out.println(annotationFS.getType().getShortName() + " : "
- + annotationFS.getCoveredText());
- }
- }
-
}