You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/25 22:55:34 UTC
svn commit: r1590139 - /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java
Author: tmill
Date: Fri Apr 25 20:55:33 2014
New Revision: 1590139
URL: http://svn.apache.org/r1590139
Log:
CTAKES-16: Fix TokenizerAnnotatorPTB to use uimaFIT's JCasUtil.select instead of FSIterator-based iteration over the annotation index.
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java?rev=1590139&r1=1590138&r2=1590139&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java Fri Apr 25 20:55:33 2014
@@ -18,6 +18,7 @@
*/
package org.apache.ctakes.core.ae;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -31,13 +32,11 @@ import org.apache.ctakes.typesystem.type
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.util.JCasUtil;
/**
* UIMA annotator that tokenizes based on Penn Treebank rules.
@@ -87,10 +86,8 @@ public class TokenizerAnnotatorPTB exten
tokenCount = 0;
- JFSIndexRepository indexes = jcas.getJFSIndexRepository();
- FSIterator<Annotation> segmentItr = indexes.getAnnotationIndex(Segment.type).iterator();
- while (segmentItr.hasNext()) {
- Segment sa = (Segment) segmentItr.next();
+ Collection<Segment> segments = JCasUtil.select(jcas, Segment.class);
+ for(Segment sa : segments){
String segmentID = sa.getId();
if (!skipSegmentsSet.contains(segmentID)) {
annotateRange(jcas, sa.getBegin(), sa.getEnd());
@@ -112,9 +109,8 @@ public class TokenizerAnnotatorPTB exten
protected void annotateRange(JCas jcas, int rangeBegin, int rangeEnd) throws AnalysisEngineProcessException {
// int tokenCount = 0; // can't start with tokenCount=0 here because this method can be called multiple times
- JFSIndexRepository indexes = jcas.getJFSIndexRepository();
-
- // First look for all newlines and carriage returns (which are not contained within sentences)
+
+ // First look for all newlines and carriage returns (which are not contained within sentences)
String docText = jcas.getDocumentText();
for (int i = rangeBegin; i<rangeEnd; i++) {
@@ -140,10 +136,10 @@ public class TokenizerAnnotatorPTB exten
}
// Now process each sentence
- FSIterator<?> sentencesIter = indexes.getAnnotationIndex(Sentence.type).iterator();
- // Tokenize each sentence, adding the tokens to the cas index
- while (sentencesIter.hasNext()) {
- Sentence sentence = (Sentence) sentencesIter.next();
+ Collection<Sentence> sentences = JCasUtil.select(jcas, Sentence.class);
+
+ // Tokenize each sentence, adding the tokens to the cas index
+ for(Sentence sentence : sentences){
if (sentence.getBegin() < rangeBegin || sentence.getEnd() > rangeEnd) {
continue;
}
@@ -167,9 +163,8 @@ public class TokenizerAnnotatorPTB exten
}
// Now add the tokenNumber in the order of offsets
- FSIterator<?> baseTokenIter = indexes.getAnnotationIndex(BaseToken.type).iterator();
- while (baseTokenIter.hasNext()) {
- BaseToken bta = (BaseToken) baseTokenIter.next();
+ Collection<BaseToken> tokens = JCasUtil.select(jcas, BaseToken.class);
+ for(BaseToken bta : tokens){
if (bta.getBegin()>=rangeBegin && bta.getBegin()<rangeEnd) {
bta.setTokenNumber(tokenCount);
tokenCount++;