You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/22 22:52:03 UTC

svn commit: r1589291 - /ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java

Author: tmill
Date: Tue Apr 22 20:52:02 2014
New Revision: 1589291

URL: http://svn.apache.org/r1589291
Log:
CTAKES-295: Update ContextDependentTokenizerAnnotator to UIMAFit-style methods and Java >=6 overrides.

Modified:
    ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java

Modified: ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java?rev=1589291&r1=1589290&r2=1589291&view=diff
==============================================================================
--- ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java (original)
+++ ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java Tue Apr 22 20:52:02 2014
@@ -19,21 +19,11 @@
 package org.apache.ctakes.contexttokenizer.ae;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.log4j.Logger;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.cas.text.AnnotationIndex;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.resource.ResourceInitializationException;
-
-
 import org.apache.ctakes.core.ae.TokenizerAnnotator;
 import org.apache.ctakes.core.fsm.adapters.ContractionTokenAdapter;
 import org.apache.ctakes.core.fsm.adapters.DecimalTokenAdapter;
@@ -72,6 +62,13 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation;
 import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.util.JCasUtil;
 
 /**
  * Finds tokens based on context.
@@ -90,7 +87,8 @@ public class ContextDependentTokenizerAn
 	private MeasurementFSM iv_measurementFSM;
 	private PersonTitleFSM iv_personTitleFSM;
 
-	public void initialize(UimaContext annotCtx) throws ResourceInitializationException {
+	@Override
+  public void initialize(UimaContext annotCtx) throws ResourceInitializationException {
 		super.initialize(annotCtx);
 
 		iv_dateFSM = new DateFSM();
@@ -103,27 +101,22 @@ public class ContextDependentTokenizerAn
 		iv_logger.info("Finite state machines loaded.");
 	}
 
-	public void process(JCas jcas) throws AnalysisEngineProcessException {
+	@Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
 
 		try {
 			
-	    	iv_logger.info("process(JCas)");
+		  iv_logger.info("process(JCas)");
 
-			JFSIndexRepository indexes = jcas.getJFSIndexRepository();
-			Iterator<?> sentItr = indexes.getAnnotationIndex(Sentence.type).iterator();
-			AnnotationIndex baseTokenIndex = jcas.getJFSIndexRepository().getAnnotationIndex(
-					org.apache.ctakes.typesystem.type.syntax.BaseToken.type);
+			Collection<Sentence> sents = JCasUtil.select(jcas, Sentence.class);
 			
-			while (sentItr.hasNext()) {
-				Sentence sentAnnot = (Sentence) sentItr.next();
-				FSIterator btaItr = baseTokenIndex.subiterator(sentAnnot);
-
-				// adapt JCas objects into objects expected by the Finite state
-				// machines
-				List<BaseToken> baseTokenList = new ArrayList<BaseToken>();
-				while (btaItr.hasNext()) {
-					org.apache.ctakes.typesystem.type.syntax.BaseToken bta = (org.apache.ctakes.typesystem.type.syntax.BaseToken) btaItr
-							.next();
+			for(Sentence sentAnnot : sents){
+			  List<org.apache.ctakes.typesystem.type.syntax.BaseToken> tokens = 
+			      JCasUtil.selectCovered(org.apache.ctakes.typesystem.type.syntax.BaseToken.class, sentAnnot);
+			  // adapt JCas objects into objects expected by the Finite state
+			  // machines
+				List<BaseToken> baseTokenList = new ArrayList<>();
+				for(org.apache.ctakes.typesystem.type.syntax.BaseToken bta : tokens){
 					// ignore newlines, avoid null tokens
 					BaseToken bt = adaptToBaseToken(bta);
 					if(bt != null && !(bt instanceof EolToken))
@@ -207,7 +200,7 @@ public class ContextDependentTokenizerAn
 	 * @param obj
 	 * @return
 	 */
-	private BaseToken adaptToBaseToken(org.apache.ctakes.typesystem.type.syntax.BaseToken obj) throws Exception {
+	private static BaseToken adaptToBaseToken(org.apache.ctakes.typesystem.type.syntax.BaseToken obj) throws Exception {
 		if (obj instanceof WordToken) {
 			WordToken wta = (WordToken) obj;
 			return new WordTokenAdapter(wta);
@@ -215,9 +208,8 @@ public class ContextDependentTokenizerAn
 			NumToken nta = (NumToken) obj;
 			if (nta.getNumType() == TokenizerAnnotator.TOKEN_NUM_TYPE_INTEGER) {
 				return new IntegerTokenAdapter(nta);
-			} else {
-				return new DecimalTokenAdapter(nta);
 			}
+      return new DecimalTokenAdapter(nta);
 		} else if (obj instanceof PunctuationToken) {
 			PunctuationToken pta = (PunctuationToken) obj;
 			return new PunctuationTokenAdapter(pta);