You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2013/11/04 17:47:39 UTC

svn commit: r1538670 - in /ctakes/trunk: ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/ ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/

Author: chenpei
Date: Mon Nov  4 16:47:38 2013
New Revision: 1538670

URL: http://svn.apache.org/r1538670
Log:
CTAKES-253 - YTEX ctakes patches port. Thanks Vijay Garla.
* ctakes-context-tokenizer\src\main\java\org\apache\ctakes\contexttokenizer\ae\ContextDependentTokenizerAnnotator.java 
Add null check to avoid an NPE when the adapted BaseToken is null. Also ignore newline tokens (they should be treated as whitespace).
* ctakes-core\src\main\java\org\apache\ctakes\core\fsm\adapters\NumberTokenAdapter.java 
Add null check: guard against null or empty NumToken covered text before checking for a leading '-'.
* ctakes-core\src\main\java\org\apache\ctakes\core\fsm\machine\DateFSM.java 
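Modified to include years in dates: a month (full text, short text, or short text plus period) followed by a year now transitions to endState instead of ntEndState.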

Modified:
    ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java

Modified: ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java?rev=1538670&r1=1538669&r2=1538670&view=diff
==============================================================================
--- ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java (original)
+++ ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java Mon Nov  4 16:47:38 2013
@@ -18,21 +18,21 @@
  */
 package org.apache.ctakes.contexttokenizer.ae;
 
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.log4j.Logger;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.cas.text.AnnotationIndex;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.resource.ResourceInitializationException;
-
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.uima.resource.ResourceInitializationException;
+
 
 import org.apache.ctakes.core.ae.TokenizerAnnotator;
 import org.apache.ctakes.core.fsm.adapters.ContractionTokenAdapter;
@@ -57,20 +57,21 @@ import org.apache.ctakes.core.fsm.output
 import org.apache.ctakes.core.fsm.output.RomanNumeralToken;
 import org.apache.ctakes.core.fsm.output.TimeToken;
 import org.apache.ctakes.core.fsm.token.BaseToken;
-import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
-import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
-import org.apache.ctakes.typesystem.type.syntax.NumToken;
-import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
-import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
-import org.apache.ctakes.typesystem.type.syntax.WordToken;
-import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.FractionAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.RangeAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.core.fsm.token.EolToken;
+import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.ctakes.typesystem.type.syntax.NumToken;
+import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
+import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.FractionAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.RangeAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
 
 /**
  * Finds tokens based on context.
@@ -123,7 +124,10 @@ public class ContextDependentTokenizerAn
 				while (btaItr.hasNext()) {
 					org.apache.ctakes.typesystem.type.syntax.BaseToken bta = (org.apache.ctakes.typesystem.type.syntax.BaseToken) btaItr
 							.next();
-					baseTokenList.add(adaptToBaseToken(bta));
+					// ignore newlines, avoid null tokens
+					BaseToken bt = adaptToBaseToken(bta);
+					if(bt != null && !(bt instanceof EolToken))
+						baseTokenList.add(bt);
 				}
 
 				// execute FSM logic

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java?rev=1538670&r1=1538669&r2=1538670&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java Mon Nov  4 16:47:38 2013
@@ -21,6 +21,8 @@ package org.apache.ctakes.core.fsm.adapt
 import org.apache.ctakes.core.fsm.token.NumberToken;
 import org.apache.ctakes.typesystem.type.syntax.NumToken;
 
+import com.google.common.base.Strings;
+
 /**
  * Adapts JCas token annotation to interface expected by the Context Dependent
  * Tokenizer.
@@ -36,7 +38,7 @@ public class NumberTokenAdapter extends 
 	{
 		super(nta);
 		
-		if (nta.getCoveredText().length() > 0 && nta.getCoveredText().charAt(0) == '-')
+		if (!Strings.isNullOrEmpty(nta.getCoveredText()) && nta.getCoveredText().length() > 0 && nta.getCoveredText().charAt(0) == '-')
 		{
 			iv_isPositive = false;
 		}		

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java?rev=1538670&r1=1538669&r2=1538670&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java Mon Nov  4 16:47:38 2013
@@ -220,16 +220,16 @@ public class DateFSM {
 		startState.addTransition(new AnyCondition(), startState);
 
 		monthFullTextState.addTransition(dayNumCondition, dayNumState);
-		monthFullTextState.addTransition(yearNotDayNumCondition, ntEndState);
+		monthFullTextState.addTransition(yearNotDayNumCondition, endState);
 		monthFullTextState.addTransition(new AnyCondition(), startState);
 
 		monthShortTextState.addTransition(dayNumCondition, dayNumState);
 		monthShortTextState.addTransition(periodCondition, periodState);
-		monthShortTextState.addTransition(yearNotDayNumCondition, ntEndState);
+		monthShortTextState.addTransition(yearNotDayNumCondition, endState);
 		monthShortTextState.addTransition(new AnyCondition(), startState);
 
 		periodState.addTransition(dayNumCondition, dayNumState);
-		periodState.addTransition(yearNotDayNumCondition, ntEndState);
+		periodState.addTransition(yearNotDayNumCondition, endState);
 		periodState.addTransition(new AnyCondition(), startState);
 
 		dayNumState.addTransition(yearNumCondition, endState);