You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2013/11/04 17:47:39 UTC
svn commit: r1538670 - in /ctakes/trunk:
ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/
ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/
ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/
Author: chenpei
Date: Mon Nov 4 16:47:38 2013
New Revision: 1538670
URL: http://svn.apache.org/r1538670
Log:
CTAKES-253 - YTEX ctakes patches port. Thanks Vijay Garla.
* ctakes-context-tokenizer\src\main\java\org\apache\ctakes\contexttokenizer\ae\ContextDependentTokenizerAnnotator.java
Add null check to avoid an NPE when the adapted BaseToken is null. Also ignore newline tokens (EolToken); they should be treated as whitespace.
* ctakes-core\src\main\java\org\apache\ctakes\core\fsm\adapters\NumberTokenAdapter.java
Add null check: skip the leading-minus sign test when a NumToken's covered text is null or empty.
* ctakes-core\src\main\java\org\apache\ctakes\core\fsm\machine\DateFSM.java
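Modified to include years in dates: a year that follows a month (full text, short text, or short text plus period) now transitions to endState instead of ntEndState.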
Modified:
ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java
Modified: ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java?rev=1538670&r1=1538669&r2=1538670&view=diff
==============================================================================
--- ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java (original)
+++ ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java Mon Nov 4 16:47:38 2013
@@ -18,21 +18,21 @@
*/
package org.apache.ctakes.contexttokenizer.ae;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.log4j.Logger;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.cas.text.AnnotationIndex;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.resource.ResourceInitializationException;
-
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.uima.resource.ResourceInitializationException;
+
import org.apache.ctakes.core.ae.TokenizerAnnotator;
import org.apache.ctakes.core.fsm.adapters.ContractionTokenAdapter;
@@ -57,20 +57,21 @@ import org.apache.ctakes.core.fsm.output
import org.apache.ctakes.core.fsm.output.RomanNumeralToken;
import org.apache.ctakes.core.fsm.output.TimeToken;
import org.apache.ctakes.core.fsm.token.BaseToken;
-import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
-import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
-import org.apache.ctakes.typesystem.type.syntax.NumToken;
-import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
-import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
-import org.apache.ctakes.typesystem.type.syntax.WordToken;
-import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.FractionAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.RangeAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.core.fsm.token.EolToken;
+import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.ctakes.typesystem.type.syntax.NumToken;
+import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
+import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.FractionAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.RangeAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
/**
* Finds tokens based on context.
@@ -123,7 +124,10 @@ public class ContextDependentTokenizerAn
while (btaItr.hasNext()) {
org.apache.ctakes.typesystem.type.syntax.BaseToken bta = (org.apache.ctakes.typesystem.type.syntax.BaseToken) btaItr
.next();
- baseTokenList.add(adaptToBaseToken(bta));
+ // ignore newlines, avoid null tokens
+ BaseToken bt = adaptToBaseToken(bta);
+ if(bt != null && !(bt instanceof EolToken))
+ baseTokenList.add(bt);
}
// execute FSM logic
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java?rev=1538670&r1=1538669&r2=1538670&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java Mon Nov 4 16:47:38 2013
@@ -21,6 +21,8 @@ package org.apache.ctakes.core.fsm.adapt
import org.apache.ctakes.core.fsm.token.NumberToken;
import org.apache.ctakes.typesystem.type.syntax.NumToken;
+import com.google.common.base.Strings;
+
/**
* Adapts JCas token annotation to interface expected by the Context Dependent
* Tokenizer.
@@ -36,7 +38,7 @@ public class NumberTokenAdapter extends
{
super(nta);
- if (nta.getCoveredText().length() > 0 && nta.getCoveredText().charAt(0) == '-')
+ if (!Strings.isNullOrEmpty(nta.getCoveredText()) && nta.getCoveredText().length() > 0 && nta.getCoveredText().charAt(0) == '-')
{
iv_isPositive = false;
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java?rev=1538670&r1=1538669&r2=1538670&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java Mon Nov 4 16:47:38 2013
@@ -220,16 +220,16 @@ public class DateFSM {
startState.addTransition(new AnyCondition(), startState);
monthFullTextState.addTransition(dayNumCondition, dayNumState);
- monthFullTextState.addTransition(yearNotDayNumCondition, ntEndState);
+ monthFullTextState.addTransition(yearNotDayNumCondition, endState);
monthFullTextState.addTransition(new AnyCondition(), startState);
monthShortTextState.addTransition(dayNumCondition, dayNumState);
monthShortTextState.addTransition(periodCondition, periodState);
- monthShortTextState.addTransition(yearNotDayNumCondition, ntEndState);
+ monthShortTextState.addTransition(yearNotDayNumCondition, endState);
monthShortTextState.addTransition(new AnyCondition(), startState);
periodState.addTransition(dayNumCondition, dayNumState);
- periodState.addTransition(yearNotDayNumCondition, ntEndState);
+ periodState.addTransition(yearNotDayNumCondition, endState);
periodState.addTransition(new AnyCondition(), startState);
dayNumState.addTransition(yearNumCondition, endState);