You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2012/10/31 06:26:55 UTC
svn commit: r1403989 [9/28] - in /incubator/ctakes/branches/SHARPn-cTAKES:
Constituency Parser/src/org/chboston/cnlp/ctakes/parser/ Constituency
Parser/src/org/chboston/cnlp/ctakes/parser/uima/ae/ Constituency
Parser/src/org/chboston/cnlp/ctakes/parser...
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/MeasurementFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/MeasurementFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/MeasurementFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/MeasurementFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,307 +14,307 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.condition.IntegerRangeCondition;
-import edu.mayo.bmi.fsm.condition.NumberCondition;
-import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
-import edu.mayo.bmi.fsm.condition.RangeCondition;
-import edu.mayo.bmi.fsm.condition.WordSetCondition;
-import edu.mayo.bmi.fsm.output.MeasurementToken;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-
-/**
- * Uses one or more finite state machines to detect measurements in the given
- * input of tokens.
- *
- * @author Mayo Clinic
- */
-public class MeasurementFSM {
- // text fractions
- Set<String> iv_fullTextSet = new HashSet<String>();
- Set<String> iv_shortTextSet = new HashSet<String>();
- Set<String> iv_textNumberSet = new HashSet<String>();
-
- // contains the finite state machines
- private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
- /**
- *
- * Constructor
- *
- */
- public MeasurementFSM() {
- iv_fullTextSet.add("gallon");
- iv_fullTextSet.add("gallons");
- iv_fullTextSet.add("pint");
- iv_fullTextSet.add("pints");
- iv_fullTextSet.add("ounce");
- iv_fullTextSet.add("ounces");
- iv_fullTextSet.add("pound");
- iv_fullTextSet.add("pounds");
- iv_fullTextSet.add("drop");
- iv_fullTextSet.add("drops");
- iv_fullTextSet.add("hour");
- iv_fullTextSet.add("hours");
- iv_fullTextSet.add("minute");
- iv_fullTextSet.add("minutes");
- iv_fullTextSet.add("second");
- iv_fullTextSet.add("seconds");
- iv_fullTextSet.add("foot");
- iv_fullTextSet.add("feet");
- iv_fullTextSet.add("grain");
- iv_fullTextSet.add("grains");
- iv_fullTextSet.add("teaspoon");
- iv_fullTextSet.add("teaspoons");
- iv_fullTextSet.add("tablespoon");
- iv_fullTextSet.add("tablespoons");
- iv_fullTextSet.add("kilogram");
- iv_fullTextSet.add("kilograms");
- iv_fullTextSet.add("gram");
- iv_fullTextSet.add("grams");
- iv_fullTextSet.add("centigram");
- iv_fullTextSet.add("centigrams");
- iv_fullTextSet.add("milligram");
- iv_fullTextSet.add("milligrams");
- iv_fullTextSet.add("liter");
- iv_fullTextSet.add("liters");
- iv_fullTextSet.add("centiliter");
- iv_fullTextSet.add("centiliters");
- iv_fullTextSet.add("milliliter");
- iv_fullTextSet.add("milliliters");
- iv_fullTextSet.add("meter");
- iv_fullTextSet.add("meters");
- iv_fullTextSet.add("centimeter");
- iv_fullTextSet.add("centimeters");
- iv_fullTextSet.add("millimeter");
- iv_fullTextSet.add("millimeters");
-
- iv_shortTextSet.add("gal");
- iv_shortTextSet.add("gals");
- iv_shortTextSet.add("pt");
- iv_shortTextSet.add("pts");
- iv_shortTextSet.add("oz");
- iv_shortTextSet.add("ozs");
- iv_shortTextSet.add("lb");
- iv_shortTextSet.add("lbs");
- iv_shortTextSet.add("gtts");
- iv_shortTextSet.add("hr");
- iv_shortTextSet.add("min");
- iv_shortTextSet.add("sec");
- iv_shortTextSet.add("ft");
- iv_shortTextSet.add("gr");
- iv_shortTextSet.add("tsp");
- iv_shortTextSet.add("tbsp");
- iv_shortTextSet.add("g");
- iv_shortTextSet.add("kg");
- iv_shortTextSet.add("mg");
- iv_shortTextSet.add("l");
- iv_shortTextSet.add("cl");
- iv_shortTextSet.add("ml");
- iv_shortTextSet.add("m");
- iv_shortTextSet.add("cm");
- iv_shortTextSet.add("mm");
- iv_shortTextSet.add("cc");
-
- iv_textNumberSet.add("one");
- iv_textNumberSet.add("two");
- iv_textNumberSet.add("three");
- iv_textNumberSet.add("four");
- iv_textNumberSet.add("five");
- iv_textNumberSet.add("six");
- iv_textNumberSet.add("seven");
- iv_textNumberSet.add("eight");
- iv_textNumberSet.add("nine");
- iv_textNumberSet.add("ten");
-
- iv_machineSet.add(getBloodPressureMachine());
- iv_machineSet.add(getSubstanceQuantityMachine());
- }
-
- /**
- * Gets a finite state machine that detects the following:
- * <ol>
- * <li>110/80</li>
- * </ol>
- *
- * @return
- */
- private Machine getBloodPressureMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- endState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State systolicState = new NamedState("SYSTOLIC");
- State fslashState = new NamedState("FSLASH");
-
- Condition systolicCondition = new IntegerRangeCondition(80, 200);
- Condition diastolicCondition = new IntegerRangeCondition(60, 160);
- Condition fslashCondition = new PunctuationValueCondition('/');
-
- startState.addTransition(systolicCondition, systolicState);
- startState.addTransition(new AnyCondition(), startState);
-
- systolicState.addTransition(fslashCondition, fslashState);
- systolicState.addTransition(new AnyCondition(), startState);
-
- fslashState.addTransition(diastolicCondition, endState);
- fslashState.addTransition(new AnyCondition(), startState);
-
- endState.addTransition(new AnyCondition(), startState);
-
- return m;
- }
-
- /**
- * Gets a finite state machine that detects the following:
- * <ol>
- * <li>one teaspoon</li>
- * <li>one tsp</li>
- * <li>1 teaspoon</li>
- * <li>1 tsp</li>
- * <li>0.5 tsp</li>
- * <li>1-5 teaspoons</li>
- * </ol>
- *
- * @return
- */
- private Machine getSubstanceQuantityMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- endState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State quanitityState = new NamedState("QUANITITY");
-
- Condition numberCondition = new NumberCondition();
- Condition numberTextCondition = new WordSetCondition(iv_textNumberSet,
- false);
- Condition rangeCondition = new RangeCondition();
- Condition fullTextCondition = new WordSetCondition(iv_fullTextSet,
- false);
- Condition shortTextCondition = new WordSetCondition(iv_shortTextSet,
- false);
-
- startState.addTransition(numberCondition, quanitityState);
- startState.addTransition(rangeCondition, quanitityState);
- startState.addTransition(numberTextCondition, quanitityState);
- startState.addTransition(new AnyCondition(), startState);
-
- quanitityState.addTransition(fullTextCondition, endState);
- quanitityState.addTransition(shortTextCondition, endState);
- quanitityState.addTransition(new AnyCondition(), startState);
-
- endState.addTransition(new AnyCondition(), startState);
-
- return m;
- }
-
- /**
- * Executes the finite state machines.
- *
- * @param tokens
- * @return Set of RangeToken objects.
- * @throws Exception
- */
- public Set<MeasurementToken> execute(List<? extends BaseToken> tokens,
- Set<? extends BaseToken> overrideSet) throws Exception {
- Set<MeasurementToken> measurementSet = new HashSet<MeasurementToken>();
-
- // maps a fsm to a token start index
- // key = fsm , value = token start index
- Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
- Iterator<? extends BaseToken> overrideTokenItr = overrideSet.iterator();
- // key = start offset, value = override BaseToken object
- Map<Integer, BaseToken> overrideTokenMap = new HashMap<Integer, BaseToken>();
- while (overrideTokenItr.hasNext()) {
- BaseToken t = overrideTokenItr.next();
- Integer key = new Integer(t.getStartOffset());
- overrideTokenMap.put(key, t);
- }
-
- boolean overrideOn = false;
- int overrideEndOffset = -1;
- for (int i = 0; i < tokens.size(); i++) {
- BaseToken token = tokens.get(i);
-
- Integer key = new Integer(token.getStartOffset());
-
- if (overrideOn) {
- if (token.getStartOffset() >= overrideEndOffset) {
- overrideOn = false;
- overrideEndOffset = -1;
- } else {
- // step to next iteration of for loop
- continue;
- }
- } else {
- if (overrideTokenMap.containsKey(key)) {
- // override one or more tokens until the override
- // token is complete
- token = overrideTokenMap.get(key);
- overrideOn = true;
- overrideEndOffset = token.getEndOffset();
- }
- }
-
- Iterator<Machine> machineItr = iv_machineSet.iterator();
- while (machineItr.hasNext()) {
- Machine fsm = machineItr.next();
-
- fsm.input(token);
-
- State currentState = fsm.getCurrentState();
- if (currentState.getStartStateFlag()) {
- tokenStartMap.put(fsm, new Integer(i));
- }
- if (currentState.getEndStateFlag()) {
- Object o = tokenStartMap.get(fsm);
- int tokenStartIndex;
- if (o == null) {
- // By default, all machines start with
- // token zero.
- tokenStartIndex = 0;
- } else {
- tokenStartIndex = ((Integer) o).intValue();
- // skip ahead over single token we don't want
- tokenStartIndex++;
- }
- BaseToken startToken = tokens.get(tokenStartIndex);
- BaseToken endToken = token;
- MeasurementToken measurementToken = new MeasurementToken(
- startToken.getStartOffset(), endToken
- .getEndOffset());
- measurementSet.add(measurementToken);
- fsm.reset();
- }
- }
- }
-
- // cleanup
- tokenStartMap.clear();
-
- // reset machines
- Iterator<Machine> itr = iv_machineSet.iterator();
- while (itr.hasNext()) {
- Machine fsm = itr.next();
- fsm.reset();
- }
-
- return measurementSet;
- }
-}
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.condition.IntegerRangeCondition;
+import edu.mayo.bmi.fsm.condition.NumberCondition;
+import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
+import edu.mayo.bmi.fsm.condition.RangeCondition;
+import edu.mayo.bmi.fsm.condition.WordSetCondition;
+import edu.mayo.bmi.fsm.output.MeasurementToken;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+
+/**
+ * Uses one or more finite state machines to detect measurements in the given
+ * input of tokens.
+ *
+ * @author Mayo Clinic
+ */
+public class MeasurementFSM {
+ // measurement unit words (full and abbreviated) and spelled-out numbers
+ Set<String> iv_fullTextSet = new HashSet<String>();
+ Set<String> iv_shortTextSet = new HashSet<String>();
+ Set<String> iv_textNumberSet = new HashSet<String>();
+
+ // contains the finite state machines
+ private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+ /**
+ *
+ * Constructor
+ *
+ */
+ public MeasurementFSM() {
+ iv_fullTextSet.add("gallon");
+ iv_fullTextSet.add("gallons");
+ iv_fullTextSet.add("pint");
+ iv_fullTextSet.add("pints");
+ iv_fullTextSet.add("ounce");
+ iv_fullTextSet.add("ounces");
+ iv_fullTextSet.add("pound");
+ iv_fullTextSet.add("pounds");
+ iv_fullTextSet.add("drop");
+ iv_fullTextSet.add("drops");
+ iv_fullTextSet.add("hour");
+ iv_fullTextSet.add("hours");
+ iv_fullTextSet.add("minute");
+ iv_fullTextSet.add("minutes");
+ iv_fullTextSet.add("second");
+ iv_fullTextSet.add("seconds");
+ iv_fullTextSet.add("foot");
+ iv_fullTextSet.add("feet");
+ iv_fullTextSet.add("grain");
+ iv_fullTextSet.add("grains");
+ iv_fullTextSet.add("teaspoon");
+ iv_fullTextSet.add("teaspoons");
+ iv_fullTextSet.add("tablespoon");
+ iv_fullTextSet.add("tablespoons");
+ iv_fullTextSet.add("kilogram");
+ iv_fullTextSet.add("kilograms");
+ iv_fullTextSet.add("gram");
+ iv_fullTextSet.add("grams");
+ iv_fullTextSet.add("centigram");
+ iv_fullTextSet.add("centigrams");
+ iv_fullTextSet.add("milligram");
+ iv_fullTextSet.add("milligrams");
+ iv_fullTextSet.add("liter");
+ iv_fullTextSet.add("liters");
+ iv_fullTextSet.add("centiliter");
+ iv_fullTextSet.add("centiliters");
+ iv_fullTextSet.add("milliliter");
+ iv_fullTextSet.add("milliliters");
+ iv_fullTextSet.add("meter");
+ iv_fullTextSet.add("meters");
+ iv_fullTextSet.add("centimeter");
+ iv_fullTextSet.add("centimeters");
+ iv_fullTextSet.add("millimeter");
+ iv_fullTextSet.add("millimeters");
+
+ iv_shortTextSet.add("gal");
+ iv_shortTextSet.add("gals");
+ iv_shortTextSet.add("pt");
+ iv_shortTextSet.add("pts");
+ iv_shortTextSet.add("oz");
+ iv_shortTextSet.add("ozs");
+ iv_shortTextSet.add("lb");
+ iv_shortTextSet.add("lbs");
+ iv_shortTextSet.add("gtts");
+ iv_shortTextSet.add("hr");
+ iv_shortTextSet.add("min");
+ iv_shortTextSet.add("sec");
+ iv_shortTextSet.add("ft");
+ iv_shortTextSet.add("gr");
+ iv_shortTextSet.add("tsp");
+ iv_shortTextSet.add("tbsp");
+ iv_shortTextSet.add("g");
+ iv_shortTextSet.add("kg");
+ iv_shortTextSet.add("mg");
+ iv_shortTextSet.add("l");
+ iv_shortTextSet.add("cl");
+ iv_shortTextSet.add("ml");
+ iv_shortTextSet.add("m");
+ iv_shortTextSet.add("cm");
+ iv_shortTextSet.add("mm");
+ iv_shortTextSet.add("cc");
+
+ iv_textNumberSet.add("one");
+ iv_textNumberSet.add("two");
+ iv_textNumberSet.add("three");
+ iv_textNumberSet.add("four");
+ iv_textNumberSet.add("five");
+ iv_textNumberSet.add("six");
+ iv_textNumberSet.add("seven");
+ iv_textNumberSet.add("eight");
+ iv_textNumberSet.add("nine");
+ iv_textNumberSet.add("ten");
+
+ iv_machineSet.add(getBloodPressureMachine());
+ iv_machineSet.add(getSubstanceQuantityMachine());
+ }
+
+ /**
+ * Gets a finite state machine that detects the following:
+ * <ol>
+ * <li>110/80</li>
+ * </ol>
+ *
+ * @return the machine that detects blood pressure readings
+ */
+ private Machine getBloodPressureMachine() {
+ State startState = new NamedState("START");
+ State endState = new NamedState("END");
+ endState.setEndStateFlag(true);
+
+ Machine m = new Machine(startState);
+ State systolicState = new NamedState("SYSTOLIC");
+ State fslashState = new NamedState("FSLASH");
+
+ Condition systolicCondition = new IntegerRangeCondition(80, 200);
+ Condition diastolicCondition = new IntegerRangeCondition(60, 160);
+ Condition fslashCondition = new PunctuationValueCondition('/');
+
+ startState.addTransition(systolicCondition, systolicState);
+ startState.addTransition(new AnyCondition(), startState);
+
+ systolicState.addTransition(fslashCondition, fslashState);
+ systolicState.addTransition(new AnyCondition(), startState);
+
+ fslashState.addTransition(diastolicCondition, endState);
+ fslashState.addTransition(new AnyCondition(), startState);
+
+ endState.addTransition(new AnyCondition(), startState);
+
+ return m;
+ }
+
+ /**
+ * Gets a finite state machine that detects the following:
+ * <ol>
+ * <li>one teaspoon</li>
+ * <li>one tsp</li>
+ * <li>1 teaspoon</li>
+ * <li>1 tsp</li>
+ * <li>0.5 tsp</li>
+ * <li>1-5 teaspoons</li>
+ * </ol>
+ *
+ * @return the machine that detects substance quantities
+ */
+ private Machine getSubstanceQuantityMachine() {
+ State startState = new NamedState("START");
+ State endState = new NamedState("END");
+ endState.setEndStateFlag(true);
+
+ Machine m = new Machine(startState);
+ State quanitityState = new NamedState("QUANITITY");
+
+ Condition numberCondition = new NumberCondition();
+ Condition numberTextCondition = new WordSetCondition(iv_textNumberSet,
+ false);
+ Condition rangeCondition = new RangeCondition();
+ Condition fullTextCondition = new WordSetCondition(iv_fullTextSet,
+ false);
+ Condition shortTextCondition = new WordSetCondition(iv_shortTextSet,
+ false);
+
+ startState.addTransition(numberCondition, quanitityState);
+ startState.addTransition(rangeCondition, quanitityState);
+ startState.addTransition(numberTextCondition, quanitityState);
+ startState.addTransition(new AnyCondition(), startState);
+
+ quanitityState.addTransition(fullTextCondition, endState);
+ quanitityState.addTransition(shortTextCondition, endState);
+ quanitityState.addTransition(new AnyCondition(), startState);
+
+ endState.addTransition(new AnyCondition(), startState);
+
+ return m;
+ }
+
+ /**
+ * Executes the finite state machines.
+ *
+ * @param tokens
+ * @return Set of MeasurementToken objects.
+ * @throws Exception
+ */
+ public Set<MeasurementToken> execute(List<? extends BaseToken> tokens,
+ Set<? extends BaseToken> overrideSet) throws Exception {
+ Set<MeasurementToken> measurementSet = new HashSet<MeasurementToken>();
+
+ // maps a fsm to a token start index
+ // key = fsm , value = token start index
+ Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+ Iterator<? extends BaseToken> overrideTokenItr = overrideSet.iterator();
+ // key = start offset, value = override BaseToken object
+ Map<Integer, BaseToken> overrideTokenMap = new HashMap<Integer, BaseToken>();
+ while (overrideTokenItr.hasNext()) {
+ BaseToken t = overrideTokenItr.next();
+ Integer key = new Integer(t.getStartOffset());
+ overrideTokenMap.put(key, t);
+ }
+
+ boolean overrideOn = false;
+ int overrideEndOffset = -1;
+ for (int i = 0; i < tokens.size(); i++) {
+ BaseToken token = tokens.get(i);
+
+ Integer key = new Integer(token.getStartOffset());
+
+ if (overrideOn) {
+ if (token.getStartOffset() >= overrideEndOffset) {
+ overrideOn = false;
+ overrideEndOffset = -1;
+ } else {
+ // step to next iteration of for loop
+ continue;
+ }
+ } else {
+ if (overrideTokenMap.containsKey(key)) {
+ // override one or more tokens until the override
+ // token is complete
+ token = overrideTokenMap.get(key);
+ overrideOn = true;
+ overrideEndOffset = token.getEndOffset();
+ }
+ }
+
+ Iterator<Machine> machineItr = iv_machineSet.iterator();
+ while (machineItr.hasNext()) {
+ Machine fsm = machineItr.next();
+
+ fsm.input(token);
+
+ State currentState = fsm.getCurrentState();
+ if (currentState.getStartStateFlag()) {
+ tokenStartMap.put(fsm, new Integer(i));
+ }
+ if (currentState.getEndStateFlag()) {
+ Object o = tokenStartMap.get(fsm);
+ int tokenStartIndex;
+ if (o == null) {
+ // By default, all machines start with
+ // token zero.
+ tokenStartIndex = 0;
+ } else {
+ tokenStartIndex = ((Integer) o).intValue();
+ // skip ahead over single token we don't want
+ tokenStartIndex++;
+ }
+ BaseToken startToken = tokens.get(tokenStartIndex);
+ BaseToken endToken = token;
+ MeasurementToken measurementToken = new MeasurementToken(
+ startToken.getStartOffset(), endToken
+ .getEndOffset());
+ measurementSet.add(measurementToken);
+ fsm.reset();
+ }
+ }
+ }
+
+ // cleanup
+ tokenStartMap.clear();
+
+ // reset machines
+ Iterator<Machine> itr = iv_machineSet.iterator();
+ while (itr.hasNext()) {
+ Machine fsm = itr.next();
+ fsm.reset();
+ }
+
+ return measurementSet;
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/NegationFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/NegationFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/NegationFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/NegationFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,415 +14,415 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.condition.DisjoinCondition;
-import edu.mayo.bmi.fsm.condition.NegateCondition;
-import edu.mayo.bmi.fsm.condition.TextSetCondition;
-import edu.mayo.bmi.fsm.output.NegationIndicator;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.state.NonTerminalEndState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-
-/**
- * Uses one or more finite state machines to detect dates in the given input of
- * tokens.
- *
- * @author Mayo Clinic
- */
-public class NegationFSM {
-
- // regular modal verb
- private Set<String> iv_modalVerbsSet = new HashSet<String>();
- // negative particle
- private Set<String> iv_negParticlesSet = new HashSet<String>();
- // regular verbs requiring negation particle
- private Set<String> iv_regVerbsSet = new HashSet<String>();
- // neagive verbs that contain negation in them
- private Set<String> iv_negVerbsSet = new HashSet<String>();
- // negation preposition
- private Set<String> iv_negPrepositionsSet = new HashSet<String>();
- // negatively charged determiners
- private Set<String> iv_negDeterminersSet = new HashSet<String>();
- // regular nouns - indicators
- private Set<String> iv_regNounsSet = new HashSet<String>();
- // regular prepositions
- private Set<String> iv_regPrepositionsSet = new HashSet<String>();
- // negative adjectives
- private Set<String> iv_negAdjectivesSet = new HashSet<String>();
- // negative collocations
- private Set<String> iv_negCollocSet = new HashSet<String>();
- // NEGATIVE COLLOCATION PARTICLE
- private Set<String> iv_negColPartSet = new HashSet<String>();
-
- // contains the finite state machines
- private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
- /**
- *
- * Constructor
- *
- */
- public NegationFSM() {
- iv_modalVerbsSet.add("can");
- iv_modalVerbsSet.add("ca");
- iv_modalVerbsSet.add("will");
- iv_modalVerbsSet.add("must");
- iv_modalVerbsSet.add("could");
- iv_modalVerbsSet.add("would");
- iv_modalVerbsSet.add("should");
- iv_modalVerbsSet.add("shall");
- iv_modalVerbsSet.add("did");
-
- iv_negParticlesSet.add("not");
- iv_negColPartSet.add("out");
- iv_negParticlesSet.add("n't");
- iv_negParticlesSet.add("'t");
-
- iv_negCollocSet.add("rule");
- iv_negCollocSet.add("rules");
- iv_negCollocSet.add("ruled");
- iv_negCollocSet.add("ruling");
- iv_negCollocSet.add("rule-out");
-
- iv_regVerbsSet.add("reveal");
- iv_regVerbsSet.add("reveals");
- iv_regVerbsSet.add("revealed");
- iv_regVerbsSet.add("revealing");
- iv_regVerbsSet.add("have");
- iv_regVerbsSet.add("had");
- iv_regVerbsSet.add("has");
- iv_regVerbsSet.add("feel");
- iv_regVerbsSet.add("feels");
- iv_regVerbsSet.add("felt");
- iv_regVerbsSet.add("feeling");
- iv_regVerbsSet.add("complain");
- iv_regVerbsSet.add("complains");
- iv_regVerbsSet.add("complained");
- iv_regVerbsSet.add("complaining");
- iv_regVerbsSet.add("demonstrate");
- iv_regVerbsSet.add("demonstrates");
- iv_regVerbsSet.add("demonstrated");
- iv_regVerbsSet.add("demonstrating");
- iv_regVerbsSet.add("appear");
- iv_regVerbsSet.add("appears");
- iv_regVerbsSet.add("appeared");
- iv_regVerbsSet.add("appearing");
- iv_regVerbsSet.add("caused");
- iv_regVerbsSet.add("cause");
- iv_regVerbsSet.add("causing");
- iv_regVerbsSet.add("causes");
- iv_regVerbsSet.add("find");
- iv_regVerbsSet.add("finds");
- iv_regVerbsSet.add("found");
- iv_regVerbsSet.add("discover");
- iv_regVerbsSet.add("discovered");
- iv_regVerbsSet.add("discovers");
-
- iv_negVerbsSet.add("deny");
- iv_negVerbsSet.add("denies");
- iv_negVerbsSet.add("denied");
- iv_negVerbsSet.add("denying");
- iv_negVerbsSet.add("fail");
- iv_negVerbsSet.add("fails");
- iv_negVerbsSet.add("failed");
- iv_negVerbsSet.add("failing");
- iv_negVerbsSet.add("decline");
- iv_negVerbsSet.add("declines");
- iv_negVerbsSet.add("declined");
- iv_negVerbsSet.add("declining");
- iv_negVerbsSet.add("exclude");
- iv_negVerbsSet.add("excludes");
- iv_negVerbsSet.add("excluding");
- iv_negVerbsSet.add("excluded");
-
- iv_negPrepositionsSet.add("without");
- iv_negPrepositionsSet.add("absent");
- iv_negPrepositionsSet.add("none");
-
- iv_negDeterminersSet.add("no");
- iv_negDeterminersSet.add("any");
- iv_negDeterminersSet.add("neither");
- iv_negDeterminersSet.add("nor");
- iv_negDeterminersSet.add("never");
-
- iv_regNounsSet.add("evidence");
- iv_regNounsSet.add("indication");
- iv_regNounsSet.add("indications");
- iv_regNounsSet.add("sign");
- iv_regNounsSet.add("signs");
- iv_regNounsSet.add("symptoms");
- iv_regNounsSet.add("symptom");
- iv_regNounsSet.add("sx");
- iv_regNounsSet.add("dx");
- iv_regNounsSet.add("diagnosis");
- iv_regNounsSet.add("history");
- iv_regNounsSet.add("hx");
- iv_regNounsSet.add("findings");
-
- iv_regPrepositionsSet.add("of");
- iv_regPrepositionsSet.add("in");
- iv_regPrepositionsSet.add("for");
- iv_regPrepositionsSet.add("with");
-
- iv_negAdjectivesSet.add("unremarkable");
- iv_negAdjectivesSet.add("unlikely");
- iv_negAdjectivesSet.add("negative");
-
- iv_machineSet.add(getAspectualNegIndicatorMachine());
- iv_machineSet.add(getNominalNegIndicatorMachine());
- iv_machineSet.add(getAdjNegIndicatorMachine());
-
- }
-
- private Machine getAspectualNegIndicatorMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- State anyState = new NamedState("ANY");
-
- State ntEndState = new NonTerminalEndState("NON TERMINAL END");
- endState.setEndStateFlag(true);
- ntEndState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State regModalState = new NamedState("REG_MODAL");
- State negPartState = new NamedState("NEG_PART");
- State negVerbState = new NamedState("NEG_VERB");
- State negCollocState = new NamedState("NEG_COLLOC");
- State negColPartState = new NamedState("NEG_COLPART");
-
- Condition regModalC = new TextSetCondition(iv_modalVerbsSet, false);
- Condition negPartC = new TextSetCondition(iv_negParticlesSet, false);
- Condition regVerbC = new TextSetCondition(iv_regVerbsSet, false);
- Condition negVerbC = new TextSetCondition(iv_negVerbsSet, false);
- Condition negDetC = new TextSetCondition(iv_negDeterminersSet, false);
- Condition negCollocC = new TextSetCondition(iv_negCollocSet, false);
- Condition negColPartC = new TextSetCondition(iv_negColPartSet, false);
-
- Condition notCollocC = new NegateCondition(negCollocC);
-
- startState.addTransition(negVerbC, negVerbState);
- startState.addTransition(negCollocC, negCollocState); // rule
-
- startState.addTransition(new DisjoinCondition(regModalC, regVerbC),
- regModalState); // start with a modal
- startState.addTransition(new DisjoinCondition(negPartC, negDetC),
- negPartState);
-
- startState.addTransition(new AnyCondition(), startState);
-
- regModalState.addTransition(negCollocC, negCollocState);
- negCollocState.addTransition(negColPartC, negColPartState); // out
- negColPartState.addTransition(new AnyCondition(), ntEndState);
- negCollocState.addTransition(new AnyCondition(), startState);
-
- regModalState.addTransition(new DisjoinCondition(negPartC, negDetC),
- negPartState);
- regModalState.addTransition(new AnyCondition(), anyState);
-
- anyState.addTransition(new DisjoinCondition(negPartC, negDetC),
- negPartState);
- anyState.addTransition(new AnyCondition(), startState);
-
- negPartState.addTransition(notCollocC, ntEndState);
- negVerbState.addTransition(notCollocC, ntEndState);
- negPartState.addTransition(new AnyCondition(), startState);
- negVerbState.addTransition(new AnyCondition(), startState);
-
- negPartState.addTransition(new AnyCondition(), ntEndState);
- negVerbState.addTransition(new AnyCondition(), ntEndState);
-
- ntEndState.addTransition(new AnyCondition(), endState);
-
- return m;
- }
-
-
- /**
- * should recognize:
- * <ul><li>A</li>
- * <li>B</li>
- * <li>B C</li>
- * <li>B D* C</li>
- * </ul>
- * <p>where A is one of
- * <ul><li>without</li>
- * <li>absent</li>
- * <li>none</li>
- * </ul>
- * <p> and B is one of
- * <ul>
- * <li>no</li>
- * <li>any</li>
- * <li>neither</li>
- * <li>nor</li>
- * <li>never</li>
- * </ul>
- * <p> and C is one of
- * <ul>
- * <li>evidence</li>
- * <li>indication</li>
- * <li>indications</li>
- * <li>sign</li>
- * <li>signs</li>
- * <li>symptoms</li>
- * <li>symptom</li>
- * <li>sx</li>
- * <li>dx</li>
- * <li>diagnosis</li>
- * <li>history</li>
- * <li>hx</li>
- * <li>findings</li>
- * </ul>
- * <p> and D is anything
- * @return
- */
- private Machine getNominalNegIndicatorMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- State anyState = new NamedState("ANY");
- State ntEndState = new NonTerminalEndState("NON TERMINAL END");
- endState.setEndStateFlag(true);
- ntEndState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State negPrepState = new NamedState("NEG_PREP");
- State negDetState = new NamedState("NEG_DET");
- State regNounState = new NamedState("REG_NOUN");
-
- Condition negPrepC = new TextSetCondition(iv_negPrepositionsSet, false);
- Condition negDetC = new TextSetCondition(iv_negDeterminersSet, false);
- Condition regNounC = new TextSetCondition(iv_regNounsSet, false);
-
- startState.addTransition(negDetC, negDetState); // start with a modal
- startState.addTransition(negPrepC, negPrepState);
- startState.addTransition(new AnyCondition(), startState);
-
- negPrepState.addTransition(new AnyCondition(), ntEndState);
- negDetState.addTransition(regNounC, regNounState);
- negDetState.addTransition(new AnyCondition(), ntEndState);
- negDetState.addTransition(new AnyCondition(), anyState);
-
- anyState.addTransition(regNounC, regNounState);
- anyState.addTransition(new AnyCondition(), anyState);
-
- regNounState.addTransition(new AnyCondition(), ntEndState);
-
- ntEndState.addTransition(new AnyCondition(), endState);
-
- return m;
- }
-
-
- /**
- * recognizes "A B ..."
- * where A is unremarkable, unlikely, or negative
- * and B is of, in, for, or with
- */
-
- private Machine getAdjNegIndicatorMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- State ntEndState = new NonTerminalEndState("NON TERMINAL END");
- endState.setEndStateFlag(true);
- ntEndState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State regPrepState = new NamedState("REG_PREP");
- State negAdjState = new NamedState("NEG_ADJ");
-
- Condition regPrepC = new TextSetCondition(iv_regPrepositionsSet, false);
- Condition negAdjC = new TextSetCondition(iv_negAdjectivesSet, false);
-
- startState.addTransition(negAdjC, negAdjState); // start with a modal
- startState.addTransition(new AnyCondition(), startState);
-
- negAdjState.addTransition(regPrepC, regPrepState);
- regPrepState.addTransition(new AnyCondition(), ntEndState);
- negAdjState.addTransition(new AnyCondition(), startState);
-
- ntEndState.addTransition(new AnyCondition(), endState);
-
- return m;
- }
-
- /**
- * Executes the finite state machines.
- *
- * @param tokens
- * @return Set of DateToken objects.
- * @throws Exception
- */
- public Set<NegationIndicator> execute(List<?> tokens) throws Exception {
- Set<NegationIndicator> outSet = new HashSet<NegationIndicator>();
-
- // maps a fsm to a token start index
- // key = fsm , value = token start index
- Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
- for (int i = 0; i < tokens.size(); i++) {
- BaseToken token = (BaseToken) tokens.get(i);
-
- Iterator<Machine> machineItr = iv_machineSet.iterator();
- while (machineItr.hasNext()) {
- Machine fsm = machineItr.next();
-
- fsm.input(token);
-
- State currentState = fsm.getCurrentState();
- if (currentState.getStartStateFlag()) {
- tokenStartMap.put(fsm, new Integer(i));
- }
- if (currentState.getEndStateFlag()) {
- Object o = tokenStartMap.get(fsm);
- int tokenStartIndex;
- if (o == null) {
- // By default, all machines start with
- // token zero.
- tokenStartIndex = 0;
- } else {
- tokenStartIndex = ((Integer) o).intValue();
- // skip ahead over single token we don't want
- tokenStartIndex++;
- }
- BaseToken endToken = null;
- if (currentState instanceof NonTerminalEndState) {
- endToken = (BaseToken) tokens.get(i - 1);
- } else {
- endToken = token;
- }
-
- BaseToken startToken = (BaseToken) tokens
- .get(tokenStartIndex);
- NegationIndicator neg = new NegationIndicator(startToken
- .getStartOffset(), endToken.getEndOffset());
- outSet.add(neg);
- fsm.reset();
- }
- }
- }
-
- // cleanup
- tokenStartMap.clear();
-
- // reset machines
- Iterator<Machine> itr = iv_machineSet.iterator();
- while (itr.hasNext()) {
- Machine fsm = itr.next();
- fsm.reset();
- }
-
- return outSet;
- }
-}
\ No newline at end of file
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.condition.DisjoinCondition;
+import edu.mayo.bmi.fsm.condition.NegateCondition;
+import edu.mayo.bmi.fsm.condition.TextSetCondition;
+import edu.mayo.bmi.fsm.output.NegationIndicator;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.state.NonTerminalEndState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+
+/**
+ * Uses one or more finite state machines to detect negation indicators in the
+ * given input of tokens.
+ *
+ * @author Mayo Clinic
+ */
+public class NegationFSM {
+
+ // regular modal verb
+ private Set<String> iv_modalVerbsSet = new HashSet<String>();
+ // negative particle
+ private Set<String> iv_negParticlesSet = new HashSet<String>();
+ // regular verbs requiring negation particle
+ private Set<String> iv_regVerbsSet = new HashSet<String>();
+ // negative verbs that contain negation in them
+ private Set<String> iv_negVerbsSet = new HashSet<String>();
+ // negation preposition
+ private Set<String> iv_negPrepositionsSet = new HashSet<String>();
+ // negatively charged determiners
+ private Set<String> iv_negDeterminersSet = new HashSet<String>();
+ // regular nouns - indicators
+ private Set<String> iv_regNounsSet = new HashSet<String>();
+ // regular prepositions
+ private Set<String> iv_regPrepositionsSet = new HashSet<String>();
+ // negative adjectives
+ private Set<String> iv_negAdjectivesSet = new HashSet<String>();
+ // negative collocations
+ private Set<String> iv_negCollocSet = new HashSet<String>();
+ // NEGATIVE COLLOCATION PARTICLE
+ private Set<String> iv_negColPartSet = new HashSet<String>();
+
+ // contains the finite state machines
+ private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+ /**
+ *
+ * Constructor
+ *
+ */
+ public NegationFSM() {
+ iv_modalVerbsSet.add("can");
+ iv_modalVerbsSet.add("ca");
+ iv_modalVerbsSet.add("will");
+ iv_modalVerbsSet.add("must");
+ iv_modalVerbsSet.add("could");
+ iv_modalVerbsSet.add("would");
+ iv_modalVerbsSet.add("should");
+ iv_modalVerbsSet.add("shall");
+ iv_modalVerbsSet.add("did");
+
+ iv_negParticlesSet.add("not");
+ iv_negColPartSet.add("out");
+ iv_negParticlesSet.add("n't");
+ iv_negParticlesSet.add("'t");
+
+ iv_negCollocSet.add("rule");
+ iv_negCollocSet.add("rules");
+ iv_negCollocSet.add("ruled");
+ iv_negCollocSet.add("ruling");
+ iv_negCollocSet.add("rule-out");
+
+ iv_regVerbsSet.add("reveal");
+ iv_regVerbsSet.add("reveals");
+ iv_regVerbsSet.add("revealed");
+ iv_regVerbsSet.add("revealing");
+ iv_regVerbsSet.add("have");
+ iv_regVerbsSet.add("had");
+ iv_regVerbsSet.add("has");
+ iv_regVerbsSet.add("feel");
+ iv_regVerbsSet.add("feels");
+ iv_regVerbsSet.add("felt");
+ iv_regVerbsSet.add("feeling");
+ iv_regVerbsSet.add("complain");
+ iv_regVerbsSet.add("complains");
+ iv_regVerbsSet.add("complained");
+ iv_regVerbsSet.add("complaining");
+ iv_regVerbsSet.add("demonstrate");
+ iv_regVerbsSet.add("demonstrates");
+ iv_regVerbsSet.add("demonstrated");
+ iv_regVerbsSet.add("demonstrating");
+ iv_regVerbsSet.add("appear");
+ iv_regVerbsSet.add("appears");
+ iv_regVerbsSet.add("appeared");
+ iv_regVerbsSet.add("appearing");
+ iv_regVerbsSet.add("caused");
+ iv_regVerbsSet.add("cause");
+ iv_regVerbsSet.add("causing");
+ iv_regVerbsSet.add("causes");
+ iv_regVerbsSet.add("find");
+ iv_regVerbsSet.add("finds");
+ iv_regVerbsSet.add("found");
+ iv_regVerbsSet.add("discover");
+ iv_regVerbsSet.add("discovered");
+ iv_regVerbsSet.add("discovers");
+
+ iv_negVerbsSet.add("deny");
+ iv_negVerbsSet.add("denies");
+ iv_negVerbsSet.add("denied");
+ iv_negVerbsSet.add("denying");
+ iv_negVerbsSet.add("fail");
+ iv_negVerbsSet.add("fails");
+ iv_negVerbsSet.add("failed");
+ iv_negVerbsSet.add("failing");
+ iv_negVerbsSet.add("decline");
+ iv_negVerbsSet.add("declines");
+ iv_negVerbsSet.add("declined");
+ iv_negVerbsSet.add("declining");
+ iv_negVerbsSet.add("exclude");
+ iv_negVerbsSet.add("excludes");
+ iv_negVerbsSet.add("excluding");
+ iv_negVerbsSet.add("excluded");
+
+ iv_negPrepositionsSet.add("without");
+ iv_negPrepositionsSet.add("absent");
+ iv_negPrepositionsSet.add("none");
+
+ iv_negDeterminersSet.add("no");
+ iv_negDeterminersSet.add("any");
+ iv_negDeterminersSet.add("neither");
+ iv_negDeterminersSet.add("nor");
+ iv_negDeterminersSet.add("never");
+
+ iv_regNounsSet.add("evidence");
+ iv_regNounsSet.add("indication");
+ iv_regNounsSet.add("indications");
+ iv_regNounsSet.add("sign");
+ iv_regNounsSet.add("signs");
+ iv_regNounsSet.add("symptoms");
+ iv_regNounsSet.add("symptom");
+ iv_regNounsSet.add("sx");
+ iv_regNounsSet.add("dx");
+ iv_regNounsSet.add("diagnosis");
+ iv_regNounsSet.add("history");
+ iv_regNounsSet.add("hx");
+ iv_regNounsSet.add("findings");
+
+ iv_regPrepositionsSet.add("of");
+ iv_regPrepositionsSet.add("in");
+ iv_regPrepositionsSet.add("for");
+ iv_regPrepositionsSet.add("with");
+
+ iv_negAdjectivesSet.add("unremarkable");
+ iv_negAdjectivesSet.add("unlikely");
+ iv_negAdjectivesSet.add("negative");
+
+ iv_machineSet.add(getAspectualNegIndicatorMachine());
+ iv_machineSet.add(getNominalNegIndicatorMachine());
+ iv_machineSet.add(getAdjNegIndicatorMachine());
+
+ }
+
+ private Machine getAspectualNegIndicatorMachine() {
+ State startState = new NamedState("START");
+ State endState = new NamedState("END");
+ State anyState = new NamedState("ANY");
+
+ State ntEndState = new NonTerminalEndState("NON TERMINAL END");
+ endState.setEndStateFlag(true);
+ ntEndState.setEndStateFlag(true);
+
+ Machine m = new Machine(startState);
+ State regModalState = new NamedState("REG_MODAL");
+ State negPartState = new NamedState("NEG_PART");
+ State negVerbState = new NamedState("NEG_VERB");
+ State negCollocState = new NamedState("NEG_COLLOC");
+ State negColPartState = new NamedState("NEG_COLPART");
+
+ Condition regModalC = new TextSetCondition(iv_modalVerbsSet, false);
+ Condition negPartC = new TextSetCondition(iv_negParticlesSet, false);
+ Condition regVerbC = new TextSetCondition(iv_regVerbsSet, false);
+ Condition negVerbC = new TextSetCondition(iv_negVerbsSet, false);
+ Condition negDetC = new TextSetCondition(iv_negDeterminersSet, false);
+ Condition negCollocC = new TextSetCondition(iv_negCollocSet, false);
+ Condition negColPartC = new TextSetCondition(iv_negColPartSet, false);
+
+ Condition notCollocC = new NegateCondition(negCollocC);
+
+ startState.addTransition(negVerbC, negVerbState);
+ startState.addTransition(negCollocC, negCollocState); // rule
+
+ startState.addTransition(new DisjoinCondition(regModalC, regVerbC),
+ regModalState); // start with a modal
+ startState.addTransition(new DisjoinCondition(negPartC, negDetC),
+ negPartState);
+
+ startState.addTransition(new AnyCondition(), startState);
+
+ regModalState.addTransition(negCollocC, negCollocState);
+ negCollocState.addTransition(negColPartC, negColPartState); // out
+ negColPartState.addTransition(new AnyCondition(), ntEndState);
+ negCollocState.addTransition(new AnyCondition(), startState);
+
+ regModalState.addTransition(new DisjoinCondition(negPartC, negDetC),
+ negPartState);
+ regModalState.addTransition(new AnyCondition(), anyState);
+
+ anyState.addTransition(new DisjoinCondition(negPartC, negDetC),
+ negPartState);
+ anyState.addTransition(new AnyCondition(), startState);
+
+ negPartState.addTransition(notCollocC, ntEndState);
+ negVerbState.addTransition(notCollocC, ntEndState);
+ negPartState.addTransition(new AnyCondition(), startState);
+ negVerbState.addTransition(new AnyCondition(), startState);
+
+ negPartState.addTransition(new AnyCondition(), ntEndState);
+ negVerbState.addTransition(new AnyCondition(), ntEndState);
+
+ ntEndState.addTransition(new AnyCondition(), endState);
+
+ return m;
+ }
+
+
+ /**
+ * should recognize:
+ * <ul><li>A</li>
+ * <li>B</li>
+ * <li>B C</li>
+ * <li>B D* C</li>
+ * </ul>
+ * <p>where A is one of
+ * <ul><li>without</li>
+ * <li>absent</li>
+ * <li>none</li>
+ * </ul>
+ * <p> and B is one of
+ * <ul>
+ * <li>no</li>
+ * <li>any</li>
+ * <li>neither</li>
+ * <li>nor</li>
+ * <li>never</li>
+ * </ul>
+ * <p> and C is one of
+ * <ul>
+ * <li>evidence</li>
+ * <li>indication</li>
+ * <li>indications</li>
+ * <li>sign</li>
+ * <li>signs</li>
+ * <li>symptoms</li>
+ * <li>symptom</li>
+ * <li>sx</li>
+ * <li>dx</li>
+ * <li>diagnosis</li>
+ * <li>history</li>
+ * <li>hx</li>
+ * <li>findings</li>
+ * </ul>
+ * <p> and D is anything
+ * @return
+ */
+ private Machine getNominalNegIndicatorMachine() {
+ State startState = new NamedState("START");
+ State endState = new NamedState("END");
+ State anyState = new NamedState("ANY");
+ State ntEndState = new NonTerminalEndState("NON TERMINAL END");
+ endState.setEndStateFlag(true);
+ ntEndState.setEndStateFlag(true);
+
+ Machine m = new Machine(startState);
+ State negPrepState = new NamedState("NEG_PREP");
+ State negDetState = new NamedState("NEG_DET");
+ State regNounState = new NamedState("REG_NOUN");
+
+ Condition negPrepC = new TextSetCondition(iv_negPrepositionsSet, false);
+ Condition negDetC = new TextSetCondition(iv_negDeterminersSet, false);
+ Condition regNounC = new TextSetCondition(iv_regNounsSet, false);
+
+ startState.addTransition(negDetC, negDetState); // start with a modal
+ startState.addTransition(negPrepC, negPrepState);
+ startState.addTransition(new AnyCondition(), startState);
+
+ negPrepState.addTransition(new AnyCondition(), ntEndState);
+ negDetState.addTransition(regNounC, regNounState);
+ negDetState.addTransition(new AnyCondition(), ntEndState);
+ negDetState.addTransition(new AnyCondition(), anyState);
+
+ anyState.addTransition(regNounC, regNounState);
+ anyState.addTransition(new AnyCondition(), anyState);
+
+ regNounState.addTransition(new AnyCondition(), ntEndState);
+
+ ntEndState.addTransition(new AnyCondition(), endState);
+
+ return m;
+ }
+
+
+ /**
+ * recognizes "A B ..."
+ * where A is unremarkable, unlikely, or negative
+ * and B is of, in, for, or with
+ */
+
+ private Machine getAdjNegIndicatorMachine() {
+ State startState = new NamedState("START");
+ State endState = new NamedState("END");
+ State ntEndState = new NonTerminalEndState("NON TERMINAL END");
+ endState.setEndStateFlag(true);
+ ntEndState.setEndStateFlag(true);
+
+ Machine m = new Machine(startState);
+ State regPrepState = new NamedState("REG_PREP");
+ State negAdjState = new NamedState("NEG_ADJ");
+
+ Condition regPrepC = new TextSetCondition(iv_regPrepositionsSet, false);
+ Condition negAdjC = new TextSetCondition(iv_negAdjectivesSet, false);
+
+ startState.addTransition(negAdjC, negAdjState); // start with a modal
+ startState.addTransition(new AnyCondition(), startState);
+
+ negAdjState.addTransition(regPrepC, regPrepState);
+ regPrepState.addTransition(new AnyCondition(), ntEndState);
+ negAdjState.addTransition(new AnyCondition(), startState);
+
+ ntEndState.addTransition(new AnyCondition(), endState);
+
+ return m;
+ }
+
+ /**
+ * Executes the finite state machines.
+ *
+ * @param tokens
+ * @return Set of NegationIndicator objects.
+ * @throws Exception
+ */
+ public Set<NegationIndicator> execute(List<?> tokens) throws Exception {
+ Set<NegationIndicator> outSet = new HashSet<NegationIndicator>();
+
+ // maps a fsm to a token start index
+ // key = fsm , value = token start index
+ Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+ for (int i = 0; i < tokens.size(); i++) {
+ BaseToken token = (BaseToken) tokens.get(i);
+
+ Iterator<Machine> machineItr = iv_machineSet.iterator();
+ while (machineItr.hasNext()) {
+ Machine fsm = machineItr.next();
+
+ fsm.input(token);
+
+ State currentState = fsm.getCurrentState();
+ if (currentState.getStartStateFlag()) {
+ tokenStartMap.put(fsm, new Integer(i));
+ }
+ if (currentState.getEndStateFlag()) {
+ Object o = tokenStartMap.get(fsm);
+ int tokenStartIndex;
+ if (o == null) {
+ // By default, all machines start with
+ // token zero.
+ tokenStartIndex = 0;
+ } else {
+ tokenStartIndex = ((Integer) o).intValue();
+ // skip ahead over single token we don't want
+ tokenStartIndex++;
+ }
+ BaseToken endToken = null;
+ if (currentState instanceof NonTerminalEndState) {
+ endToken = (BaseToken) tokens.get(i - 1);
+ } else {
+ endToken = token;
+ }
+
+ BaseToken startToken = (BaseToken) tokens
+ .get(tokenStartIndex);
+ NegationIndicator neg = new NegationIndicator(startToken
+ .getStartOffset(), endToken.getEndOffset());
+ outSet.add(neg);
+ fsm.reset();
+ }
+ }
+ }
+
+ // cleanup
+ tokenStartMap.clear();
+
+ // reset machines
+ Iterator<Machine> itr = iv_machineSet.iterator();
+ while (itr.hasNext()) {
+ Machine fsm = itr.next();
+ fsm.reset();
+ }
+
+ return outSet;
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/PersonTitleFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/PersonTitleFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/PersonTitleFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/PersonTitleFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,143 +14,143 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
-import edu.mayo.bmi.fsm.condition.WordSetCondition;
-import edu.mayo.bmi.fsm.output.PersonTitleToken;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-
-/**
- * Uses one or more finite state machines to detect measurements in the given
- * input of tokens.
- *
- * @author Mayo Clinic
- */
-public class PersonTitleFSM {
- // text fractions
- Set<String> iv_fullTextSet = new HashSet<String>();
-
- // contains the finite state machines
- private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
- /**
- *
- * Constructor
- *
- */
- public PersonTitleFSM() {
- iv_fullTextSet.add("mr");
- iv_fullTextSet.add("ms");
- iv_fullTextSet.add("mrs");
- iv_fullTextSet.add("dr");
-
- iv_machineSet.add(getTitleMachine());
- }
-
- /**
- * Gets a finite state machine that detects the following:
- * <ol>
- * <li>Dr.</li>
- * </ol>
- *
- * @return
- */
- private Machine getTitleMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- endState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State titleNameState = new NamedState("TITLENAME");
-
- Condition titleNameCondition = new WordSetCondition(iv_fullTextSet,
- false);
- Condition periodCondition = new PunctuationValueCondition('.');
-
- startState.addTransition(titleNameCondition, titleNameState);
- startState.addTransition(new AnyCondition(), startState);
-
- titleNameState.addTransition(periodCondition, endState);
- titleNameState.addTransition(new AnyCondition(), startState);
-
- endState.addTransition(new AnyCondition(), startState);
-
- return m;
- }
-
- /**
- * Executes the finite state machines.
- *
- * @param tokens
- * @return Set of FractionToken objects.
- * @throws Exception
- */
- public Set<PersonTitleToken> execute(List<? extends BaseToken> tokens)
- throws Exception {
- Set<PersonTitleToken> personTitleSet = new HashSet<PersonTitleToken>();
-
- // maps a fsm to a token start index
- // key = fsm , value = token start index
- Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
- for (int i = 0; i < tokens.size(); i++) {
- BaseToken token = tokens.get(i);
-
- Iterator<Machine> machineItr = iv_machineSet.iterator();
- while (machineItr.hasNext()) {
- Machine fsm = machineItr.next();
-
- fsm.input(token);
-
- State currentState = fsm.getCurrentState();
- if (currentState.getStartStateFlag()) {
- tokenStartMap.put(fsm, new Integer(i));
- }
- if (currentState.getEndStateFlag()) {
- Object o = tokenStartMap.get(fsm);
- int tokenStartIndex;
- if (o == null) {
- // By default, all machines start with
- // token zero.
- tokenStartIndex = 0;
- } else {
- tokenStartIndex = ((Integer) o).intValue();
- // skip ahead over single token we don't want
- tokenStartIndex++;
- }
- BaseToken startToken = tokens.get(tokenStartIndex);
- BaseToken endToken = token;
- PersonTitleToken ptToken = new PersonTitleToken(startToken
- .getStartOffset(), endToken.getEndOffset());
- personTitleSet.add(ptToken);
- fsm.reset();
- }
- }
- }
-
- // cleanup
- tokenStartMap.clear();
-
- // reset machines
- Iterator<Machine> itr = iv_machineSet.iterator();
- while (itr.hasNext()) {
- Machine fsm = itr.next();
- fsm.reset();
- }
-
- return personTitleSet;
- }
-}
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
+import edu.mayo.bmi.fsm.condition.WordSetCondition;
+import edu.mayo.bmi.fsm.output.PersonTitleToken;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+
+/**
+ * Uses one or more finite state machines to detect person titles in the given
+ * input of tokens.
+ *
+ * @author Mayo Clinic
+ */
+public class PersonTitleFSM {
+ // person title words
+ Set<String> iv_fullTextSet = new HashSet<String>();
+
+ // contains the finite state machines
+ private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+ /**
+ *
+ * Constructor
+ *
+ */
+ public PersonTitleFSM() {
+ iv_fullTextSet.add("mr");
+ iv_fullTextSet.add("ms");
+ iv_fullTextSet.add("mrs");
+ iv_fullTextSet.add("dr");
+
+ iv_machineSet.add(getTitleMachine());
+ }
+
+ /**
+ * Gets a finite state machine that detects the following:
+ * <ol>
+ * <li>Dr.</li>
+ * </ol>
+ *
+ * @return
+ */
+ private Machine getTitleMachine() {
+ State startState = new NamedState("START");
+ State endState = new NamedState("END");
+ endState.setEndStateFlag(true);
+
+ Machine m = new Machine(startState);
+ State titleNameState = new NamedState("TITLENAME");
+
+ Condition titleNameCondition = new WordSetCondition(iv_fullTextSet,
+ false);
+ Condition periodCondition = new PunctuationValueCondition('.');
+
+ startState.addTransition(titleNameCondition, titleNameState);
+ startState.addTransition(new AnyCondition(), startState);
+
+ titleNameState.addTransition(periodCondition, endState);
+ titleNameState.addTransition(new AnyCondition(), startState);
+
+ endState.addTransition(new AnyCondition(), startState);
+
+ return m;
+ }
+
+ /**
+ * Executes the finite state machines.
+ *
+ * @param tokens
+ * @return Set of PersonTitleToken objects.
+ * @throws Exception
+ */
+ public Set<PersonTitleToken> execute(List<? extends BaseToken> tokens)
+ throws Exception {
+ Set<PersonTitleToken> personTitleSet = new HashSet<PersonTitleToken>();
+
+ // maps a fsm to a token start index
+ // key = fsm , value = token start index
+ Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+ for (int i = 0; i < tokens.size(); i++) {
+ BaseToken token = tokens.get(i);
+
+ Iterator<Machine> machineItr = iv_machineSet.iterator();
+ while (machineItr.hasNext()) {
+ Machine fsm = machineItr.next();
+
+ fsm.input(token);
+
+ State currentState = fsm.getCurrentState();
+ if (currentState.getStartStateFlag()) {
+ tokenStartMap.put(fsm, new Integer(i));
+ }
+ if (currentState.getEndStateFlag()) {
+ Object o = tokenStartMap.get(fsm);
+ int tokenStartIndex;
+ if (o == null) {
+ // By default, all machines start with
+ // token zero.
+ tokenStartIndex = 0;
+ } else {
+ tokenStartIndex = ((Integer) o).intValue();
+ // skip ahead over single token we don't want
+ tokenStartIndex++;
+ }
+ BaseToken startToken = tokens.get(tokenStartIndex);
+ BaseToken endToken = token;
+ PersonTitleToken ptToken = new PersonTitleToken(startToken
+ .getStartOffset(), endToken.getEndOffset());
+ personTitleSet.add(ptToken);
+ fsm.reset();
+ }
+ }
+ }
+
+ // cleanup
+ tokenStartMap.clear();
+
+ // reset machines
+ Iterator<Machine> itr = iv_machineSet.iterator();
+ while (itr.hasNext()) {
+ Machine fsm = itr.next();
+ fsm.reset();
+ }
+
+ return personTitleSet;
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RangeFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RangeFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RangeFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RangeFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,206 +14,206 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.condition.IntegerCondition;
-import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
-import edu.mayo.bmi.fsm.condition.RomanNumeralCondition;
-import edu.mayo.bmi.fsm.condition.WordSetCondition;
-import edu.mayo.bmi.fsm.output.RangeToken;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-
-/**
- * Uses one or more finite state machines to detect ranges in the given input of
- * tokens.
- *
- * @author Mayo Clinic
- */
-public class RangeFSM {
- // text fractions
- Set<String> iv_textNumberSet = new HashSet<String>();
-
- // contains the finite state machines
- private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
- /**
- *
- * Constructor
- *
- */
- public RangeFSM() {
- iv_textNumberSet.add("one");
- iv_textNumberSet.add("two");
- iv_textNumberSet.add("three");
- iv_textNumberSet.add("four");
- iv_textNumberSet.add("five");
- iv_textNumberSet.add("six");
- iv_textNumberSet.add("seven");
- iv_textNumberSet.add("eight");
- iv_textNumberSet.add("nine");
- iv_textNumberSet.add("ten");
-
- iv_machineSet.add(getMachine());
- }
-
- /**
- * Gets a finite state machine that detects the following:
- * <ol>
- * <li>250-300</li>
- * <li>I-IV</li>
- * <li>two-three</li>
- * </ol>
- *
- * @return
- */
- private Machine getMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- endState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
- State leftNumIntegerState = new NamedState("LEFT_NUM_INTEGER");
- State leftNumRomanState = new NamedState("LEFT_NUM_ROMAN");
- State leftNumTextState = new NamedState("LEFT_NUM_TEXT");
- State dashState = new NamedState("DASH");
-
- Condition leftIntCondition = new IntegerCondition();
- Condition rightIntCondition = new IntegerCondition();
- Condition dashCondition = new PunctuationValueCondition('-');
- Condition leftRomanNumeralCondition = new RomanNumeralCondition();
- Condition rightRomanNumeralCondition = new RomanNumeralCondition();
- Condition leftNumTextCondition = new WordSetCondition(iv_textNumberSet,
- false);
- Condition rightNumTextCondition = new WordSetCondition(
- iv_textNumberSet, false);
-
- startState.addTransition(leftIntCondition, leftNumIntegerState);
- startState.addTransition(leftRomanNumeralCondition, leftNumRomanState);
- startState.addTransition(leftNumTextCondition, leftNumTextState);
- startState.addTransition(new AnyCondition(), startState);
-
- leftNumIntegerState.addTransition(dashCondition, dashState);
- leftNumIntegerState.addTransition(new AnyCondition(), startState);
-
- leftNumRomanState.addTransition(dashCondition, dashState);
- leftNumRomanState.addTransition(new AnyCondition(), startState);
-
- leftNumTextState.addTransition(dashCondition, dashState);
- leftNumTextState.addTransition(new AnyCondition(), startState);
-
- dashState.addTransition(rightIntCondition, endState);
- dashState.addTransition(rightRomanNumeralCondition, endState);
- dashState.addTransition(rightNumTextCondition, endState);
- dashState.addTransition(new AnyCondition(), startState);
-
- endState.addTransition(new AnyCondition(), startState);
-
- return m;
- }
-
- /**
- * Executes the finite state machines.
- *
- * @param tokens
- * @return Set of RangeToken objects.
- * @throws Exception
- */
- public Set<RangeToken> execute(List<? extends BaseToken> tokens,
- Set<? extends BaseToken> overrideSet) throws Exception {
- Set<RangeToken> rangeSet = new HashSet<RangeToken>();
-
- // maps a fsm to a token start index
- // key = fsm , value = token start index
- Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
- Iterator<? extends BaseToken> overrideTokenItr = overrideSet.iterator();
- // key = start offset, value = override BaseToken object
- Map<Integer, BaseToken> overrideTokenMap = new HashMap<Integer, BaseToken>();
- while (overrideTokenItr.hasNext()) {
- BaseToken t = overrideTokenItr.next();
- Integer key = new Integer(t.getStartOffset());
- overrideTokenMap.put(key, t);
- }
-
- boolean overrideOn = false;
- int overrideEndOffset = -1;
- for (int i = 0; i < tokens.size(); i++) {
- BaseToken token = tokens.get(i);
-
- Integer key = new Integer(token.getStartOffset());
-
- if (overrideOn) {
- if (token.getStartOffset() >= overrideEndOffset) {
- overrideOn = false;
- overrideEndOffset = -1;
- } else {
- // step to next iteration of for loop
- continue;
- }
- } else {
- if (overrideTokenMap.containsKey(key)) {
- // override one or more tokens until the override
- // token is complete
- token = overrideTokenMap.get(key);
- overrideOn = true;
- overrideEndOffset = token.getEndOffset();
- }
- }
-
- Iterator<Machine> machineItr = iv_machineSet.iterator();
- while (machineItr.hasNext()) {
- Machine fsm = machineItr.next();
-
- fsm.input(token);
-
- State currentState = fsm.getCurrentState();
- if (currentState.getStartStateFlag()) {
- tokenStartMap.put(fsm, new Integer(i));
- }
- if (currentState.getEndStateFlag()) {
- Object o = tokenStartMap.get(fsm);
- int tokenStartIndex;
- if (o == null) {
- // By default, all machines start with
- // token zero.
- tokenStartIndex = 0;
- } else {
- tokenStartIndex = ((Integer) o).intValue();
- // skip ahead over single token we don't want
- tokenStartIndex++;
- }
- BaseToken startToken = tokens.get(tokenStartIndex);
- BaseToken endToken = token;
- RangeToken rangeToken = new RangeToken(startToken
- .getStartOffset(), endToken.getEndOffset());
- rangeSet.add(rangeToken);
- fsm.reset();
- }
- }
- }
-
- // cleanup
- tokenStartMap.clear();
-
- // reset machines
- Iterator<Machine> itr = iv_machineSet.iterator();
- while (itr.hasNext()) {
- Machine fsm = itr.next();
- fsm.reset();
- }
-
- return rangeSet;
- }
-}
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.condition.IntegerCondition;
+import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
+import edu.mayo.bmi.fsm.condition.RomanNumeralCondition;
+import edu.mayo.bmi.fsm.condition.WordSetCondition;
+import edu.mayo.bmi.fsm.output.RangeToken;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+
+/**
+ * Uses one or more finite state machines to detect ranges in the given input of
+ * tokens.
+ *
+ * @author Mayo Clinic
+ */
+public class RangeFSM {
+    // number words accepted on either side of a textual range (e.g.
+    // two-three); filled by the constructor
+    Set<String> iv_textNumberSet = new HashSet<String>();
+
+    // contains the finite state machines
+    private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+    /**
+     * Constructor. Registers the number words "one" through "ten" and builds
+     * the range-detecting machine.
+     */
+    public RangeFSM() {
+        String[] numberWords = { "one", "two", "three", "four", "five", "six",
+                "seven", "eight", "nine", "ten" };
+        for (String word : numberWords) {
+            iv_textNumberSet.add(word);
+        }
+
+        iv_machineSet.add(getMachine());
+    }
+
+    /**
+     * Gets a finite state machine that detects the following:
+     * <ol>
+     * <li>250-300</li>
+     * <li>I-IV</li>
+     * <li>two-three</li>
+     * </ol>
+     * Every left-hand state leads to the same dash state, so the two sides
+     * need not be of the same kind (an integer may be followed by a dash and
+     * a number word, for instance).
+     *
+     * @return machine that reaches its end state after a number, a dash and
+     *         a second number are received as consecutive inputs
+     */
+    private Machine getMachine() {
+        State startState = new NamedState("START");
+        State endState = new NamedState("END");
+        endState.setEndStateFlag(true);
+
+        Machine m = new Machine(startState);
+        State leftNumIntegerState = new NamedState("LEFT_NUM_INTEGER");
+        State leftNumRomanState = new NamedState("LEFT_NUM_ROMAN");
+        State leftNumTextState = new NamedState("LEFT_NUM_TEXT");
+        State dashState = new NamedState("DASH");
+
+        Condition leftIntCondition = new IntegerCondition();
+        Condition rightIntCondition = new IntegerCondition();
+        Condition dashCondition = new PunctuationValueCondition('-');
+        Condition leftRomanNumeralCondition = new RomanNumeralCondition();
+        Condition rightRomanNumeralCondition = new RomanNumeralCondition();
+        Condition leftNumTextCondition = new WordSetCondition(iv_textNumberSet,
+                false);
+        Condition rightNumTextCondition = new WordSetCondition(
+                iv_textNumberSet, false);
+
+        // left-hand number, with a catch-all transition that stays in START
+        startState.addTransition(leftIntCondition, leftNumIntegerState);
+        startState.addTransition(leftRomanNumeralCondition, leftNumRomanState);
+        startState.addTransition(leftNumTextCondition, leftNumTextState);
+        startState.addTransition(new AnyCondition(), startState);
+
+        // after the left number: a dash moves on, the catch-all goes to START
+        leftNumIntegerState.addTransition(dashCondition, dashState);
+        leftNumIntegerState.addTransition(new AnyCondition(), startState);
+
+        leftNumRomanState.addTransition(dashCondition, dashState);
+        leftNumRomanState.addTransition(new AnyCondition(), startState);
+
+        leftNumTextState.addTransition(dashCondition, dashState);
+        leftNumTextState.addTransition(new AnyCondition(), startState);
+
+        // after the dash: a right-hand number of any kind reaches END
+        dashState.addTransition(rightIntCondition, endState);
+        dashState.addTransition(rightRomanNumeralCondition, endState);
+        dashState.addTransition(rightNumTextCondition, endState);
+        dashState.addTransition(new AnyCondition(), startState);
+
+        // END goes back to START on any further input
+        endState.addTransition(new AnyCondition(), startState);
+
+        return m;
+    }
+
+    /**
+     * Executes the finite state machines.
+     * <p>
+     * Each token, or the override standing in for it, is fed to every
+     * machine. When a machine reaches its end state, a RangeToken from the
+     * start offset of the first token of the match to the end offset of the
+     * completing token is recorded and the machine is reset. All machines
+     * are reset again before this method returns.
+     *
+     * @param tokens
+     *            tokens to scan, in list order
+     * @param overrideSet
+     *            tokens fed to the machines in place of the input tokens
+     *            they cover (matched by start offset); may be null
+     * @return Set of RangeToken objects.
+     * @throws Exception
+     *             passed through from the machines (presumably Machine.input)
+     */
+    public Set<RangeToken> execute(List<? extends BaseToken> tokens,
+            Set<? extends BaseToken> overrideSet) throws Exception {
+        Set<RangeToken> rangeSet = new HashSet<RangeToken>();
+
+        // key = fsm, value = index of the first token of the match in progress
+        Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+        // key = start offset, value = override BaseToken object
+        Map<Integer, BaseToken> overrideTokenMap = new HashMap<Integer, BaseToken>();
+        if (overrideSet != null) {
+            for (BaseToken t : overrideSet) {
+                overrideTokenMap.put(t.getStartOffset(), t);
+            }
+        }
+
+        try {
+            boolean overrideOn = false;
+            int overrideEndOffset = -1;
+            for (int i = 0; i < tokens.size(); i++) {
+                BaseToken token = tokens.get(i);
+
+                if (overrideOn && token.getStartOffset() < overrideEndOffset) {
+                    // still covered by the active override token
+                    continue;
+                }
+                overrideOn = false;
+
+                // Looked up for every token that is not skipped, so that an
+                // override starting right after the previous one is applied.
+                BaseToken override = overrideTokenMap.get(token.getStartOffset());
+                if (override != null) {
+                    token = override;
+                    overrideOn = true;
+                    overrideEndOffset = override.getEndOffset();
+                }
+
+                for (Machine fsm : iv_machineSet) {
+                    // a machine with no match in progress starts one here;
+                    // using the index of the token actually processed keeps
+                    // the start correct after a reset or an override span
+                    if (!tokenStartMap.containsKey(fsm)) {
+                        tokenStartMap.put(fsm, i);
+                    }
+
+                    fsm.input(token);
+
+                    State currentState = fsm.getCurrentState();
+                    boolean matched = currentState.getEndStateFlag();
+                    if (matched) {
+                        int startIndex = tokenStartMap.get(fsm);
+                        BaseToken startToken = tokens.get(startIndex);
+                        rangeSet.add(new RangeToken(startToken.getStartOffset(),
+                                token.getEndOffset()));
+                        fsm.reset();
+                    }
+                    if (matched || currentState.getStartStateFlag()) {
+                        // finished or abandoned: the next token processed
+                        // opens a new candidate match
+                        tokenStartMap.remove(fsm);
+                    }
+                }
+            }
+        } finally {
+            // reset machines, also when processing fails with an exception,
+            // so the next call does not resume a half-finished match
+            for (Machine fsm : iv_machineSet) {
+                fsm.reset();
+            }
+        }
+
+        return rangeSet;
+    }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RomanNumeralFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RomanNumeralFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RomanNumeralFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RomanNumeralFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,163 +14,163 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.output.RomanNumeralToken;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-import edu.mayo.bmi.fsm.token.WordToken;
-
-/**
- * Uses one or more finite state machines to detect roman numerals in the given
- * input of tokens.
- *
- * @author Mayo Clinic
- */
-public class RomanNumeralFSM {
-
- // contains the finite state machines
- private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
- /**
- *
- * Constructor
- *
- */
- public RomanNumeralFSM() {
- iv_machineSet.add(getMachine());
- }
-
- /**
- * Gets a finite state machine that detects the following:
- * <ol>
- * <li>III</li>
- * <li>iii</li>
- * </ol>
- *
- * @return
- */
- private Machine getMachine() {
- State startState = new NamedState("START");
- State endState = new NamedState("END");
- endState.setEndStateFlag(true);
-
- Machine m = new Machine(startState);
-
- startState.addTransition(new RomanNumeralCondition(), endState);
- startState.addTransition(new AnyCondition(), startState);
-
- endState.addTransition(new AnyCondition(), startState);
-
- return m;
- }
-
- /**
- * Executes the finite state machines.
- *
- * @param tokens
- * @return Set of RomanNumeralToken objects.
- * @throws Exception
- */
- public Set<RomanNumeralToken> execute(List<? extends BaseToken> tokens)
- throws Exception {
- Set<RomanNumeralToken> romanNumeralSet = new HashSet<RomanNumeralToken>();
-
- // maps a fsm to a token start index
- // key = fsm , value = token start index
- Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
- for (int i = 0; i < tokens.size(); i++) {
- BaseToken token = tokens.get(i);
-
- Iterator<Machine> machineItr = iv_machineSet.iterator();
- while (machineItr.hasNext()) {
- Machine fsm = machineItr.next();
-
- fsm.input(token);
-
- State currentState = fsm.getCurrentState();
- if (currentState.getStartStateFlag()) {
- tokenStartMap.put(fsm, new Integer(i));
- }
- if (currentState.getEndStateFlag()) {
- Object o = tokenStartMap.get(fsm);
- int tokenStartIndex;
- if (o == null) {
- // By default, all machines start with
- // token zero.
- tokenStartIndex = 0;
- } else {
- tokenStartIndex = ((Integer) o).intValue();
- // skip ahead over single token we don't want
- tokenStartIndex++;
- }
-
- BaseToken startToken = tokens.get(tokenStartIndex);
- BaseToken endToken = token;
- RomanNumeralToken rnToken = new RomanNumeralToken(
- startToken.getStartOffset(), endToken
- .getEndOffset());
- romanNumeralSet.add(rnToken);
- fsm.reset();
- }
- }
- }
-
- // cleanup
- tokenStartMap.clear();
-
- // reset machines
- Iterator<Machine> itr = iv_machineSet.iterator();
- while (itr.hasNext()) {
- Machine fsm = itr.next();
- fsm.reset();
- }
-
- return romanNumeralSet;
- }
-
- @SuppressWarnings("serial")
- class RomanNumeralCondition extends Condition {
- public boolean satisfiedBy(Object conditional) {
- if (conditional instanceof WordToken) {
- WordToken wt = (WordToken) conditional;
- return isRomanNumeral(wt.getText());
- }
-
- return false;
- }
-
- /**
- * Validates whether the given string is a roman numeral.
- *
- * @param str
- * @return
- */
- private boolean isRomanNumeral(String str) {
- str = str.toUpperCase();
- for (int i = 0; i < str.length(); i++) {
- char currentChar = str.charAt(i);
- if ((currentChar != 'I') && (currentChar != 'V')
- && (currentChar != 'X') && (currentChar != 'L')
- && (currentChar != 'C') && (currentChar != 'D')
- && (currentChar != 'M')) {
- return false;
- }
- }
- return true;
- }
- }
-
-}
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.output.RomanNumeralToken;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+import edu.mayo.bmi.fsm.token.WordToken;
+
+/**
+ * Uses one or more finite state machines to detect roman numerals in the given
+ * input of tokens.
+ *
+ * @author Mayo Clinic
+ */
+public class RomanNumeralFSM {
+    // letters that may appear in a roman numeral, in both cases
+    private static final String ROMAN_LETTERS = "IVXLCDMivxlcdm";
+
+    // contains the finite state machines
+    private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+    /**
+     * Constructor. Builds the single roman numeral machine.
+     */
+    public RomanNumeralFSM() {
+        iv_machineSet.add(getMachine());
+    }
+
+    /**
+     * Gets a finite state machine that detects the following:
+     * <ol>
+     * <li>III</li>
+     * <li>iii</li>
+     * </ol>
+     *
+     * @return machine whose end state is reached from the start state by a
+     *         single input satisfying RomanNumeralCondition
+     */
+    private Machine getMachine() {
+        State startState = new NamedState("START");
+        State endState = new NamedState("END");
+        endState.setEndStateFlag(true);
+
+        Machine m = new Machine(startState);
+
+        startState.addTransition(new RomanNumeralCondition(), endState);
+        startState.addTransition(new AnyCondition(), startState);
+
+        endState.addTransition(new AnyCondition(), startState);
+
+        return m;
+    }
+
+    /**
+     * Executes the finite state machines.
+     * <p>
+     * Each token is fed to every machine. When a machine reaches its end
+     * state, a RomanNumeralToken is recorded that runs from the start offset
+     * of the first token of the match to the end offset of the token that
+     * completed it, and the machine is reset. All machines are reset again
+     * before this method returns.
+     *
+     * @param tokens
+     *            tokens to scan, in list order
+     * @return Set of RomanNumeralToken objects.
+     * @throws Exception
+     *             passed through from the machines (presumably Machine.input)
+     */
+    public Set<RomanNumeralToken> execute(List<? extends BaseToken> tokens)
+            throws Exception {
+        Set<RomanNumeralToken> romanNumeralSet = new HashSet<RomanNumeralToken>();
+
+        // key = fsm, value = index of the first token of the match in progress
+        Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+        try {
+            for (int i = 0; i < tokens.size(); i++) {
+                BaseToken token = tokens.get(i);
+
+                for (Machine fsm : iv_machineSet) {
+                    // a machine with no match in progress starts one here, so
+                    // a numeral right after another one gets its own offsets
+                    if (!tokenStartMap.containsKey(fsm)) {
+                        tokenStartMap.put(fsm, i);
+                    }
+
+                    fsm.input(token);
+
+                    State currentState = fsm.getCurrentState();
+                    boolean matched = currentState.getEndStateFlag();
+                    if (matched) {
+                        BaseToken startToken = tokens.get(tokenStartMap.get(fsm));
+                        romanNumeralSet.add(new RomanNumeralToken(
+                                startToken.getStartOffset(), token.getEndOffset()));
+                        fsm.reset();
+                    }
+                    if (matched || currentState.getStartStateFlag()) {
+                        // finished or abandoned: the next token opens a new
+                        // candidate match
+                        tokenStartMap.remove(fsm);
+                    }
+                }
+            }
+        } finally {
+            // reset machines, also when processing fails with an exception,
+            // so the next call does not resume a half-finished match
+            for (Machine fsm : iv_machineSet) {
+                fsm.reset();
+            }
+        }
+
+        return romanNumeralSet;
+    }
+
+    // Condition satisfied by a WordToken whose text consists solely of roman
+    // numeral letters.
+    @SuppressWarnings("serial")
+    class RomanNumeralCondition extends Condition {
+        public boolean satisfiedBy(Object conditional) {
+            if (conditional instanceof WordToken) {
+                WordToken wt = (WordToken) conditional;
+                return isRomanNumeral(wt.getText());
+            }
+
+            return false;
+        }
+
+        /**
+         * Validates whether the given string is a roman numeral. Only the
+         * letters are checked (I, V, X, L, C, D, M in either case); their
+         * order is not, so any word made of these letters is accepted.
+         *
+         * @param str
+         *            text to test; null and the empty string are rejected
+         * @return true if str is non-empty and contains only roman numeral
+         *         letters
+         */
+        private boolean isRomanNumeral(String str) {
+            if (str == null || str.length() == 0) {
+                return false;
+            }
+            for (int i = 0; i < str.length(); i++) {
+                if (ROMAN_LETTERS.indexOf(str.charAt(i)) < 0) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+}