You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2012/10/31 06:26:55 UTC

svn commit: r1403989 [9/28] - in /incubator/ctakes/branches/SHARPn-cTAKES: Constituency Parser/src/org/chboston/cnlp/ctakes/parser/ Constituency Parser/src/org/chboston/cnlp/ctakes/parser/uima/ae/ Constituency Parser/src/org/chboston/cnlp/ctakes/parser...

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/MeasurementFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/MeasurementFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/MeasurementFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/MeasurementFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,307 +14,307 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.condition.IntegerRangeCondition;
-import edu.mayo.bmi.fsm.condition.NumberCondition;
-import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
-import edu.mayo.bmi.fsm.condition.RangeCondition;
-import edu.mayo.bmi.fsm.condition.WordSetCondition;
-import edu.mayo.bmi.fsm.output.MeasurementToken;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-
-/**
- * Uses one or more finite state machines to detect measurements in the given
- * input of tokens.
- * 
- * @author Mayo Clinic
- */
-public class MeasurementFSM {
-	// text fractions
-	Set<String> iv_fullTextSet = new HashSet<String>();
-	Set<String> iv_shortTextSet = new HashSet<String>();
-	Set<String> iv_textNumberSet = new HashSet<String>();
-
-	// contains the finite state machines
-	private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
-	/**
-	 * 
-	 * Constructor
-	 * 
-	 */
-	public MeasurementFSM() {
-		iv_fullTextSet.add("gallon");
-		iv_fullTextSet.add("gallons");
-		iv_fullTextSet.add("pint");
-		iv_fullTextSet.add("pints");
-		iv_fullTextSet.add("ounce");
-		iv_fullTextSet.add("ounces");
-		iv_fullTextSet.add("pound");
-		iv_fullTextSet.add("pounds");
-		iv_fullTextSet.add("drop");
-		iv_fullTextSet.add("drops");
-		iv_fullTextSet.add("hour");
-		iv_fullTextSet.add("hours");
-		iv_fullTextSet.add("minute");
-		iv_fullTextSet.add("minutes");
-		iv_fullTextSet.add("second");
-		iv_fullTextSet.add("seconds");
-		iv_fullTextSet.add("foot");
-		iv_fullTextSet.add("feet");
-		iv_fullTextSet.add("grain");
-		iv_fullTextSet.add("grains");
-		iv_fullTextSet.add("teaspoon");
-		iv_fullTextSet.add("teaspoons");
-		iv_fullTextSet.add("tablespoon");
-		iv_fullTextSet.add("tablespoons");
-		iv_fullTextSet.add("kilogram");
-		iv_fullTextSet.add("kilograms");
-		iv_fullTextSet.add("gram");
-		iv_fullTextSet.add("grams");
-		iv_fullTextSet.add("centigram");
-		iv_fullTextSet.add("centigrams");
-		iv_fullTextSet.add("milligram");
-		iv_fullTextSet.add("milligrams");
-		iv_fullTextSet.add("liter");
-		iv_fullTextSet.add("liters");
-		iv_fullTextSet.add("centiliter");
-		iv_fullTextSet.add("centiliters");
-		iv_fullTextSet.add("milliliter");
-		iv_fullTextSet.add("milliliters");
-		iv_fullTextSet.add("meter");
-		iv_fullTextSet.add("meters");
-		iv_fullTextSet.add("centimeter");
-		iv_fullTextSet.add("centimeters");
-		iv_fullTextSet.add("millimeter");
-		iv_fullTextSet.add("millimeters");
-
-		iv_shortTextSet.add("gal");
-		iv_shortTextSet.add("gals");
-		iv_shortTextSet.add("pt");
-		iv_shortTextSet.add("pts");
-		iv_shortTextSet.add("oz");
-		iv_shortTextSet.add("ozs");
-		iv_shortTextSet.add("lb");
-		iv_shortTextSet.add("lbs");
-		iv_shortTextSet.add("gtts");
-		iv_shortTextSet.add("hr");
-		iv_shortTextSet.add("min");
-		iv_shortTextSet.add("sec");
-		iv_shortTextSet.add("ft");
-		iv_shortTextSet.add("gr");
-		iv_shortTextSet.add("tsp");
-		iv_shortTextSet.add("tbsp");
-		iv_shortTextSet.add("g");
-		iv_shortTextSet.add("kg");
-		iv_shortTextSet.add("mg");
-		iv_shortTextSet.add("l");
-		iv_shortTextSet.add("cl");
-		iv_shortTextSet.add("ml");
-		iv_shortTextSet.add("m");
-		iv_shortTextSet.add("cm");
-		iv_shortTextSet.add("mm");
-		iv_shortTextSet.add("cc");
-
-		iv_textNumberSet.add("one");
-		iv_textNumberSet.add("two");
-		iv_textNumberSet.add("three");
-		iv_textNumberSet.add("four");
-		iv_textNumberSet.add("five");
-		iv_textNumberSet.add("six");
-		iv_textNumberSet.add("seven");
-		iv_textNumberSet.add("eight");
-		iv_textNumberSet.add("nine");
-		iv_textNumberSet.add("ten");
-
-		iv_machineSet.add(getBloodPressureMachine());
-		iv_machineSet.add(getSubstanceQuantityMachine());
-	}
-
-	/**
-	 * Gets a finite state machine that detects the following:
-	 * <ol>
-	 * <li>110/80</li>
-	 * </ol>
-	 * 
-	 * @return
-	 */
-	private Machine getBloodPressureMachine() {
-		State startState = new NamedState("START");
-		State endState = new NamedState("END");
-		endState.setEndStateFlag(true);
-
-		Machine m = new Machine(startState);
-		State systolicState = new NamedState("SYSTOLIC");
-		State fslashState = new NamedState("FSLASH");
-
-		Condition systolicCondition = new IntegerRangeCondition(80, 200);
-		Condition diastolicCondition = new IntegerRangeCondition(60, 160);
-		Condition fslashCondition = new PunctuationValueCondition('/');
-
-		startState.addTransition(systolicCondition, systolicState);
-		startState.addTransition(new AnyCondition(), startState);
-
-		systolicState.addTransition(fslashCondition, fslashState);
-		systolicState.addTransition(new AnyCondition(), startState);
-
-		fslashState.addTransition(diastolicCondition, endState);
-		fslashState.addTransition(new AnyCondition(), startState);
-
-		endState.addTransition(new AnyCondition(), startState);
-
-		return m;
-	}
-
-	/**
-	 * Gets a finite state machine that detects the following:
-	 * <ol>
-	 * <li>one teaspoon</li>
-	 * <li>one tsp</li>
-	 * <li>1 teaspoon</li>
-	 * <li>1 tsp</li>
-	 * <li>0.5 tsp</li>
-	 * <li>1-5 teaspoons</li>
-	 * </ol>
-	 * 
-	 * @return
-	 */
-	private Machine getSubstanceQuantityMachine() {
-		State startState = new NamedState("START");
-		State endState = new NamedState("END");
-		endState.setEndStateFlag(true);
-
-		Machine m = new Machine(startState);
-		State quanitityState = new NamedState("QUANITITY");
-
-		Condition numberCondition = new NumberCondition();
-		Condition numberTextCondition = new WordSetCondition(iv_textNumberSet,
-				false);
-		Condition rangeCondition = new RangeCondition();
-		Condition fullTextCondition = new WordSetCondition(iv_fullTextSet,
-				false);
-		Condition shortTextCondition = new WordSetCondition(iv_shortTextSet,
-				false);
-
-		startState.addTransition(numberCondition, quanitityState);
-		startState.addTransition(rangeCondition, quanitityState);
-		startState.addTransition(numberTextCondition, quanitityState);
-		startState.addTransition(new AnyCondition(), startState);
-
-		quanitityState.addTransition(fullTextCondition, endState);
-		quanitityState.addTransition(shortTextCondition, endState);
-		quanitityState.addTransition(new AnyCondition(), startState);
-
-		endState.addTransition(new AnyCondition(), startState);
-
-		return m;
-	}
-
-	/**
-	 * Executes the finite state machines.
-	 * 
-	 * @param tokens
-	 * @return Set of RangeToken objects.
-	 * @throws Exception
-	 */
-	public Set<MeasurementToken> execute(List<? extends BaseToken> tokens,
-			Set<? extends BaseToken> overrideSet) throws Exception {
-		Set<MeasurementToken> measurementSet = new HashSet<MeasurementToken>();
-
-		// maps a fsm to a token start index
-		// key = fsm , value = token start index
-		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
-		Iterator<? extends BaseToken> overrideTokenItr = overrideSet.iterator();
-		// key = start offset, value = override BaseToken object
-		Map<Integer, BaseToken> overrideTokenMap = new HashMap<Integer, BaseToken>();
-		while (overrideTokenItr.hasNext()) {
-			BaseToken t = overrideTokenItr.next();
-			Integer key = new Integer(t.getStartOffset());
-			overrideTokenMap.put(key, t);
-		}
-
-		boolean overrideOn = false;
-		int overrideEndOffset = -1;
-		for (int i = 0; i < tokens.size(); i++) {
-			BaseToken token = tokens.get(i);
-
-			Integer key = new Integer(token.getStartOffset());
-
-			if (overrideOn) {
-				if (token.getStartOffset() >= overrideEndOffset) {
-					overrideOn = false;
-					overrideEndOffset = -1;
-				} else {
-					// step to next iteration of for loop
-					continue;
-				}
-			} else {
-				if (overrideTokenMap.containsKey(key)) {
-					// override one or more tokens until the override
-					// token is complete
-					token = overrideTokenMap.get(key);
-					overrideOn = true;
-					overrideEndOffset = token.getEndOffset();
-				}
-			}
-
-			Iterator<Machine> machineItr = iv_machineSet.iterator();
-			while (machineItr.hasNext()) {
-				Machine fsm = machineItr.next();
-
-				fsm.input(token);
-
-				State currentState = fsm.getCurrentState();
-				if (currentState.getStartStateFlag()) {
-					tokenStartMap.put(fsm, new Integer(i));
-				}
-				if (currentState.getEndStateFlag()) {
-					Object o = tokenStartMap.get(fsm);
-					int tokenStartIndex;
-					if (o == null) {
-						// By default, all machines start with
-						// token zero.
-						tokenStartIndex = 0;
-					} else {
-						tokenStartIndex = ((Integer) o).intValue();
-						// skip ahead over single token we don't want
-						tokenStartIndex++;
-					}
-					BaseToken startToken = tokens.get(tokenStartIndex);
-					BaseToken endToken = token;
-					MeasurementToken measurementToken = new MeasurementToken(
-							startToken.getStartOffset(), endToken
-									.getEndOffset());
-					measurementSet.add(measurementToken);
-					fsm.reset();
-				}
-			}
-		}
-
-		// cleanup
-		tokenStartMap.clear();
-
-		// reset machines
-		Iterator<Machine> itr = iv_machineSet.iterator();
-		while (itr.hasNext()) {
-			Machine fsm = itr.next();
-			fsm.reset();
-		}
-
-		return measurementSet;
-	}
-}
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.condition.IntegerRangeCondition;
+import edu.mayo.bmi.fsm.condition.NumberCondition;
+import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
+import edu.mayo.bmi.fsm.condition.RangeCondition;
+import edu.mayo.bmi.fsm.condition.WordSetCondition;
+import edu.mayo.bmi.fsm.output.MeasurementToken;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+
+/**
+ * Uses one or more finite state machines to detect measurements in the given
+ * input of tokens.
+ * 
+ * @author Mayo Clinic
+ */
+public class MeasurementFSM {
+	// measurement unit names (full and abbreviated) and spelled-out numbers
+	Set<String> iv_fullTextSet = new HashSet<String>();
+	Set<String> iv_shortTextSet = new HashSet<String>();
+	Set<String> iv_textNumberSet = new HashSet<String>();
+
+	// contains the finite state machines
+	private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+	/**
+	 * 
+	 * Constructor
+	 * 
+	 */
+	public MeasurementFSM() {
+		iv_fullTextSet.add("gallon");
+		iv_fullTextSet.add("gallons");
+		iv_fullTextSet.add("pint");
+		iv_fullTextSet.add("pints");
+		iv_fullTextSet.add("ounce");
+		iv_fullTextSet.add("ounces");
+		iv_fullTextSet.add("pound");
+		iv_fullTextSet.add("pounds");
+		iv_fullTextSet.add("drop");
+		iv_fullTextSet.add("drops");
+		iv_fullTextSet.add("hour");
+		iv_fullTextSet.add("hours");
+		iv_fullTextSet.add("minute");
+		iv_fullTextSet.add("minutes");
+		iv_fullTextSet.add("second");
+		iv_fullTextSet.add("seconds");
+		iv_fullTextSet.add("foot");
+		iv_fullTextSet.add("feet");
+		iv_fullTextSet.add("grain");
+		iv_fullTextSet.add("grains");
+		iv_fullTextSet.add("teaspoon");
+		iv_fullTextSet.add("teaspoons");
+		iv_fullTextSet.add("tablespoon");
+		iv_fullTextSet.add("tablespoons");
+		iv_fullTextSet.add("kilogram");
+		iv_fullTextSet.add("kilograms");
+		iv_fullTextSet.add("gram");
+		iv_fullTextSet.add("grams");
+		iv_fullTextSet.add("centigram");
+		iv_fullTextSet.add("centigrams");
+		iv_fullTextSet.add("milligram");
+		iv_fullTextSet.add("milligrams");
+		iv_fullTextSet.add("liter");
+		iv_fullTextSet.add("liters");
+		iv_fullTextSet.add("centiliter");
+		iv_fullTextSet.add("centiliters");
+		iv_fullTextSet.add("milliliter");
+		iv_fullTextSet.add("milliliters");
+		iv_fullTextSet.add("meter");
+		iv_fullTextSet.add("meters");
+		iv_fullTextSet.add("centimeter");
+		iv_fullTextSet.add("centimeters");
+		iv_fullTextSet.add("millimeter");
+		iv_fullTextSet.add("millimeters");
+
+		iv_shortTextSet.add("gal");
+		iv_shortTextSet.add("gals");
+		iv_shortTextSet.add("pt");
+		iv_shortTextSet.add("pts");
+		iv_shortTextSet.add("oz");
+		iv_shortTextSet.add("ozs");
+		iv_shortTextSet.add("lb");
+		iv_shortTextSet.add("lbs");
+		iv_shortTextSet.add("gtts");
+		iv_shortTextSet.add("hr");
+		iv_shortTextSet.add("min");
+		iv_shortTextSet.add("sec");
+		iv_shortTextSet.add("ft");
+		iv_shortTextSet.add("gr");
+		iv_shortTextSet.add("tsp");
+		iv_shortTextSet.add("tbsp");
+		iv_shortTextSet.add("g");
+		iv_shortTextSet.add("kg");
+		iv_shortTextSet.add("mg");
+		iv_shortTextSet.add("l");
+		iv_shortTextSet.add("cl");
+		iv_shortTextSet.add("ml");
+		iv_shortTextSet.add("m");
+		iv_shortTextSet.add("cm");
+		iv_shortTextSet.add("mm");
+		iv_shortTextSet.add("cc");
+
+		iv_textNumberSet.add("one");
+		iv_textNumberSet.add("two");
+		iv_textNumberSet.add("three");
+		iv_textNumberSet.add("four");
+		iv_textNumberSet.add("five");
+		iv_textNumberSet.add("six");
+		iv_textNumberSet.add("seven");
+		iv_textNumberSet.add("eight");
+		iv_textNumberSet.add("nine");
+		iv_textNumberSet.add("ten");
+
+		iv_machineSet.add(getBloodPressureMachine());
+		iv_machineSet.add(getSubstanceQuantityMachine());
+	}
+
+	/**
+	 * Gets a finite state machine that detects the following:
+	 * <ol>
+	 * <li>110/80</li>
+	 * </ol>
+	 * 
+	 * @return
+	 */
+	private Machine getBloodPressureMachine() {
+		State startState = new NamedState("START");
+		State endState = new NamedState("END");
+		endState.setEndStateFlag(true);
+
+		Machine m = new Machine(startState);
+		State systolicState = new NamedState("SYSTOLIC");
+		State fslashState = new NamedState("FSLASH");
+
+		Condition systolicCondition = new IntegerRangeCondition(80, 200);
+		Condition diastolicCondition = new IntegerRangeCondition(60, 160);
+		Condition fslashCondition = new PunctuationValueCondition('/');
+
+		startState.addTransition(systolicCondition, systolicState);
+		startState.addTransition(new AnyCondition(), startState);
+
+		systolicState.addTransition(fslashCondition, fslashState);
+		systolicState.addTransition(new AnyCondition(), startState);
+
+		fslashState.addTransition(diastolicCondition, endState);
+		fslashState.addTransition(new AnyCondition(), startState);
+
+		endState.addTransition(new AnyCondition(), startState);
+
+		return m;
+	}
+
+	/**
+	 * Gets a finite state machine that detects the following:
+	 * <ol>
+	 * <li>one teaspoon</li>
+	 * <li>one tsp</li>
+	 * <li>1 teaspoon</li>
+	 * <li>1 tsp</li>
+	 * <li>0.5 tsp</li>
+	 * <li>1-5 teaspoons</li>
+	 * </ol>
+	 * 
+	 * @return
+	 */
+	private Machine getSubstanceQuantityMachine() {
+		State startState = new NamedState("START");
+		State endState = new NamedState("END");
+		endState.setEndStateFlag(true);
+
+		Machine m = new Machine(startState);
+		State quanitityState = new NamedState("QUANITITY");
+
+		Condition numberCondition = new NumberCondition();
+		Condition numberTextCondition = new WordSetCondition(iv_textNumberSet,
+				false);
+		Condition rangeCondition = new RangeCondition();
+		Condition fullTextCondition = new WordSetCondition(iv_fullTextSet,
+				false);
+		Condition shortTextCondition = new WordSetCondition(iv_shortTextSet,
+				false);
+
+		startState.addTransition(numberCondition, quanitityState);
+		startState.addTransition(rangeCondition, quanitityState);
+		startState.addTransition(numberTextCondition, quanitityState);
+		startState.addTransition(new AnyCondition(), startState);
+
+		quanitityState.addTransition(fullTextCondition, endState);
+		quanitityState.addTransition(shortTextCondition, endState);
+		quanitityState.addTransition(new AnyCondition(), startState);
+
+		endState.addTransition(new AnyCondition(), startState);
+
+		return m;
+	}
+
+	/**
+	 * Executes the finite state machines.
+	 * 
+	 * @param tokens
+	 * @return Set of MeasurementToken objects.
+	 * @throws Exception
+	 */
+	public Set<MeasurementToken> execute(List<? extends BaseToken> tokens,
+			Set<? extends BaseToken> overrideSet) throws Exception {
+		Set<MeasurementToken> measurementSet = new HashSet<MeasurementToken>();
+
+		// maps a fsm to a token start index
+		// key = fsm , value = token start index
+		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+		Iterator<? extends BaseToken> overrideTokenItr = overrideSet.iterator();
+		// key = start offset, value = override BaseToken object
+		Map<Integer, BaseToken> overrideTokenMap = new HashMap<Integer, BaseToken>();
+		while (overrideTokenItr.hasNext()) {
+			BaseToken t = overrideTokenItr.next();
+			Integer key = new Integer(t.getStartOffset());
+			overrideTokenMap.put(key, t);
+		}
+
+		boolean overrideOn = false;
+		int overrideEndOffset = -1;
+		for (int i = 0; i < tokens.size(); i++) {
+			BaseToken token = tokens.get(i);
+
+			Integer key = new Integer(token.getStartOffset());
+
+			if (overrideOn) {
+				if (token.getStartOffset() >= overrideEndOffset) {
+					overrideOn = false;
+					overrideEndOffset = -1;
+				} else {
+					// step to next iteration of for loop
+					continue;
+				}
+			} else {
+				if (overrideTokenMap.containsKey(key)) {
+					// override one or more tokens until the override
+					// token is complete
+					token = overrideTokenMap.get(key);
+					overrideOn = true;
+					overrideEndOffset = token.getEndOffset();
+				}
+			}
+
+			Iterator<Machine> machineItr = iv_machineSet.iterator();
+			while (machineItr.hasNext()) {
+				Machine fsm = machineItr.next();
+
+				fsm.input(token);
+
+				State currentState = fsm.getCurrentState();
+				if (currentState.getStartStateFlag()) {
+					tokenStartMap.put(fsm, new Integer(i));
+				}
+				if (currentState.getEndStateFlag()) {
+					Object o = tokenStartMap.get(fsm);
+					int tokenStartIndex;
+					if (o == null) {
+						// By default, all machines start with
+						// token zero.
+						tokenStartIndex = 0;
+					} else {
+						tokenStartIndex = ((Integer) o).intValue();
+						// skip ahead over single token we don't want
+						tokenStartIndex++;
+					}
+					BaseToken startToken = tokens.get(tokenStartIndex);
+					BaseToken endToken = token;
+					MeasurementToken measurementToken = new MeasurementToken(
+							startToken.getStartOffset(), endToken
+									.getEndOffset());
+					measurementSet.add(measurementToken);
+					fsm.reset();
+				}
+			}
+		}
+
+		// cleanup
+		tokenStartMap.clear();
+
+		// reset machines
+		Iterator<Machine> itr = iv_machineSet.iterator();
+		while (itr.hasNext()) {
+			Machine fsm = itr.next();
+			fsm.reset();
+		}
+
+		return measurementSet;
+	}
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/NegationFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/NegationFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/NegationFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/NegationFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,415 +14,415 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.condition.DisjoinCondition;
-import edu.mayo.bmi.fsm.condition.NegateCondition;
-import edu.mayo.bmi.fsm.condition.TextSetCondition;
-import edu.mayo.bmi.fsm.output.NegationIndicator;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.state.NonTerminalEndState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-
-/**
- * Uses one or more finite state machines to detect dates in the given input of
- * tokens.
- * 
- * @author Mayo Clinic
- */
-public class NegationFSM {
-
-	// regular modal verb
-	private Set<String> iv_modalVerbsSet = new HashSet<String>();
-	// negative particle
-	private Set<String> iv_negParticlesSet = new HashSet<String>();
-	// regular verbs requiring negation particle
-	private Set<String> iv_regVerbsSet = new HashSet<String>();
-	// neagive verbs that contain negation in them
-	private Set<String> iv_negVerbsSet = new HashSet<String>();
-	// negation preposition
-	private Set<String> iv_negPrepositionsSet = new HashSet<String>();
-	// negatively charged determiners
-	private Set<String> iv_negDeterminersSet = new HashSet<String>();
-	// regular nouns - indicators
-	private Set<String> iv_regNounsSet = new HashSet<String>();
-	// regular prepositions
-	private Set<String> iv_regPrepositionsSet = new HashSet<String>();
-	// negative adjectives
-	private Set<String> iv_negAdjectivesSet = new HashSet<String>();
-	// negative collocations
-	private Set<String> iv_negCollocSet = new HashSet<String>();
-	// NEGATIVE COLLOCATION PARTICLE
-	private Set<String> iv_negColPartSet = new HashSet<String>();
-
-	// contains the finite state machines
-	private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
-	/**
-	 * 
-	 * Constructor
-	 * 
-	 */
-	public NegationFSM() {
-		iv_modalVerbsSet.add("can");
-		iv_modalVerbsSet.add("ca");
-		iv_modalVerbsSet.add("will");
-		iv_modalVerbsSet.add("must");
-		iv_modalVerbsSet.add("could");
-		iv_modalVerbsSet.add("would");
-		iv_modalVerbsSet.add("should");
-		iv_modalVerbsSet.add("shall");
-		iv_modalVerbsSet.add("did");
-
-		iv_negParticlesSet.add("not");
-		iv_negColPartSet.add("out");
-		iv_negParticlesSet.add("n't");
-		iv_negParticlesSet.add("'t");
-
-		iv_negCollocSet.add("rule");
-		iv_negCollocSet.add("rules");
-		iv_negCollocSet.add("ruled");
-		iv_negCollocSet.add("ruling");
-		iv_negCollocSet.add("rule-out");
-
-		iv_regVerbsSet.add("reveal");
-		iv_regVerbsSet.add("reveals");
-		iv_regVerbsSet.add("revealed");
-		iv_regVerbsSet.add("revealing");
-		iv_regVerbsSet.add("have");
-		iv_regVerbsSet.add("had");
-		iv_regVerbsSet.add("has");
-		iv_regVerbsSet.add("feel");
-		iv_regVerbsSet.add("feels");
-		iv_regVerbsSet.add("felt");
-		iv_regVerbsSet.add("feeling");
-		iv_regVerbsSet.add("complain");
-		iv_regVerbsSet.add("complains");
-		iv_regVerbsSet.add("complained");
-		iv_regVerbsSet.add("complaining");
-		iv_regVerbsSet.add("demonstrate");
-		iv_regVerbsSet.add("demonstrates");
-		iv_regVerbsSet.add("demonstrated");
-		iv_regVerbsSet.add("demonstrating");
-		iv_regVerbsSet.add("appear");
-		iv_regVerbsSet.add("appears");
-		iv_regVerbsSet.add("appeared");
-		iv_regVerbsSet.add("appearing");
-		iv_regVerbsSet.add("caused");
-		iv_regVerbsSet.add("cause");
-		iv_regVerbsSet.add("causing");
-		iv_regVerbsSet.add("causes");
-		iv_regVerbsSet.add("find");
-		iv_regVerbsSet.add("finds");
-		iv_regVerbsSet.add("found");
-		iv_regVerbsSet.add("discover");
-		iv_regVerbsSet.add("discovered");
-		iv_regVerbsSet.add("discovers");
-
-		iv_negVerbsSet.add("deny");
-		iv_negVerbsSet.add("denies");
-		iv_negVerbsSet.add("denied");
-		iv_negVerbsSet.add("denying");
-		iv_negVerbsSet.add("fail");
-		iv_negVerbsSet.add("fails");
-		iv_negVerbsSet.add("failed");
-		iv_negVerbsSet.add("failing");
-		iv_negVerbsSet.add("decline");
-		iv_negVerbsSet.add("declines");
-		iv_negVerbsSet.add("declined");
-		iv_negVerbsSet.add("declining");
-		iv_negVerbsSet.add("exclude");
-		iv_negVerbsSet.add("excludes");
-		iv_negVerbsSet.add("excluding");
-		iv_negVerbsSet.add("excluded");
-
-		iv_negPrepositionsSet.add("without");
-		iv_negPrepositionsSet.add("absent");
-		iv_negPrepositionsSet.add("none");
-
-		iv_negDeterminersSet.add("no");
-		iv_negDeterminersSet.add("any");
-		iv_negDeterminersSet.add("neither");
-		iv_negDeterminersSet.add("nor");
-		iv_negDeterminersSet.add("never");
-
-		iv_regNounsSet.add("evidence");
-		iv_regNounsSet.add("indication");
-		iv_regNounsSet.add("indications");
-		iv_regNounsSet.add("sign");
-		iv_regNounsSet.add("signs");
-		iv_regNounsSet.add("symptoms");
-		iv_regNounsSet.add("symptom");
-		iv_regNounsSet.add("sx");
-		iv_regNounsSet.add("dx");
-		iv_regNounsSet.add("diagnosis");
-		iv_regNounsSet.add("history");
-		iv_regNounsSet.add("hx");
-		iv_regNounsSet.add("findings");
-
-		iv_regPrepositionsSet.add("of");
-		iv_regPrepositionsSet.add("in");
-		iv_regPrepositionsSet.add("for");
-		iv_regPrepositionsSet.add("with");
-
-		iv_negAdjectivesSet.add("unremarkable");
-		iv_negAdjectivesSet.add("unlikely");
-		iv_negAdjectivesSet.add("negative");
-
-		iv_machineSet.add(getAspectualNegIndicatorMachine());
-		iv_machineSet.add(getNominalNegIndicatorMachine());
-		iv_machineSet.add(getAdjNegIndicatorMachine());
-
-	}
-
-	private Machine getAspectualNegIndicatorMachine() {
-		State startState = new NamedState("START");
-		State endState = new NamedState("END");
-		State anyState = new NamedState("ANY");
-
-		State ntEndState = new NonTerminalEndState("NON TERMINAL END");
-		endState.setEndStateFlag(true);
-		ntEndState.setEndStateFlag(true);
-
-		Machine m = new Machine(startState);
-		State regModalState = new NamedState("REG_MODAL");
-		State negPartState = new NamedState("NEG_PART");
-		State negVerbState = new NamedState("NEG_VERB");
-		State negCollocState = new NamedState("NEG_COLLOC");
-		State negColPartState = new NamedState("NEG_COLPART");
-
-		Condition regModalC = new TextSetCondition(iv_modalVerbsSet, false);
-		Condition negPartC = new TextSetCondition(iv_negParticlesSet, false);
-		Condition regVerbC = new TextSetCondition(iv_regVerbsSet, false);
-		Condition negVerbC = new TextSetCondition(iv_negVerbsSet, false);
-		Condition negDetC = new TextSetCondition(iv_negDeterminersSet, false);
-		Condition negCollocC = new TextSetCondition(iv_negCollocSet, false);
-		Condition negColPartC = new TextSetCondition(iv_negColPartSet, false);
-
-		Condition notCollocC = new NegateCondition(negCollocC);
-
-		startState.addTransition(negVerbC, negVerbState);
-		startState.addTransition(negCollocC, negCollocState); // rule
-
-		startState.addTransition(new DisjoinCondition(regModalC, regVerbC),
-				regModalState); // start with a modal
-		startState.addTransition(new DisjoinCondition(negPartC, negDetC),
-				negPartState);
-
-		startState.addTransition(new AnyCondition(), startState);
-
-		regModalState.addTransition(negCollocC, negCollocState);
-		negCollocState.addTransition(negColPartC, negColPartState); // out
-		negColPartState.addTransition(new AnyCondition(), ntEndState);
-		negCollocState.addTransition(new AnyCondition(), startState);
-
-		regModalState.addTransition(new DisjoinCondition(negPartC, negDetC),
-				negPartState);
-		regModalState.addTransition(new AnyCondition(), anyState);
-
-		anyState.addTransition(new DisjoinCondition(negPartC, negDetC),
-				negPartState);
-		anyState.addTransition(new AnyCondition(), startState);
-
-		negPartState.addTransition(notCollocC, ntEndState);
-		negVerbState.addTransition(notCollocC, ntEndState);
-		negPartState.addTransition(new AnyCondition(), startState);
-		negVerbState.addTransition(new AnyCondition(), startState);
-
-		negPartState.addTransition(new AnyCondition(), ntEndState);
-		negVerbState.addTransition(new AnyCondition(), ntEndState);
-
-		ntEndState.addTransition(new AnyCondition(), endState);
-
-		return m;
-	}
-
-	
-	/**
-	 *  should recognize:
-	 *  <ul><li>A</li>
-	 *  	<li>B</li>
-	 *  	<li>B C</li>
-	 *  	<li>B D* C</li>
-	 *  </ul>
-	 *  <p>where A is one of
-	 *  <ul><li>without</li>
-	 *  	<li>absent</li>
-	 *  	<li>none</li>
-	 *  </ul>
-	 *  <p> and B is one of
-	 *  <ul>
-	 *   	<li>no</li>
-	 *  	<li>any</li>
-	 *  	<li>neither</li>
-	 *  	<li>nor</li>
-	 *  	<li>never</li>
-	 *  </ul>
-	 *  <p> and C is one of
-	 *  <ul>
-	 *   	<li>evidence</li>
-	 *  	<li>indication</li>
-	 *  	<li>indications</li>
-	 *  	<li>sign</li>
-	 *  	<li>signs</li>
-	 *  	<li>symptoms</li>
-	 *  	<li>symptom</li>
-	 *  	<li>sx</li>
-	 *  	<li>dx</li>
-	 *  	<li>diagnosis</li>
-	 *  	<li>history</li>
-	 *  	<li>hx</li>
-	 *  	<li>findings</li>
-	 *  </ul>
-	 * <p> and D is anything
-	 * @return
-	 */
-	private Machine getNominalNegIndicatorMachine() {
-		State startState = new NamedState("START");
-		State endState = new NamedState("END");
-		State anyState = new NamedState("ANY");
-		State ntEndState = new NonTerminalEndState("NON TERMINAL END");
-		endState.setEndStateFlag(true);
-		ntEndState.setEndStateFlag(true);
-
-		Machine m = new Machine(startState);
-		State negPrepState = new NamedState("NEG_PREP");
-		State negDetState = new NamedState("NEG_DET");
-		State regNounState = new NamedState("REG_NOUN");
-
-		Condition negPrepC = new TextSetCondition(iv_negPrepositionsSet, false);
-		Condition negDetC = new TextSetCondition(iv_negDeterminersSet, false);
-		Condition regNounC = new TextSetCondition(iv_regNounsSet, false);
-
-		startState.addTransition(negDetC, negDetState); // start with a modal
-		startState.addTransition(negPrepC, negPrepState);
-		startState.addTransition(new AnyCondition(), startState);
-
-		negPrepState.addTransition(new AnyCondition(), ntEndState);
-		negDetState.addTransition(regNounC, regNounState);
-		negDetState.addTransition(new AnyCondition(), ntEndState);
-		negDetState.addTransition(new AnyCondition(), anyState);
-
-		anyState.addTransition(regNounC, regNounState);
-		anyState.addTransition(new AnyCondition(), anyState);
-
-		regNounState.addTransition(new AnyCondition(), ntEndState);
-
-		ntEndState.addTransition(new AnyCondition(), endState);
-
-		return m;
-	}
-
-	
-	/**
-	 * recognizes "A B ..."
-	 * where A is unremarkable, unlikely, or negative
-	 * and B is of, in, for, or with
-	 */
-
-	private Machine getAdjNegIndicatorMachine() {
-		State startState = new NamedState("START");
-		State endState = new NamedState("END");
-		State ntEndState = new NonTerminalEndState("NON TERMINAL END");
-		endState.setEndStateFlag(true);
-		ntEndState.setEndStateFlag(true);
-
-		Machine m = new Machine(startState);
-		State regPrepState = new NamedState("REG_PREP");
-		State negAdjState = new NamedState("NEG_ADJ");
-
-		Condition regPrepC = new TextSetCondition(iv_regPrepositionsSet, false);
-		Condition negAdjC = new TextSetCondition(iv_negAdjectivesSet, false);
-
-		startState.addTransition(negAdjC, negAdjState); // start with a modal
-		startState.addTransition(new AnyCondition(), startState);
-
-		negAdjState.addTransition(regPrepC, regPrepState);
-		regPrepState.addTransition(new AnyCondition(), ntEndState);
-		negAdjState.addTransition(new AnyCondition(), startState);
-
-		ntEndState.addTransition(new AnyCondition(), endState);
-
-		return m;
-	}
-
-	/**
-	 * Executes the finite state machines.
-	 * 
-	 * @param tokens
-	 * @return Set of DateToken objects.
-	 * @throws Exception
-	 */
-	public Set<NegationIndicator> execute(List<?> tokens) throws Exception {
-		Set<NegationIndicator> outSet = new HashSet<NegationIndicator>();
-
-		// maps a fsm to a token start index
-		// key = fsm , value = token start index
-		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
-		for (int i = 0; i < tokens.size(); i++) {
-			BaseToken token = (BaseToken) tokens.get(i);
-
-			Iterator<Machine> machineItr = iv_machineSet.iterator();
-			while (machineItr.hasNext()) {
-				Machine fsm = machineItr.next();
-
-				fsm.input(token);
-
-				State currentState = fsm.getCurrentState();
-				if (currentState.getStartStateFlag()) {
-					tokenStartMap.put(fsm, new Integer(i));
-				}
-				if (currentState.getEndStateFlag()) {
-					Object o = tokenStartMap.get(fsm);
-					int tokenStartIndex;
-					if (o == null) {
-						// By default, all machines start with
-						// token zero.
-						tokenStartIndex = 0;
-					} else {
-						tokenStartIndex = ((Integer) o).intValue();
-						// skip ahead over single token we don't want
-						tokenStartIndex++;
-					}
-					BaseToken endToken = null;
-					if (currentState instanceof NonTerminalEndState) {
-						endToken = (BaseToken) tokens.get(i - 1);
-					} else {
-						endToken = token;
-					}
-
-					BaseToken startToken = (BaseToken) tokens
-							.get(tokenStartIndex);
-					NegationIndicator neg = new NegationIndicator(startToken
-							.getStartOffset(), endToken.getEndOffset());
-					outSet.add(neg);
-					fsm.reset();
-				}
-			}
-		}
-
-		// cleanup
-		tokenStartMap.clear();
-
-		// reset machines
-		Iterator<Machine> itr = iv_machineSet.iterator();
-		while (itr.hasNext()) {
-			Machine fsm = itr.next();
-			fsm.reset();
-		}
-
-		return outSet;
-	}
-}
\ No newline at end of file
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.condition.DisjoinCondition;
+import edu.mayo.bmi.fsm.condition.NegateCondition;
+import edu.mayo.bmi.fsm.condition.TextSetCondition;
+import edu.mayo.bmi.fsm.output.NegationIndicator;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.state.NonTerminalEndState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+
+/**
+ * Uses one or more finite state machines to detect negation indicators in the
+ * given input of tokens.
+ * 
+ * @author Mayo Clinic
+ */
+public class NegationFSM {
+
+	// regular modal verb
+	private Set<String> iv_modalVerbsSet = new HashSet<String>();
+	// negative particle
+	private Set<String> iv_negParticlesSet = new HashSet<String>();
+	// regular verbs requiring negation particle
+	private Set<String> iv_regVerbsSet = new HashSet<String>();
+	// negative verbs that contain negation in them
+	private Set<String> iv_negVerbsSet = new HashSet<String>();
+	// negation preposition
+	private Set<String> iv_negPrepositionsSet = new HashSet<String>();
+	// negatively charged determiners
+	private Set<String> iv_negDeterminersSet = new HashSet<String>();
+	// regular nouns - indicators
+	private Set<String> iv_regNounsSet = new HashSet<String>();
+	// regular prepositions
+	private Set<String> iv_regPrepositionsSet = new HashSet<String>();
+	// negative adjectives
+	private Set<String> iv_negAdjectivesSet = new HashSet<String>();
+	// negative collocations
+	private Set<String> iv_negCollocSet = new HashSet<String>();
+	// NEGATIVE COLLOCATION PARTICLE
+	private Set<String> iv_negColPartSet = new HashSet<String>();
+
+	// contains the finite state machines
+	private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+	/**
+	 * 
+	 * Constructor
+	 * 
+	 */
+	public NegationFSM() {
+		iv_modalVerbsSet.add("can");
+		iv_modalVerbsSet.add("ca");
+		iv_modalVerbsSet.add("will");
+		iv_modalVerbsSet.add("must");
+		iv_modalVerbsSet.add("could");
+		iv_modalVerbsSet.add("would");
+		iv_modalVerbsSet.add("should");
+		iv_modalVerbsSet.add("shall");
+		iv_modalVerbsSet.add("did");
+
+		iv_negParticlesSet.add("not");
+		iv_negColPartSet.add("out");
+		iv_negParticlesSet.add("n't");
+		iv_negParticlesSet.add("'t");
+
+		iv_negCollocSet.add("rule");
+		iv_negCollocSet.add("rules");
+		iv_negCollocSet.add("ruled");
+		iv_negCollocSet.add("ruling");
+		iv_negCollocSet.add("rule-out");
+
+		iv_regVerbsSet.add("reveal");
+		iv_regVerbsSet.add("reveals");
+		iv_regVerbsSet.add("revealed");
+		iv_regVerbsSet.add("revealing");
+		iv_regVerbsSet.add("have");
+		iv_regVerbsSet.add("had");
+		iv_regVerbsSet.add("has");
+		iv_regVerbsSet.add("feel");
+		iv_regVerbsSet.add("feels");
+		iv_regVerbsSet.add("felt");
+		iv_regVerbsSet.add("feeling");
+		iv_regVerbsSet.add("complain");
+		iv_regVerbsSet.add("complains");
+		iv_regVerbsSet.add("complained");
+		iv_regVerbsSet.add("complaining");
+		iv_regVerbsSet.add("demonstrate");
+		iv_regVerbsSet.add("demonstrates");
+		iv_regVerbsSet.add("demonstrated");
+		iv_regVerbsSet.add("demonstrating");
+		iv_regVerbsSet.add("appear");
+		iv_regVerbsSet.add("appears");
+		iv_regVerbsSet.add("appeared");
+		iv_regVerbsSet.add("appearing");
+		iv_regVerbsSet.add("caused");
+		iv_regVerbsSet.add("cause");
+		iv_regVerbsSet.add("causing");
+		iv_regVerbsSet.add("causes");
+		iv_regVerbsSet.add("find");
+		iv_regVerbsSet.add("finds");
+		iv_regVerbsSet.add("found");
+		iv_regVerbsSet.add("discover");
+		iv_regVerbsSet.add("discovered");
+		iv_regVerbsSet.add("discovers");
+
+		iv_negVerbsSet.add("deny");
+		iv_negVerbsSet.add("denies");
+		iv_negVerbsSet.add("denied");
+		iv_negVerbsSet.add("denying");
+		iv_negVerbsSet.add("fail");
+		iv_negVerbsSet.add("fails");
+		iv_negVerbsSet.add("failed");
+		iv_negVerbsSet.add("failing");
+		iv_negVerbsSet.add("decline");
+		iv_negVerbsSet.add("declines");
+		iv_negVerbsSet.add("declined");
+		iv_negVerbsSet.add("declining");
+		iv_negVerbsSet.add("exclude");
+		iv_negVerbsSet.add("excludes");
+		iv_negVerbsSet.add("excluding");
+		iv_negVerbsSet.add("excluded");
+
+		iv_negPrepositionsSet.add("without");
+		iv_negPrepositionsSet.add("absent");
+		iv_negPrepositionsSet.add("none");
+
+		iv_negDeterminersSet.add("no");
+		iv_negDeterminersSet.add("any");
+		iv_negDeterminersSet.add("neither");
+		iv_negDeterminersSet.add("nor");
+		iv_negDeterminersSet.add("never");
+
+		iv_regNounsSet.add("evidence");
+		iv_regNounsSet.add("indication");
+		iv_regNounsSet.add("indications");
+		iv_regNounsSet.add("sign");
+		iv_regNounsSet.add("signs");
+		iv_regNounsSet.add("symptoms");
+		iv_regNounsSet.add("symptom");
+		iv_regNounsSet.add("sx");
+		iv_regNounsSet.add("dx");
+		iv_regNounsSet.add("diagnosis");
+		iv_regNounsSet.add("history");
+		iv_regNounsSet.add("hx");
+		iv_regNounsSet.add("findings");
+
+		iv_regPrepositionsSet.add("of");
+		iv_regPrepositionsSet.add("in");
+		iv_regPrepositionsSet.add("for");
+		iv_regPrepositionsSet.add("with");
+
+		iv_negAdjectivesSet.add("unremarkable");
+		iv_negAdjectivesSet.add("unlikely");
+		iv_negAdjectivesSet.add("negative");
+
+		iv_machineSet.add(getAspectualNegIndicatorMachine());
+		iv_machineSet.add(getNominalNegIndicatorMachine());
+		iv_machineSet.add(getAdjNegIndicatorMachine());
+
+	}
+
+	private Machine getAspectualNegIndicatorMachine() {
+		State startState = new NamedState("START");
+		State endState = new NamedState("END");
+		State anyState = new NamedState("ANY");
+
+		State ntEndState = new NonTerminalEndState("NON TERMINAL END");
+		endState.setEndStateFlag(true);
+		ntEndState.setEndStateFlag(true);
+
+		Machine m = new Machine(startState);
+		State regModalState = new NamedState("REG_MODAL");
+		State negPartState = new NamedState("NEG_PART");
+		State negVerbState = new NamedState("NEG_VERB");
+		State negCollocState = new NamedState("NEG_COLLOC");
+		State negColPartState = new NamedState("NEG_COLPART");
+
+		Condition regModalC = new TextSetCondition(iv_modalVerbsSet, false);
+		Condition negPartC = new TextSetCondition(iv_negParticlesSet, false);
+		Condition regVerbC = new TextSetCondition(iv_regVerbsSet, false);
+		Condition negVerbC = new TextSetCondition(iv_negVerbsSet, false);
+		Condition negDetC = new TextSetCondition(iv_negDeterminersSet, false);
+		Condition negCollocC = new TextSetCondition(iv_negCollocSet, false);
+		Condition negColPartC = new TextSetCondition(iv_negColPartSet, false);
+
+		Condition notCollocC = new NegateCondition(negCollocC);
+
+		startState.addTransition(negVerbC, negVerbState);
+		startState.addTransition(negCollocC, negCollocState); // rule
+
+		startState.addTransition(new DisjoinCondition(regModalC, regVerbC),
+				regModalState); // start with a modal
+		startState.addTransition(new DisjoinCondition(negPartC, negDetC),
+				negPartState);
+
+		startState.addTransition(new AnyCondition(), startState);
+
+		regModalState.addTransition(negCollocC, negCollocState);
+		negCollocState.addTransition(negColPartC, negColPartState); // out
+		negColPartState.addTransition(new AnyCondition(), ntEndState);
+		negCollocState.addTransition(new AnyCondition(), startState);
+
+		regModalState.addTransition(new DisjoinCondition(negPartC, negDetC),
+				negPartState);
+		regModalState.addTransition(new AnyCondition(), anyState);
+
+		anyState.addTransition(new DisjoinCondition(negPartC, negDetC),
+				negPartState);
+		anyState.addTransition(new AnyCondition(), startState);
+
+		negPartState.addTransition(notCollocC, ntEndState);
+		negVerbState.addTransition(notCollocC, ntEndState);
+		negPartState.addTransition(new AnyCondition(), startState);
+		negVerbState.addTransition(new AnyCondition(), startState);
+
+		negPartState.addTransition(new AnyCondition(), ntEndState);
+		negVerbState.addTransition(new AnyCondition(), ntEndState);
+
+		ntEndState.addTransition(new AnyCondition(), endState);
+
+		return m;
+	}
+
+	
+	/**
+	 *  should recognize:
+	 *  <ul><li>A</li>
+	 *  	<li>B</li>
+	 *  	<li>B C</li>
+	 *  	<li>B D* C</li>
+	 *  </ul>
+	 *  <p>where A is one of
+	 *  <ul><li>without</li>
+	 *  	<li>absent</li>
+	 *  	<li>none</li>
+	 *  </ul>
+	 *  <p> and B is one of
+	 *  <ul>
+	 *   	<li>no</li>
+	 *  	<li>any</li>
+	 *  	<li>neither</li>
+	 *  	<li>nor</li>
+	 *  	<li>never</li>
+	 *  </ul>
+	 *  <p> and C is one of
+	 *  <ul>
+	 *   	<li>evidence</li>
+	 *  	<li>indication</li>
+	 *  	<li>indications</li>
+	 *  	<li>sign</li>
+	 *  	<li>signs</li>
+	 *  	<li>symptoms</li>
+	 *  	<li>symptom</li>
+	 *  	<li>sx</li>
+	 *  	<li>dx</li>
+	 *  	<li>diagnosis</li>
+	 *  	<li>history</li>
+	 *  	<li>hx</li>
+	 *  	<li>findings</li>
+	 *  </ul>
+	 * <p> and D is anything
+	 * @return
+	 */
+	private Machine getNominalNegIndicatorMachine() {
+		State startState = new NamedState("START");
+		State endState = new NamedState("END");
+		State anyState = new NamedState("ANY");
+		State ntEndState = new NonTerminalEndState("NON TERMINAL END");
+		endState.setEndStateFlag(true);
+		ntEndState.setEndStateFlag(true);
+
+		Machine m = new Machine(startState);
+		State negPrepState = new NamedState("NEG_PREP");
+		State negDetState = new NamedState("NEG_DET");
+		State regNounState = new NamedState("REG_NOUN");
+
+		Condition negPrepC = new TextSetCondition(iv_negPrepositionsSet, false);
+		Condition negDetC = new TextSetCondition(iv_negDeterminersSet, false);
+		Condition regNounC = new TextSetCondition(iv_regNounsSet, false);
+
+		startState.addTransition(negDetC, negDetState); // start with a negative determiner
+		startState.addTransition(negPrepC, negPrepState);
+		startState.addTransition(new AnyCondition(), startState);
+
+		negPrepState.addTransition(new AnyCondition(), ntEndState);
+		negDetState.addTransition(regNounC, regNounState);
+		negDetState.addTransition(new AnyCondition(), ntEndState);
+		negDetState.addTransition(new AnyCondition(), anyState);
+
+		anyState.addTransition(regNounC, regNounState);
+		anyState.addTransition(new AnyCondition(), anyState);
+
+		regNounState.addTransition(new AnyCondition(), ntEndState);
+
+		ntEndState.addTransition(new AnyCondition(), endState);
+
+		return m;
+	}
+
+	
+	/**
+	 * recognizes "A B ..."
+	 * where A is unremarkable, unlikely, or negative
+	 * and B is of, in, for, or with
+	 */
+
+	private Machine getAdjNegIndicatorMachine() {
+		State startState = new NamedState("START");
+		State endState = new NamedState("END");
+		State ntEndState = new NonTerminalEndState("NON TERMINAL END");
+		endState.setEndStateFlag(true);
+		ntEndState.setEndStateFlag(true);
+
+		Machine m = new Machine(startState);
+		State regPrepState = new NamedState("REG_PREP");
+		State negAdjState = new NamedState("NEG_ADJ");
+
+		Condition regPrepC = new TextSetCondition(iv_regPrepositionsSet, false);
+		Condition negAdjC = new TextSetCondition(iv_negAdjectivesSet, false);
+
+		startState.addTransition(negAdjC, negAdjState); // start with a negative adjective
+		startState.addTransition(new AnyCondition(), startState);
+
+		negAdjState.addTransition(regPrepC, regPrepState);
+		regPrepState.addTransition(new AnyCondition(), ntEndState);
+		negAdjState.addTransition(new AnyCondition(), startState);
+
+		ntEndState.addTransition(new AnyCondition(), endState);
+
+		return m;
+	}
+
+	/**
+	 * Executes the finite state machines.
+	 * 
+	 * @param tokens
+	 * @return Set of NegationIndicator objects.
+	 * @throws Exception
+	 */
+	public Set<NegationIndicator> execute(List<?> tokens) throws Exception {
+		Set<NegationIndicator> outSet = new HashSet<NegationIndicator>();
+
+		// maps a fsm to a token start index
+		// key = fsm , value = token start index
+		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+		for (int i = 0; i < tokens.size(); i++) {
+			BaseToken token = (BaseToken) tokens.get(i);
+
+			Iterator<Machine> machineItr = iv_machineSet.iterator();
+			while (machineItr.hasNext()) {
+				Machine fsm = machineItr.next();
+
+				fsm.input(token);
+
+				State currentState = fsm.getCurrentState();
+				if (currentState.getStartStateFlag()) {
+					tokenStartMap.put(fsm, new Integer(i));
+				}
+				if (currentState.getEndStateFlag()) {
+					Object o = tokenStartMap.get(fsm);
+					int tokenStartIndex;
+					if (o == null) {
+						// By default, all machines start with
+						// token zero.
+						tokenStartIndex = 0;
+					} else {
+						tokenStartIndex = ((Integer) o).intValue();
+						// skip ahead over single token we don't want
+						tokenStartIndex++;
+					}
+					BaseToken endToken = null;
+					if (currentState instanceof NonTerminalEndState) {
+						endToken = (BaseToken) tokens.get(i - 1);
+					} else {
+						endToken = token;
+					}
+
+					BaseToken startToken = (BaseToken) tokens
+							.get(tokenStartIndex);
+					NegationIndicator neg = new NegationIndicator(startToken
+							.getStartOffset(), endToken.getEndOffset());
+					outSet.add(neg);
+					fsm.reset();
+				}
+			}
+		}
+
+		// cleanup
+		tokenStartMap.clear();
+
+		// reset machines
+		Iterator<Machine> itr = iv_machineSet.iterator();
+		while (itr.hasNext()) {
+			Machine fsm = itr.next();
+			fsm.reset();
+		}
+
+		return outSet;
+	}
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/PersonTitleFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/PersonTitleFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/PersonTitleFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/PersonTitleFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,143 +14,143 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
-import edu.mayo.bmi.fsm.condition.WordSetCondition;
-import edu.mayo.bmi.fsm.output.PersonTitleToken;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-
-/**
- * Uses one or more finite state machines to detect measurements in the given
- * input of tokens.
- * 
- * @author Mayo Clinic
- */
-public class PersonTitleFSM {
-	// text fractions
-	Set<String> iv_fullTextSet = new HashSet<String>();
-
-	// contains the finite state machines
-	private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
-	/**
-	 * 
-	 * Constructor
-	 * 
-	 */
-	public PersonTitleFSM() {
-		iv_fullTextSet.add("mr");
-		iv_fullTextSet.add("ms");
-		iv_fullTextSet.add("mrs");
-		iv_fullTextSet.add("dr");
-
-		iv_machineSet.add(getTitleMachine());
-	}
-
-	/**
-	 * Gets a finite state machine that detects the following:
-	 * <ol>
-	 * <li>Dr.</li>
-	 * </ol>
-	 * 
-	 * @return
-	 */
-	private Machine getTitleMachine() {
-		State startState = new NamedState("START");
-		State endState = new NamedState("END");
-		endState.setEndStateFlag(true);
-
-		Machine m = new Machine(startState);
-		State titleNameState = new NamedState("TITLENAME");
-
-		Condition titleNameCondition = new WordSetCondition(iv_fullTextSet,
-				false);
-		Condition periodCondition = new PunctuationValueCondition('.');
-
-		startState.addTransition(titleNameCondition, titleNameState);
-		startState.addTransition(new AnyCondition(), startState);
-
-		titleNameState.addTransition(periodCondition, endState);
-		titleNameState.addTransition(new AnyCondition(), startState);
-
-		endState.addTransition(new AnyCondition(), startState);
-
-		return m;
-	}
-
-	/**
-	 * Executes the finite state machines.
-	 * 
-	 * @param tokens
-	 * @return Set of FractionToken objects.
-	 * @throws Exception
-	 */
-	public Set<PersonTitleToken> execute(List<? extends BaseToken> tokens)
-			throws Exception {
-		Set<PersonTitleToken> personTitleSet = new HashSet<PersonTitleToken>();
-
-		// maps a fsm to a token start index
-		// key = fsm , value = token start index
-		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
-		for (int i = 0; i < tokens.size(); i++) {
-			BaseToken token = tokens.get(i);
-
-			Iterator<Machine> machineItr = iv_machineSet.iterator();
-			while (machineItr.hasNext()) {
-				Machine fsm = machineItr.next();
-
-				fsm.input(token);
-
-				State currentState = fsm.getCurrentState();
-				if (currentState.getStartStateFlag()) {
-					tokenStartMap.put(fsm, new Integer(i));
-				}
-				if (currentState.getEndStateFlag()) {
-					Object o = tokenStartMap.get(fsm);
-					int tokenStartIndex;
-					if (o == null) {
-						// By default, all machines start with
-						// token zero.
-						tokenStartIndex = 0;
-					} else {
-						tokenStartIndex = ((Integer) o).intValue();
-						// skip ahead over single token we don't want
-						tokenStartIndex++;
-					}
-					BaseToken startToken = tokens.get(tokenStartIndex);
-					BaseToken endToken = token;
-					PersonTitleToken ptToken = new PersonTitleToken(startToken
-							.getStartOffset(), endToken.getEndOffset());
-					personTitleSet.add(ptToken);
-					fsm.reset();
-				}
-			}
-		}
-
-		// cleanup
-		tokenStartMap.clear();
-
-		// reset machines
-		Iterator<Machine> itr = iv_machineSet.iterator();
-		while (itr.hasNext()) {
-			Machine fsm = itr.next();
-			fsm.reset();
-		}
-
-		return personTitleSet;
-	}
-}
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
+import edu.mayo.bmi.fsm.condition.WordSetCondition;
+import edu.mayo.bmi.fsm.output.PersonTitleToken;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+
+/**
+ * Uses one or more finite state machines to detect person titles in the given
+ * input of tokens.
+ * 
+ * @author Mayo Clinic
+ */
+public class PersonTitleFSM {
+	// person title abbreviations, without the trailing period
+	Set<String> iv_fullTextSet = new HashSet<String>();
+
+	// contains the finite state machines
+	private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+	/**
+	 * 
+	 * Constructor
+	 * 
+	 */
+	public PersonTitleFSM() {
+		iv_fullTextSet.add("mr");
+		iv_fullTextSet.add("ms");
+		iv_fullTextSet.add("mrs");
+		iv_fullTextSet.add("dr");
+
+		iv_machineSet.add(getTitleMachine());
+	}
+
+	/**
+	 * Gets a finite state machine that detects the following:
+	 * <ol>
+	 * <li>Dr.</li>
+	 * </ol>
+	 * 
+	 * @return
+	 */
+	private Machine getTitleMachine() {
+		State startState = new NamedState("START");
+		State endState = new NamedState("END");
+		endState.setEndStateFlag(true);
+
+		Machine m = new Machine(startState);
+		State titleNameState = new NamedState("TITLENAME");
+
+		Condition titleNameCondition = new WordSetCondition(iv_fullTextSet,
+				false);
+		Condition periodCondition = new PunctuationValueCondition('.');
+
+		startState.addTransition(titleNameCondition, titleNameState);
+		startState.addTransition(new AnyCondition(), startState);
+
+		titleNameState.addTransition(periodCondition, endState);
+		titleNameState.addTransition(new AnyCondition(), startState);
+
+		endState.addTransition(new AnyCondition(), startState);
+
+		return m;
+	}
+
+	/**
+	 * Executes the finite state machines.
+	 * 
+	 * @param tokens
+	 * @return Set of PersonTitleToken objects.
+	 * @throws Exception
+	 */
+	public Set<PersonTitleToken> execute(List<? extends BaseToken> tokens)
+			throws Exception {
+		Set<PersonTitleToken> personTitleSet = new HashSet<PersonTitleToken>();
+
+		// maps a fsm to a token start index
+		// key = fsm , value = token start index
+		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+		for (int i = 0; i < tokens.size(); i++) {
+			BaseToken token = tokens.get(i);
+
+			Iterator<Machine> machineItr = iv_machineSet.iterator();
+			while (machineItr.hasNext()) {
+				Machine fsm = machineItr.next();
+
+				fsm.input(token);
+
+				State currentState = fsm.getCurrentState();
+				if (currentState.getStartStateFlag()) {
+					tokenStartMap.put(fsm, new Integer(i));
+				}
+				if (currentState.getEndStateFlag()) {
+					Object o = tokenStartMap.get(fsm);
+					int tokenStartIndex;
+					if (o == null) {
+						// By default, all machines start with
+						// token zero.
+						tokenStartIndex = 0;
+					} else {
+						tokenStartIndex = ((Integer) o).intValue();
+						// skip ahead over single token we don't want
+						tokenStartIndex++;
+					}
+					BaseToken startToken = tokens.get(tokenStartIndex);
+					BaseToken endToken = token;
+					PersonTitleToken ptToken = new PersonTitleToken(startToken
+							.getStartOffset(), endToken.getEndOffset());
+					personTitleSet.add(ptToken);
+					fsm.reset();
+				}
+			}
+		}
+
+		// cleanup
+		tokenStartMap.clear();
+
+		// reset machines
+		Iterator<Machine> itr = iv_machineSet.iterator();
+		while (itr.hasNext()) {
+			Machine fsm = itr.next();
+			fsm.reset();
+		}
+
+		return personTitleSet;
+	}
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RangeFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RangeFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RangeFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RangeFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,206 +14,206 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.condition.IntegerCondition;
-import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
-import edu.mayo.bmi.fsm.condition.RomanNumeralCondition;
-import edu.mayo.bmi.fsm.condition.WordSetCondition;
-import edu.mayo.bmi.fsm.output.RangeToken;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-
-/**
- * Uses one or more finite state machines to detect ranges in the given input of
- * tokens.
- * 
- * @author Mayo Clinic
- */
-public class RangeFSM {
-	// text fractions
-	Set<String> iv_textNumberSet = new HashSet<String>();
-
-	// contains the finite state machines
-	private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
-	/**
-	 * 
-	 * Constructor
-	 * 
-	 */
-	public RangeFSM() {
-		iv_textNumberSet.add("one");
-		iv_textNumberSet.add("two");
-		iv_textNumberSet.add("three");
-		iv_textNumberSet.add("four");
-		iv_textNumberSet.add("five");
-		iv_textNumberSet.add("six");
-		iv_textNumberSet.add("seven");
-		iv_textNumberSet.add("eight");
-		iv_textNumberSet.add("nine");
-		iv_textNumberSet.add("ten");
-
-		iv_machineSet.add(getMachine());
-	}
-
-	/**
-	 * Gets a finite state machine that detects the following:
-	 * <ol>
-	 * <li>250-300</li>
-	 * <li>I-IV</li>
-	 * <li>two-three</li>
-	 * </ol>
-	 * 
-	 * @return
-	 */
-	private Machine getMachine() {
-		State startState = new NamedState("START");
-		State endState = new NamedState("END");
-		endState.setEndStateFlag(true);
-
-		Machine m = new Machine(startState);
-		State leftNumIntegerState = new NamedState("LEFT_NUM_INTEGER");
-		State leftNumRomanState = new NamedState("LEFT_NUM_ROMAN");
-		State leftNumTextState = new NamedState("LEFT_NUM_TEXT");
-		State dashState = new NamedState("DASH");
-
-		Condition leftIntCondition = new IntegerCondition();
-		Condition rightIntCondition = new IntegerCondition();
-		Condition dashCondition = new PunctuationValueCondition('-');
-		Condition leftRomanNumeralCondition = new RomanNumeralCondition();
-		Condition rightRomanNumeralCondition = new RomanNumeralCondition();
-		Condition leftNumTextCondition = new WordSetCondition(iv_textNumberSet,
-				false);
-		Condition rightNumTextCondition = new WordSetCondition(
-				iv_textNumberSet, false);
-
-		startState.addTransition(leftIntCondition, leftNumIntegerState);
-		startState.addTransition(leftRomanNumeralCondition, leftNumRomanState);
-		startState.addTransition(leftNumTextCondition, leftNumTextState);
-		startState.addTransition(new AnyCondition(), startState);
-
-		leftNumIntegerState.addTransition(dashCondition, dashState);
-		leftNumIntegerState.addTransition(new AnyCondition(), startState);
-
-		leftNumRomanState.addTransition(dashCondition, dashState);
-		leftNumRomanState.addTransition(new AnyCondition(), startState);
-
-		leftNumTextState.addTransition(dashCondition, dashState);
-		leftNumTextState.addTransition(new AnyCondition(), startState);
-
-		dashState.addTransition(rightIntCondition, endState);
-		dashState.addTransition(rightRomanNumeralCondition, endState);
-		dashState.addTransition(rightNumTextCondition, endState);
-		dashState.addTransition(new AnyCondition(), startState);
-
-		endState.addTransition(new AnyCondition(), startState);
-
-		return m;
-	}
-
-	/**
-	 * Executes the finite state machines.
-	 * 
-	 * @param tokens
-	 * @return Set of RangeToken objects.
-	 * @throws Exception
-	 */
-	public Set<RangeToken> execute(List<? extends BaseToken> tokens,
-			Set<? extends BaseToken> overrideSet) throws Exception {
-		Set<RangeToken> rangeSet = new HashSet<RangeToken>();
-
-		// maps a fsm to a token start index
-		// key = fsm , value = token start index
-		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
-		Iterator<? extends BaseToken> overrideTokenItr = overrideSet.iterator();
-		// key = start offset, value = override BaseToken object
-		Map<Integer, BaseToken> overrideTokenMap = new HashMap<Integer, BaseToken>();
-		while (overrideTokenItr.hasNext()) {
-			BaseToken t = overrideTokenItr.next();
-			Integer key = new Integer(t.getStartOffset());
-			overrideTokenMap.put(key, t);
-		}
-
-		boolean overrideOn = false;
-		int overrideEndOffset = -1;
-		for (int i = 0; i < tokens.size(); i++) {
-			BaseToken token = tokens.get(i);
-
-			Integer key = new Integer(token.getStartOffset());
-
-			if (overrideOn) {
-				if (token.getStartOffset() >= overrideEndOffset) {
-					overrideOn = false;
-					overrideEndOffset = -1;
-				} else {
-					// step to next iteration of for loop
-					continue;
-				}
-			} else {
-				if (overrideTokenMap.containsKey(key)) {
-					// override one or more tokens until the override
-					// token is complete
-					token = overrideTokenMap.get(key);
-					overrideOn = true;
-					overrideEndOffset = token.getEndOffset();
-				}
-			}
-
-			Iterator<Machine> machineItr = iv_machineSet.iterator();
-			while (machineItr.hasNext()) {
-				Machine fsm = machineItr.next();
-
-				fsm.input(token);
-
-				State currentState = fsm.getCurrentState();
-				if (currentState.getStartStateFlag()) {
-					tokenStartMap.put(fsm, new Integer(i));
-				}
-				if (currentState.getEndStateFlag()) {
-					Object o = tokenStartMap.get(fsm);
-					int tokenStartIndex;
-					if (o == null) {
-						// By default, all machines start with
-						// token zero.
-						tokenStartIndex = 0;
-					} else {
-						tokenStartIndex = ((Integer) o).intValue();
-						// skip ahead over single token we don't want
-						tokenStartIndex++;
-					}
-					BaseToken startToken = tokens.get(tokenStartIndex);
-					BaseToken endToken = token;
-					RangeToken rangeToken = new RangeToken(startToken
-							.getStartOffset(), endToken.getEndOffset());
-					rangeSet.add(rangeToken);
-					fsm.reset();
-				}
-			}
-		}
-
-		// cleanup
-		tokenStartMap.clear();
-
-		// reset machines
-		Iterator<Machine> itr = iv_machineSet.iterator();
-		while (itr.hasNext()) {
-			Machine fsm = itr.next();
-			fsm.reset();
-		}
-
-		return rangeSet;
-	}
-}
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.condition.IntegerCondition;
+import edu.mayo.bmi.fsm.condition.PunctuationValueCondition;
+import edu.mayo.bmi.fsm.condition.RomanNumeralCondition;
+import edu.mayo.bmi.fsm.condition.WordSetCondition;
+import edu.mayo.bmi.fsm.output.RangeToken;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+
+/**
+ * Uses one or more finite state machines to detect ranges in the given input of
+ * tokens.
+ * 
+ * @author Mayo Clinic
+ */
+public class RangeFSM {
+	// number words ("one" through "ten") accepted as textual range endpoints
+	Set<String> iv_textNumberSet = new HashSet<String>();
+
+	// contains the finite state machines
+	private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+	/**
+	 * 
+	 * Constructor
+	 * 
+	 */
+	public RangeFSM() {
+		iv_textNumberSet.add("one");
+		iv_textNumberSet.add("two");
+		iv_textNumberSet.add("three");
+		iv_textNumberSet.add("four");
+		iv_textNumberSet.add("five");
+		iv_textNumberSet.add("six");
+		iv_textNumberSet.add("seven");
+		iv_textNumberSet.add("eight");
+		iv_textNumberSet.add("nine");
+		iv_textNumberSet.add("ten");
+
+		iv_machineSet.add(getMachine());
+	}
+
+	/**
+	 * Gets a finite state machine that detects the following:
+	 * <ol>
+	 * <li>250-300</li>
+	 * <li>I-IV</li>
+	 * <li>two-three</li>
+	 * </ol>
+	 * 
+	 * @return the newly built machine, positioned at its START state
+	 */
+	private Machine getMachine() {
+		State startState = new NamedState("START");
+		State endState = new NamedState("END");
+		endState.setEndStateFlag(true);
+
+		Machine m = new Machine(startState);
+		State leftNumIntegerState = new NamedState("LEFT_NUM_INTEGER");
+		State leftNumRomanState = new NamedState("LEFT_NUM_ROMAN");
+		State leftNumTextState = new NamedState("LEFT_NUM_TEXT");
+		State dashState = new NamedState("DASH");
+
+		Condition leftIntCondition = new IntegerCondition();
+		Condition rightIntCondition = new IntegerCondition();
+		Condition dashCondition = new PunctuationValueCondition('-');
+		Condition leftRomanNumeralCondition = new RomanNumeralCondition();
+		Condition rightRomanNumeralCondition = new RomanNumeralCondition();
+		Condition leftNumTextCondition = new WordSetCondition(iv_textNumberSet,
+				false);
+		Condition rightNumTextCondition = new WordSetCondition(
+				iv_textNumberSet, false);
+
+		startState.addTransition(leftIntCondition, leftNumIntegerState);
+		startState.addTransition(leftRomanNumeralCondition, leftNumRomanState);
+		startState.addTransition(leftNumTextCondition, leftNumTextState);
+		startState.addTransition(new AnyCondition(), startState);
+
+		leftNumIntegerState.addTransition(dashCondition, dashState);
+		leftNumIntegerState.addTransition(new AnyCondition(), startState);
+
+		leftNumRomanState.addTransition(dashCondition, dashState);
+		leftNumRomanState.addTransition(new AnyCondition(), startState);
+
+		leftNumTextState.addTransition(dashCondition, dashState);
+		leftNumTextState.addTransition(new AnyCondition(), startState);
+
+		dashState.addTransition(rightIntCondition, endState);
+		dashState.addTransition(rightRomanNumeralCondition, endState);
+		dashState.addTransition(rightNumTextCondition, endState);
+		dashState.addTransition(new AnyCondition(), startState);
+
+		endState.addTransition(new AnyCondition(), startState);
+
+		return m;
+	}
+
+	/**
+	 * Executes the finite state machines.
+	 * 
+	 * @param tokens
+	 * @return Set of RangeToken objects.
+	 * @throws Exception
+	 */
+	public Set<RangeToken> execute(List<? extends BaseToken> tokens,
+			Set<? extends BaseToken> overrideSet) throws Exception {
+		Set<RangeToken> rangeSet = new HashSet<RangeToken>();
+
+		// maps a fsm to a token start index
+		// key = fsm , value = token start index
+		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+		Iterator<? extends BaseToken> overrideTokenItr = overrideSet.iterator();
+		// key = start offset, value = override BaseToken object
+		Map<Integer, BaseToken> overrideTokenMap = new HashMap<Integer, BaseToken>();
+		while (overrideTokenItr.hasNext()) {
+			BaseToken t = overrideTokenItr.next();
+			Integer key = new Integer(t.getStartOffset());
+			overrideTokenMap.put(key, t);
+		}
+
+		boolean overrideOn = false;
+		int overrideEndOffset = -1;
+		for (int i = 0; i < tokens.size(); i++) {
+			BaseToken token = tokens.get(i);
+
+			Integer key = new Integer(token.getStartOffset());
+
+			if (overrideOn) {
+				if (token.getStartOffset() >= overrideEndOffset) {
+					overrideOn = false;
+					overrideEndOffset = -1;
+				} else {
+					// step to next iteration of for loop
+					continue;
+				}
+			} else {
+				if (overrideTokenMap.containsKey(key)) {
+					// override one or more tokens until the override
+					// token is complete
+					token = overrideTokenMap.get(key);
+					overrideOn = true;
+					overrideEndOffset = token.getEndOffset();
+				}
+			}
+
+			Iterator<Machine> machineItr = iv_machineSet.iterator();
+			while (machineItr.hasNext()) {
+				Machine fsm = machineItr.next();
+
+				fsm.input(token);
+
+				State currentState = fsm.getCurrentState();
+				if (currentState.getStartStateFlag()) {
+					tokenStartMap.put(fsm, new Integer(i));
+				}
+				if (currentState.getEndStateFlag()) {
+					Object o = tokenStartMap.get(fsm);
+					int tokenStartIndex;
+					if (o == null) {
+						// By default, all machines start with
+						// token zero.
+						tokenStartIndex = 0;
+					} else {
+						tokenStartIndex = ((Integer) o).intValue();
+						// stored index is the last token that left the machine in START; the match begins on the next token
+						tokenStartIndex++;
+					}
+					BaseToken startToken = tokens.get(tokenStartIndex);
+					BaseToken endToken = token;
+					RangeToken rangeToken = new RangeToken(startToken
+							.getStartOffset(), endToken.getEndOffset());
+					rangeSet.add(rangeToken);
+					fsm.reset();
+				}
+			}
+		}
+
+		// cleanup
+		tokenStartMap.clear();
+
+		// reset machines
+		Iterator<Machine> itr = iv_machineSet.iterator();
+		while (itr.hasNext()) {
+			Machine fsm = itr.next();
+			fsm.reset();
+		}
+
+		return rangeSet;
+	}
+}

Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RomanNumeralFSM.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RomanNumeralFSM.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RomanNumeralFSM.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/machine/RomanNumeralFSM.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
 /*
- * Copyright: (c) 2009   Mayo Foundation for Medical Education and 
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify 
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  * http://www.apache.org/licenses/LICENSE-2.0 
  * 
  * Unless required by applicable law or agreed to in writing, software
@@ -21,163 +14,163 @@
  * See the License for the specific language governing permissions and 
  * limitations under the License. 
  */
-package edu.mayo.bmi.fsm.machine;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import net.openai.util.fsm.AnyCondition;
-import net.openai.util.fsm.Condition;
-import net.openai.util.fsm.Machine;
-import net.openai.util.fsm.State;
-import edu.mayo.bmi.fsm.output.RomanNumeralToken;
-import edu.mayo.bmi.fsm.state.NamedState;
-import edu.mayo.bmi.fsm.token.BaseToken;
-import edu.mayo.bmi.fsm.token.WordToken;
-
-/**
- * Uses one or more finite state machines to detect roman numerals in the given
- * input of tokens.
- * 
- * @author Mayo Clinic
- */
-public class RomanNumeralFSM {
-
-	// contains the finite state machines
-	private Set<Machine> iv_machineSet = new HashSet<Machine>();
-
-	/**
-	 * 
-	 * Constructor
-	 * 
-	 */
-	public RomanNumeralFSM() {
-		iv_machineSet.add(getMachine());
-	}
-
-	/**
-	 * Gets a finite state machine that detects the following:
-	 * <ol>
-	 * <li>III</li>
-	 * <li>iii</li>
-	 * </ol>
-	 * 
-	 * @return
-	 */
-	private Machine getMachine() {
-		State startState = new NamedState("START");
-		State endState = new NamedState("END");
-		endState.setEndStateFlag(true);
-
-		Machine m = new Machine(startState);
-
-		startState.addTransition(new RomanNumeralCondition(), endState);
-		startState.addTransition(new AnyCondition(), startState);
-
-		endState.addTransition(new AnyCondition(), startState);
-
-		return m;
-	}
-
-	/**
-	 * Executes the finite state machines.
-	 * 
-	 * @param tokens
-	 * @return Set of RomanNumeralToken objects.
-	 * @throws Exception
-	 */
-	public Set<RomanNumeralToken> execute(List<? extends BaseToken> tokens)
-			throws Exception {
-		Set<RomanNumeralToken> romanNumeralSet = new HashSet<RomanNumeralToken>();
-
-		// maps a fsm to a token start index
-		// key = fsm , value = token start index
-		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
-
-		for (int i = 0; i < tokens.size(); i++) {
-			BaseToken token = tokens.get(i);
-
-			Iterator<Machine> machineItr = iv_machineSet.iterator();
-			while (machineItr.hasNext()) {
-				Machine fsm = machineItr.next();
-
-				fsm.input(token);
-
-				State currentState = fsm.getCurrentState();
-				if (currentState.getStartStateFlag()) {
-					tokenStartMap.put(fsm, new Integer(i));
-				}
-				if (currentState.getEndStateFlag()) {
-					Object o = tokenStartMap.get(fsm);
-					int tokenStartIndex;
-					if (o == null) {
-						// By default, all machines start with
-						// token zero.
-						tokenStartIndex = 0;
-					} else {
-						tokenStartIndex = ((Integer) o).intValue();
-						// skip ahead over single token we don't want
-						tokenStartIndex++;
-					}
-
-					BaseToken startToken = tokens.get(tokenStartIndex);
-					BaseToken endToken = token;
-					RomanNumeralToken rnToken = new RomanNumeralToken(
-							startToken.getStartOffset(), endToken
-									.getEndOffset());
-					romanNumeralSet.add(rnToken);
-					fsm.reset();
-				}
-			}
-		}
-
-		// cleanup
-		tokenStartMap.clear();
-
-		// reset machines
-		Iterator<Machine> itr = iv_machineSet.iterator();
-		while (itr.hasNext()) {
-			Machine fsm = itr.next();
-			fsm.reset();
-		}
-
-		return romanNumeralSet;
-	}
-
-	@SuppressWarnings("serial")
-	class RomanNumeralCondition extends Condition {
-		public boolean satisfiedBy(Object conditional) {
-			if (conditional instanceof WordToken) {
-				WordToken wt = (WordToken) conditional;
-				return isRomanNumeral(wt.getText());
-			}
-
-			return false;
-		}
-
-		/**
-		 * Validates whether the given string is a roman numeral.
-		 * 
-		 * @param str
-		 * @return
-		 */
-		private boolean isRomanNumeral(String str) {
-			str = str.toUpperCase();
-			for (int i = 0; i < str.length(); i++) {
-				char currentChar = str.charAt(i);
-				if ((currentChar != 'I') && (currentChar != 'V')
-						&& (currentChar != 'X') && (currentChar != 'L')
-						&& (currentChar != 'C') && (currentChar != 'D')
-						&& (currentChar != 'M')) {
-					return false;
-				}
-			}
-			return true;
-		}
-	}
-
-}
+package edu.mayo.bmi.fsm.machine;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.openai.util.fsm.AnyCondition;
+import net.openai.util.fsm.Condition;
+import net.openai.util.fsm.Machine;
+import net.openai.util.fsm.State;
+import edu.mayo.bmi.fsm.output.RomanNumeralToken;
+import edu.mayo.bmi.fsm.state.NamedState;
+import edu.mayo.bmi.fsm.token.BaseToken;
+import edu.mayo.bmi.fsm.token.WordToken;
+
+/**
+ * Uses one or more finite state machines to detect roman numerals in the given
+ * input of tokens.
+ * 
+ * @author Mayo Clinic
+ */
+public class RomanNumeralFSM {
+
+	// contains the finite state machines
+	private Set<Machine> iv_machineSet = new HashSet<Machine>();
+
+	/**
+	 * 
+	 * Constructor
+	 * 
+	 */
+	public RomanNumeralFSM() {
+		iv_machineSet.add(getMachine());
+	}
+
+	/**
+	 * Gets a finite state machine that detects the following:
+	 * <ol>
+	 * <li>III</li>
+	 * <li>iii</li>
+	 * </ol>
+	 * 
+	 * @return the newly built machine, positioned at its START state
+	 */
+	private Machine getMachine() {
+		State startState = new NamedState("START");
+		State endState = new NamedState("END");
+		endState.setEndStateFlag(true);
+
+		Machine m = new Machine(startState);
+
+		startState.addTransition(new RomanNumeralCondition(), endState);
+		startState.addTransition(new AnyCondition(), startState);
+
+		endState.addTransition(new AnyCondition(), startState);
+
+		return m;
+	}
+
+	/**
+	 * Executes the finite state machines.
+	 * 
+	 * @param tokens
+	 * @return Set of RomanNumeralToken objects.
+	 * @throws Exception
+	 */
+	public Set<RomanNumeralToken> execute(List<? extends BaseToken> tokens)
+			throws Exception {
+		Set<RomanNumeralToken> romanNumeralSet = new HashSet<RomanNumeralToken>();
+
+		// maps a fsm to a token start index
+		// key = fsm , value = token start index
+		Map<Machine, Integer> tokenStartMap = new HashMap<Machine, Integer>();
+
+		for (int i = 0; i < tokens.size(); i++) {
+			BaseToken token = tokens.get(i);
+
+			Iterator<Machine> machineItr = iv_machineSet.iterator();
+			while (machineItr.hasNext()) {
+				Machine fsm = machineItr.next();
+
+				fsm.input(token);
+
+				State currentState = fsm.getCurrentState();
+				if (currentState.getStartStateFlag()) {
+					tokenStartMap.put(fsm, new Integer(i));
+				}
+				if (currentState.getEndStateFlag()) {
+					Object o = tokenStartMap.get(fsm);
+					int tokenStartIndex;
+					if (o == null) {
+						// By default, all machines start with
+						// token zero.
+						tokenStartIndex = 0;
+					} else {
+						tokenStartIndex = ((Integer) o).intValue();
+						// stored index is the last token that left the machine in START; the match begins on the next token
+						tokenStartIndex++;
+					}
+
+					BaseToken startToken = tokens.get(tokenStartIndex);
+					BaseToken endToken = token;
+					RomanNumeralToken rnToken = new RomanNumeralToken(
+							startToken.getStartOffset(), endToken
+									.getEndOffset());
+					romanNumeralSet.add(rnToken);
+					fsm.reset();
+				}
+			}
+		}
+
+		// cleanup
+		tokenStartMap.clear();
+
+		// reset machines
+		Iterator<Machine> itr = iv_machineSet.iterator();
+		while (itr.hasNext()) {
+			Machine fsm = itr.next();
+			fsm.reset();
+		}
+
+		return romanNumeralSet;
+	}
+
+	@SuppressWarnings("serial")
+	class RomanNumeralCondition extends Condition {
+		public boolean satisfiedBy(Object conditional) {
+			if (conditional instanceof WordToken) {
+				WordToken wt = (WordToken) conditional;
+				return isRomanNumeral(wt.getText());
+			}
+
+			return false;
+		}
+
+		/**
+		 * Checks, ignoring case, that the string contains only roman numeral letters (I, V, X, L, C, D, M); letter order is not validated.
+		 * 
+		 * @param str
+		 * @return true if every character is a roman numeral letter (an empty string also yields true)
+		 */
+		private boolean isRomanNumeral(String str) {
+			str = str.toUpperCase();
+			for (int i = 0; i < str.length(); i++) {
+				char currentChar = str.charAt(i);
+				if ((currentChar != 'I') && (currentChar != 'V')
+						&& (currentChar != 'X') && (currentChar != 'L')
+						&& (currentChar != 'C') && (currentChar != 'D')
+						&& (currentChar != 'M')) {
+					return false;
+				}
+			}
+			return true;
+		}
+	}
+
+}