You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/01/16 06:15:12 UTC

svn commit: r1433829 - /incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java

Author: swu
Date: Wed Jan 16 05:15:12 2013
New Revision: 1433829

URL: http://svn.apache.org/viewvc?rev=1433829&view=rev
Log:
added back in subjectattributeclassifier, which is key

Added:
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java?rev=1433829&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java Wed Jan 16 05:15:12 2013
@@ -0,0 +1,253 @@
+/*
+ * Copyright: (c) 2012   Mayo Foundation for Medical Education and 
+ * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
+ * triple-shield Mayo logo are trademarks and service marks of MFMER.
+ *
+ * Except as contained in the copyright notice above, or as used to identify 
+ * MFMER as the author of this software, the trade names, trademarks, service
+ * marks, or product names of the copyright holder shall not be used in
+ * advertising, promotion or otherwise in connection with this software without
+ * prior written authorization of the copyright holder.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0 
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and 
+ * limitations under the License. 
+ */
+package org.apache.ctakes.assertion.attributes.subject;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.dependency.parser.util.DependencyPath;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Predicate;
+import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+
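+/**
+ * Rule-based classifier for the subject attribute of a mention. Boolean features
+ * (donor / family / other evidence found in SRL arguments, on the dependency path
+ * and in the sentence tokens) are gathered by {@code extract} and mapped to one of
+ * the {@code CONST.ATTR_SUBJECT_*} values by {@code classifyWithLogic}.
+ *
+ * @author stephenwu
+ */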
+public class SubjectAttributeClassifier {
+
+	public static final String DONOR_TOKEN = "donor_token"; 
+	public static final String DONOR_SRLARG = "donor_srlarg";
+	public static final String DONOR_DEPPATH = "donor_deppath";
+	public static final String DONOR_DEPTOK = "donor_depsrl";
+	public static final String DONOR_OR = "donor_or";
+	public static final String FAMILY_TOKEN = "family_token"; 
+	public static final String FAMILY_SRLARG = "family_srlarg";
+	public static final String FAMILY_DEPPATH = "family_deppath";
+	public static final String FAMILY_DEPTOK = "family_depsrl";
+	public static final String FAMILY_OR = "family_or";
+	public static final String OTHER_TOKEN = "other_token"; 
+	public static final String OTHER_SRLARG = "other_srlarg"; 
+	public static final String OTHER_DEPPATH = "other_deppath"; 
+	public static final String OTHER_DEPTOK = "other_depsrl";
+	public static final String OTHER_OR = "other_or";
+    public static ArrayList<String> FeatureIndex = new ArrayList<String>();
+    static{
+            FeatureIndex.add(DONOR_TOKEN);
+            FeatureIndex.add(DONOR_SRLARG);
+            FeatureIndex.add(DONOR_DEPPATH);
+            FeatureIndex.add(DONOR_DEPTOK);
+            FeatureIndex.add(DONOR_OR);
+            FeatureIndex.add(FAMILY_TOKEN);
+            FeatureIndex.add(FAMILY_SRLARG);
+            FeatureIndex.add(FAMILY_DEPPATH);
+            FeatureIndex.add(FAMILY_DEPTOK);
+            FeatureIndex.add(FAMILY_OR);
+            FeatureIndex.add(OTHER_TOKEN);
+            FeatureIndex.add(OTHER_SRLARG);
+            FeatureIndex.add(OTHER_DEPPATH);
+            FeatureIndex.add(OTHER_DEPTOK);
+            FeatureIndex.add(OTHER_OR);
+    }
+
+	// currently goes from entityMention to Sentence to SemanticArgument
+	/**
+	 * Returns the subject attribute value for a mention: the features produced by
+	 * {@code extract} are handed straight to {@code classifyWithLogic}.
+	 *
+	 * @param jCas    the CAS the mention belongs to
+	 * @param mention the annotation whose subject is wanted
+	 * @return the value chosen by {@code classifyWithLogic}
+	 */
+	public static String getSubject(JCas jCas, IdentifiedAnnotation mention) {
+		// Hand-written rules for now; a learned classifier could take the place of classifyWithLogic.
+		return classifyWithLogic(extract(jCas, mention));
+	}
+
+
+	/**
+	 * Builds the boolean feature map for a mention.
+	 * <p>
+	 * Every name in {@code FeatureIndex} starts out {@code false}. Flags are then set from
+	 * three sources: A0/A1 semantic arguments of the covering sentence, the nodes on the
+	 * dependency path from the mention's head to the top, and the tokens of the covering
+	 * sentence (including whether such a token is one step from the common node on the
+	 * dependency path to the mention). The {@code *_OR} entries are not set here.
+	 *
+	 * @param jCas    the CAS to read annotations from
+	 * @param mention the annotation to extract features for
+	 * @return a map holding an entry for every name in {@code FeatureIndex}; all entries are
+	 *         {@code false} when no sentence covers the mention
+	 */
+	public static HashMap<String, Boolean> extract(JCas jCas,
+			Annotation mention) {
+		HashMap<String,Boolean> vfeat = new HashMap<String,Boolean>();
+		for (String feat : FeatureIndex) {
+			vfeat.put(feat, false);
+		}
+
+		// find the first sentence that fully covers the mention
+		Sentence sEntity = null;
+		for (Sentence s : JCasUtil.select(jCas, Sentence.class)) {
+			if ( s.getBegin()<=mention.getBegin() && s.getEnd()>=mention.getEnd()) {
+				sEntity = s;
+				break;
+			}
+		}
+		if (sEntity==null) {
+			// Without a covering sentence the sentence-scoped queries below would be handed
+			// a null span; report "no evidence" instead of failing.
+			return vfeat;
+		}
+
+		// look in the sentence's A0/A1 SRL arguments for donor, family or other terms
+		for (SemanticArgument arg : JCasUtil.selectCovered(jCas, SemanticArgument.class, sEntity)) {
+			String label = arg.getLabel();
+			// skip arguments whose label was never set
+			if (label!=null && label.matches("A[01]")) {
+				if ( isDonorTerm(arg) ) {
+					vfeat.put(DONOR_SRLARG, true);
+				}
+				if ( isFamilyTerm(arg) ) {
+					vfeat.put(FAMILY_SRLARG, true);
+				}
+				if ( isOtherTerm(arg) ) {
+					vfeat.put(OTHER_SRLARG, true);
+				}
+			}
+		}
+
+		// search the dependency path from the mention's head node to the top
+		List<ConllDependencyNode> depnodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, mention);
+		if (!depnodes.isEmpty()) {
+			ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode(depnodes);
+			for (ConllDependencyNode dn : DependencyUtility.getPathToTop(jCas, depnode)) {
+				if ( isDonorTerm(dn) ) {
+					vfeat.put(DONOR_DEPPATH, true);
+				}
+				if ( isFamilyTerm(dn) ) {
+					vfeat.put(FAMILY_DEPPATH, true);
+				}
+				if ( isOtherTerm(dn) ) {
+					vfeat.put(OTHER_DEPPATH, true);
+				}
+			}
+		}
+
+		// look for donor, family or other terms among the sentence tokens
+		for (BaseToken tok : JCasUtil.selectCovered(jCas, BaseToken.class, sEntity)) {
+			if ( isDonorTerm(tok) ) {
+				vfeat.put(DONOR_TOKEN, true);
+				if ( isOneRemovedOnPath(jCas, tok, mention) ) {
+					vfeat.put(DONOR_DEPTOK, true);
+				}
+			}
+			if ( isFamilyTerm(tok) ) {
+				vfeat.put(FAMILY_TOKEN, true);
+				if ( isOneRemovedOnPath(jCas, tok, mention) ) {
+					vfeat.put(FAMILY_DEPTOK, true);
+				}
+			}
+			if ( isOtherTerm(tok) ) {
+				vfeat.put(OTHER_TOKEN, true);
+				if ( isOneRemovedOnPath(jCas, tok, mention) ) {
+					vfeat.put(OTHER_DEPTOK, true);
+				}
+			}
+		}
+		return vfeat;
+	}
+
+	// True when, on the dependency path between the head nodes of tok and mention, the
+	// common node sits at index 1 or at index size-2 (a one-removed dependency).
+	private static boolean isOneRemovedOnPath(JCas jCas, BaseToken tok, Annotation mention) {
+		DependencyPath path = DependencyUtility.getPath(jCas,
+				DependencyUtility.getNominalHeadNode(jCas,tok),
+				DependencyUtility.getNominalHeadNode(jCas,mention));
+		if (path==null) {
+			// NOTE(review): defensive guard in case no path is produced, e.g. when a head
+			// node is missing; confirm against DependencyUtility.getPath.
+			return false;
+		}
+		int commonInd = path.indexOf(path.getCommonNode());
+		return commonInd==1 || commonInd==path.size()-2;
+	}
+	
+	
+	public static String classifyWithLogic(HashMap<String, Boolean> vfeat) {
+		Boolean donor_summary = new Boolean(vfeat.get(DONOR_TOKEN) || vfeat.get(DONOR_DEPPATH) || 
+				vfeat.get(DONOR_DEPTOK) || vfeat.get(DONOR_SRLARG));
+		Boolean family_summary = new Boolean(                         vfeat.get(FAMILY_DEPPATH) || 
+				vfeat.get(FAMILY_DEPTOK) || vfeat.get(FAMILY_SRLARG));
+		Boolean other_summary = new Boolean(                          vfeat.get(OTHER_DEPPATH) || 
+				vfeat.get(OTHER_DEPTOK) || vfeat.get(OTHER_SRLARG));
+		vfeat.put(DONOR_OR, donor_summary);
+		vfeat.put(FAMILY_OR, family_summary);
+		vfeat.put(OTHER_OR, other_summary);
+		
+		if (vfeat.get(DONOR_OR) && vfeat.get(FAMILY_OR)) {
+			return CONST.ATTR_SUBJECT_DONOR_FAMILY_MEMBER;
+		} else if (vfeat.get(DONOR_OR) && !vfeat.get(FAMILY_OR)) {
+			return CONST.ATTR_SUBJECT_DONOR_OTHER;
+		} else if (!vfeat.get(DONOR_OR) && !vfeat.get(FAMILY_OR) && vfeat.get(OTHER_OR)) {
+			return CONST.ATTR_SUBJECT_OTHER;
+		} else if (!vfeat.get(DONOR_OR) && vfeat.get(FAMILY_OR)) {
+			return (CONST.ATTR_SUBJECT_FAMILY_MEMBER);
+		} else {
+			return CONST.ATTR_SUBJECT_PATIENT;
+		}
+	}
+
+
+	// Compiled once instead of on every call.
+	private static final Pattern DONOR_TERM_PATTERN = Pattern.compile("(donor).*");
+
+	/**
+	 * Tells whether the annotation's covered text, lower-cased, matches
+	 * {@code (donor).*} in full, i.e. starts with "donor".
+	 *
+	 * @param arg the annotation whose covered text is tested
+	 * @return {@code true} on a match
+	 */
+	public static boolean isDonorTerm(Annotation arg) {
+		// Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
+		return DONOR_TERM_PATTERN.matcher(arg.getCoveredText().toLowerCase(Locale.ROOT)).matches();
+	}
+
+	
+	// Compiled once instead of on every call.
+	private static final Pattern FAMILY_TERM_PATTERN = Pattern.compile(
+			"(father|dad|mother|mom|bro|sis|sib|cousin|aunt|uncle|grandm|grandp|grandf|" +
+			"wife|spouse|husband|child|offspring|progeny|son|daughter|nephew|niece|kin|family).*");
+
+	/**
+	 * Tells whether the annotation's covered text, lower-cased, starts with one of the
+	 * family-relation prefixes listed in the pattern (full-text regex match).
+	 *
+	 * @param arg the annotation whose covered text is tested
+	 * @return {@code true} on a match
+	 */
+	public static boolean isFamilyTerm(Annotation arg) {
+		// Locale.ROOT: with a Turkish/Azeri default locale an upper-case 'I' would lower-case
+		// to dotless 'ı' and terms such as "SIS" or "FAMILY" would no longer match.
+		return FAMILY_TERM_PATTERN.matcher(arg.getCoveredText().toLowerCase(Locale.ROOT)).matches();
+	}
+
+
+	// Compiled once instead of on every call.
+	private static final Pattern OTHER_TERM_PATTERN =
+			Pattern.compile(".*(in-law|stepc|stepd|stepso|stepf|stepm|step-).*");
+
+	/**
+	 * Tells whether the annotation's covered text, lower-cased, contains one of the
+	 * in-law / step-relation fragments listed in the pattern (full-text regex match).
+	 *
+	 * @param arg the annotation whose covered text is tested
+	 * @return {@code true} on a match
+	 */
+	public static boolean isOtherTerm(Annotation arg) {
+		// Locale.ROOT keeps "IN-LAW" matching even when the default locale is Turkish/Azeri.
+		return OTHER_TERM_PATTERN.matcher(arg.getCoveredText().toLowerCase(Locale.ROOT)).matches();
+	}
+
+
+	// a main method for regex testing
+	public static void main(String[] args) {
+		// sample input to try against the same regex isOtherTerm uses
+		final String sample = "steps";
+		final boolean hit = sample.toLowerCase()
+				.matches(".*(in-law|stepc|stepd|stepso|stepf|stepm|step-).*");
+		System.out.println(hit ? "match" : "no match");
+	}
+}