You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sw...@apache.org on 2013/01/16 06:15:12 UTC
svn commit: r1433829 -
/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
Author: swu
Date: Wed Jan 16 05:15:12 2013
New Revision: 1433829
URL: http://svn.apache.org/viewvc?rev=1433829&view=rev
Log:
added back in subjectattributeclassifier, which is key
Added:
incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java?rev=1433829&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java Wed Jan 16 05:15:12 2013
@@ -0,0 +1,253 @@
+/*
+ * Copyright: (c) 2012 Mayo Foundation for Medical Education and
+ * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
+ * triple-shield Mayo logo are trademarks and service marks of MFMER.
+ *
+ * Except as contained in the copyright notice above, or as used to identify
+ * MFMER as the author of this software, the trade names, trademarks, service
+ * marks, or product names of the copyright holder shall not be used in
+ * advertising, promotion or otherwise in connection with this software without
+ * prior written authorization of the copyright holder.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ctakes.assertion.attributes.subject;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.dependency.parser.util.DependencyPath;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Predicate;
+import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+
+/**
+ * @author stephenwu
+ *
+ */
+public class SubjectAttributeClassifier {
+
+ public static final String DONOR_TOKEN = "donor_token";
+ public static final String DONOR_SRLARG = "donor_srlarg";
+ public static final String DONOR_DEPPATH = "donor_deppath";
+ public static final String DONOR_DEPTOK = "donor_depsrl";
+ public static final String DONOR_OR = "donor_or";
+ public static final String FAMILY_TOKEN = "family_token";
+ public static final String FAMILY_SRLARG = "family_srlarg";
+ public static final String FAMILY_DEPPATH = "family_deppath";
+ public static final String FAMILY_DEPTOK = "family_depsrl";
+ public static final String FAMILY_OR = "family_or";
+ public static final String OTHER_TOKEN = "other_token";
+ public static final String OTHER_SRLARG = "other_srlarg";
+ public static final String OTHER_DEPPATH = "other_deppath";
+ public static final String OTHER_DEPTOK = "other_depsrl";
+ public static final String OTHER_OR = "other_or";
+ public static ArrayList<String> FeatureIndex = new ArrayList<String>();
+ static{
+ FeatureIndex.add(DONOR_TOKEN);
+ FeatureIndex.add(DONOR_SRLARG);
+ FeatureIndex.add(DONOR_DEPPATH);
+ FeatureIndex.add(DONOR_DEPTOK);
+ FeatureIndex.add(DONOR_OR);
+ FeatureIndex.add(FAMILY_TOKEN);
+ FeatureIndex.add(FAMILY_SRLARG);
+ FeatureIndex.add(FAMILY_DEPPATH);
+ FeatureIndex.add(FAMILY_DEPTOK);
+ FeatureIndex.add(FAMILY_OR);
+ FeatureIndex.add(OTHER_TOKEN);
+ FeatureIndex.add(OTHER_SRLARG);
+ FeatureIndex.add(OTHER_DEPPATH);
+ FeatureIndex.add(OTHER_DEPTOK);
+ FeatureIndex.add(OTHER_OR);
+ }
+
+    /**
+     * Determines the subject attribute of a mention. The features gathered by
+     * {@link #extract(JCas, Annotation)} are handed to
+     * {@link #classifyWithLogic(HashMap)}, whose hand-written rules may later
+     * be replaced by learned classification.
+     * <p>
+     * Currently goes from the entity mention to its Sentence to the
+     * SemanticArguments.
+     *
+     * @param jCas the CAS holding the document's annotations
+     * @param mention the annotation whose subject is wanted
+     * @return the subject value selected by {@code classifyWithLogic}
+     */
+    public static String getSubject(JCas jCas, IdentifiedAnnotation mention) {
+        return classifyWithLogic(extract(jCas, mention));
+    }
+
+
+ public static HashMap<String, Boolean> extract(JCas jCas,
+ Annotation mention) {
+ HashMap<String,Boolean> vfeat = new HashMap<String,Boolean>();
+ for (String feat : FeatureIndex) {
+ vfeat.put(feat, false);
+ }
+
+ // find the sentence that entityMention is in
+ Sentence sEntity = null;
+ Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
+ for (Sentence s : sentences) {
+ if ( s.getBegin()<=mention.getBegin() && s.getEnd()>=mention.getEnd()) {
+ sEntity = s;
+ break;
+ }
+ }
+// if (sEntity==null)
+// return null;
+
+ // get any SRL arguments
+ List<SemanticArgument> args = JCasUtil.selectCovered(jCas, SemanticArgument.class, sEntity);
+ for (SemanticArgument arg : args) {
+
+ // look in SRL arguments for a family or other subject
+ if (arg.getLabel().matches("A[01]")) {
+ if ( isDonorTerm(arg) ) {
+ vfeat.put(DONOR_SRLARG, true);
+ }
+ if ( isFamilyTerm(arg) ) {
+ vfeat.put(FAMILY_SRLARG, true);
+ }
+ if ( isOtherTerm(arg) ) {
+ vfeat.put(OTHER_SRLARG, true);
+ }
+ }
+
+ }
+
+ // get any SRL predicates
+ List<Predicate> preds = JCasUtil.selectCovered(jCas, Predicate.class, sEntity);
+
+
+ // search dependency paths for stuff
+ List<ConllDependencyNode> depnodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, mention);
+ if (!depnodes.isEmpty()) {
+ ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode(depnodes);
+ for (ConllDependencyNode dn : DependencyUtility.getPathToTop(jCas, depnode)) {
+ if ( isDonorTerm(dn) ) {
+ vfeat.put(DONOR_DEPPATH, true);
+ }
+ if ( isFamilyTerm(dn) ) {
+ vfeat.put(FAMILY_DEPPATH, true);
+ }
+ if ( isOtherTerm(dn) ) {
+ vfeat.put(OTHER_DEPPATH, true);
+ }
+
+ }
+ }
+
+ // look for mentions of "donor" in the tokens
+ List<BaseToken> toks = JCasUtil.selectCovered(jCas, BaseToken.class, sEntity);
+ for (BaseToken tok : toks) {
+
+ if ( isDonorTerm(tok) ) {
+ vfeat.put(DONOR_TOKEN, true);
+
+ // check if there are one-removed dependencies on the dependency path
+ DependencyPath path = DependencyUtility.getPath(jCas, DependencyUtility.getNominalHeadNode(jCas,tok),
+ DependencyUtility.getNominalHeadNode(jCas,mention));
+ int commonInd = path.indexOf(path.getCommonNode());
+ if (commonInd==1 || commonInd==path.size()-2) {
+ vfeat.put(DONOR_DEPTOK, true);
+ }
+ }
+ if ( isFamilyTerm(tok) ) {
+ vfeat.put(FAMILY_TOKEN, true);
+
+ // check if there are one-removed dependencies on the dependency path
+ DependencyPath path = DependencyUtility.getPath(jCas, DependencyUtility.getNominalHeadNode(jCas,tok),
+ DependencyUtility.getNominalHeadNode(jCas,mention));
+ int commonInd = path.indexOf(path.getCommonNode());
+ if (commonInd==1 || commonInd==path.size()-2) {
+ vfeat.put(FAMILY_DEPTOK, true);
+ }
+ }
+
+ if ( isOtherTerm(tok) ) {
+ vfeat.put(OTHER_TOKEN, true);
+
+ // check if there are one-removed dependencies on the dependency path
+ DependencyPath path = DependencyUtility.getPath(jCas, DependencyUtility.getNominalHeadNode(jCas,tok),
+ DependencyUtility.getNominalHeadNode(jCas,mention));
+ int commonInd = path.indexOf(path.getCommonNode());
+ if (commonInd==1 || commonInd==path.size()-2) {
+ vfeat.put(OTHER_DEPTOK, true);
+ }
+ }
+ }
+ return vfeat;
+ }
+
+
+ public static String classifyWithLogic(HashMap<String, Boolean> vfeat) {
+ Boolean donor_summary = new Boolean(vfeat.get(DONOR_TOKEN) || vfeat.get(DONOR_DEPPATH) ||
+ vfeat.get(DONOR_DEPTOK) || vfeat.get(DONOR_SRLARG));
+ Boolean family_summary = new Boolean( vfeat.get(FAMILY_DEPPATH) ||
+ vfeat.get(FAMILY_DEPTOK) || vfeat.get(FAMILY_SRLARG));
+ Boolean other_summary = new Boolean( vfeat.get(OTHER_DEPPATH) ||
+ vfeat.get(OTHER_DEPTOK) || vfeat.get(OTHER_SRLARG));
+ vfeat.put(DONOR_OR, donor_summary);
+ vfeat.put(FAMILY_OR, family_summary);
+ vfeat.put(OTHER_OR, other_summary);
+
+ if (vfeat.get(DONOR_OR) && vfeat.get(FAMILY_OR)) {
+ return CONST.ATTR_SUBJECT_DONOR_FAMILY_MEMBER;
+ } else if (vfeat.get(DONOR_OR) && !vfeat.get(FAMILY_OR)) {
+ return CONST.ATTR_SUBJECT_DONOR_OTHER;
+ } else if (!vfeat.get(DONOR_OR) && !vfeat.get(FAMILY_OR) && vfeat.get(OTHER_OR)) {
+ return CONST.ATTR_SUBJECT_OTHER;
+ } else if (!vfeat.get(DONOR_OR) && vfeat.get(FAMILY_OR)) {
+ return (CONST.ATTR_SUBJECT_FAMILY_MEMBER);
+ } else {
+ return CONST.ATTR_SUBJECT_PATIENT;
+ }
+ }
+
+
+ public static boolean isDonorTerm(Annotation arg) {
+ return arg.getCoveredText().toLowerCase()
+ .matches("(donor).*");
+ }
+
+
+ public static boolean isFamilyTerm(Annotation arg) {
+ return arg.getCoveredText().toLowerCase()
+ .matches("(father|dad|mother|mom|bro|sis|sib|cousin|aunt|uncle|grandm|grandp|grandf|" +
+ "wife|spouse|husband|child|offspring|progeny|son|daughter|nephew|niece|kin|family).*");
+ }
+
+
+ public static boolean isOtherTerm(Annotation arg) {
+ return arg.getCoveredText().toLowerCase()
+ .matches(".*(in-law|stepc|stepd|stepso|stepf|stepm|step-).*");
+ }
+
+
+    /**
+     * A main method for regex testing: prints "match" or "no match" depending
+     * on whether the sample string "steps" satisfies the in-law/step- pattern.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        final String sample = "steps";
+        final boolean hit = sample.toLowerCase()
+                .matches(".*(in-law|stepc|stepd|stepso|stepf|stepm|step-).*");
+        System.out.println(hit ? "match" : "no match");
+    }
+}