You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by sh...@apache.org on 2013/09/06 19:00:44 UTC
svn commit: r1520631 - in /ctakes/trunk:
ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/
ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/
ctakes-assertion/src/main/java/org/apache/ctakes/assertion/att...
Author: shalgrim
Date: Fri Sep 6 17:00:43 2013
New Revision: 1520631
URL: http://svn.apache.org/r1520631
Log:
Added post as cue word in historyOf.psv
Added In History Sections feature
Added:
ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/historyOf.psv
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
Added: ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/historyOf.psv
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/historyOf.psv?rev=1520631&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/historyOf.psv (added)
+++ ctakes/trunk/ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/historyOf.psv Fri Sep 6 17:00:43 2013
@@ -0,0 +1,11 @@
+has been|historyOf|historyOf
+had|historyOf|historyOf
+hx of recent|historyOf|historyOf
+hx of|historyOf|historyOf
+hx|historyOf|historyOf
+recent|historyOf|historyOf
+s/p|historyOf|historyOf
+status post|historyOf|historyOf
+post|historyOf|historyOf
+previous|historyOf|historyOf
+prior|historyOf|historyOf
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java?rev=1520631&r1=1520630&r2=1520631&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java Fri Sep 6 17:00:43 2013
@@ -63,9 +63,12 @@ public class HistoryFeaturesExtractor im
// Pull in all the features that were used for the rule-based module
features.addAll( hashToFeatureList(featsMap) );
+
// Pull in the result of the rule-based module as well
features.add(new Feature("HISTORY_CLASSIFIER_LOGIC", HistoryAttributeClassifier.classifyWithLogic(featsMap)));
-
+
+ // Add whether the token is preceded by "h/o"
+ //features.add(new Feature("PRECEDED_BY_H_SLASH_O", HistoryAttributeClassifier.precededByH_O(jCas, arg)));
return features;
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java?rev=1520631&r1=1520630&r2=1520631&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java Fri Sep 6 17:00:43 2013
@@ -21,6 +21,8 @@ package org.apache.ctakes.assertion.attr
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
@@ -29,6 +31,7 @@ import java.util.regex.Pattern;
import org.apache.uima.UIMAException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.jcas.tcas.DocumentAnnotation;
import org.uimafit.util.JCasUtil;
import org.xml.sax.SAXException;
@@ -53,7 +56,8 @@ public class HistoryAttributeClassifier
private static final String POSTCOORD_NMOD = "donor_srlarg";
private static final String DISCUSSION_DEPPATH = "discussion_deppath";
private static final String SUBSUMED_CHUNK = "other_token";
- private static final String SUBSUMED_ANNOT = "other_deppath";
+ private static final String SUBSUMED_ANNOT = "other_deppath";
+ private static final String IN_HIST_SECTION = "in_history_section";
public static ArrayList<String> FeatureIndex = new ArrayList<String>();
static{
FeatureIndex.add(POSTCOORD_NMOD);
@@ -61,6 +65,19 @@ public class HistoryAttributeClassifier
FeatureIndex.add(SUBSUMED_CHUNK);
FeatureIndex.add(SUBSUMED_ANNOT);
}
+
+ private static final String[] GHC_HIST_SECTIONS =
+ {
+ "fh",
+ "sh",
+ //"hpi", // based on 8/30 review of errors, this is a pretty lousy indicator of history
+ "pmh", // missed a bunch of these in 8/20 run reviewed on 8/30. am i forgetting lowercase?
+ "psh",
+ "social history:",
+ "family history",
+ "past medical history",
+ "pmh/psh" // missed a bunch of these in 8/20 run reviewed on 8/30. am i forgetting lowercase?
+ };
// currently goes from entityMention to Sentence to SemanticArgument
public static Boolean getHIstory(JCas jCas, IdentifiedAnnotation mention) {
@@ -96,6 +113,49 @@ public class HistoryAttributeClassifier
}
return false;
}
+
+ /**
+  * Tells whether a sentence opens with one of the GH history section names
+  * listed in {@code GHC_HIST_SECTIONS}.
+  *
+  * SRH, added 8/19/13. Design notes:
+  * Paragraphs are defined as what is delimited by \n in GH docs, and a
+  * history section is recognized by what its paragraph starts with. A
+  * paragraph may have more than one sentence in it, so the caller has to
+  * start from the sentence of interest and search backwards for a newline
+  * to find the first sentence of the paragraph, then pass that sentence in
+  * here. This method only covers the case where it is given the starting
+  * sentence of a paragraph; the original author marked that logic as
+  * untested/unverified.
+  *
+  * The comparison ignores case and leading/trailing whitespace. It is a
+  * plain prefix test, so a short name such as "sh" also matches a sentence
+  * that merely begins with those letters (for example "She").
+  * NOTE(review): confirm whether a word-boundary check is wanted here.
+  *
+  * @param s the sentence whose covered text is inspected
+  * @return true if the trimmed text starts with a history section name,
+  *         false otherwise (including when the sentence has no text)
+  */
+ private static boolean isInHistSection(Sentence s) {
+     String sText = s.getCoveredText();
+     if (sText == null) {
+         return false;
+     }
+
+     // Trim before any length-dependent comparison so that padded text
+     // shorter than a section name cannot cause an out-of-range substring.
+     String trimmed = sText.trim();
+
+     for (String secStart : GHC_HIST_SECTIONS) {
+         // regionMatches returns false (rather than throwing) when trimmed is
+         // shorter than secStart, and its case folding is locale-independent.
+         if (trimmed.regionMatches(true, 0, secStart, 0, secStart.length())) {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /** Orders sentences by ascending begin offset. */
+ public static class SentComparator implements Comparator<Sentence> {
+     public int compare(Sentence s1, Sentence s2) {
+         final int firstBegin = s1.getBegin();
+         final int secondBegin = s2.getBegin();
+         // Negative when s1 starts earlier, positive when it starts later.
+         return firstBegin - secondBegin;
+     }
+ }
public static HashMap<String, Boolean> extract(JCas jCas,
@@ -114,11 +174,89 @@ public class HistoryAttributeClassifier
break;
}
}
-// if (sEntity==null)
-// return null;
+
+ DocumentAnnotation docAnnot = null;
+
+ Collection<DocumentAnnotation> docAnnots =
+ JCasUtil.select(jCas, DocumentAnnotation.class);
+
+ if (!docAnnots.isEmpty())
+ {
+ Object[] docAnnotArray = docAnnots.toArray();
+ docAnnot = (DocumentAnnotation) docAnnotArray[0];
+ }
if (sEntity!=null) {
+ // but I actually need to find out if this sentence is preceded by
+ // a newline or if I have to find the preceding one that does.
+ if (docAnnot != null)
+ {
+ String doctext = docAnnot.getCoveredText();
+ int sentStart = sEntity.getBegin();
+
+ if (sentStart > 0)
+ {
+ boolean argInHistSection = false;
+
+ // TOFIX:
+ // What I have to do here is not get the preceding char
+ // but rather get the chars in between prior sent and this
+ // and then check for newline therein.
+ String precedingChar =
+ doctext.substring(sentStart-1, sentStart);
+
+ // sort the sentences
+ // TODO: make it so you don't sort every time for same sentence.
+ ArrayList<Sentence> sentList = new ArrayList<Sentence>(sentences);
+ Collections.sort(sentList, new SentComparator());
+
+ // get index of sEntity
+ int currind = sentList.indexOf(sEntity);
+
+ if (currind == 0) {
+ argInHistSection = isInHistSection(sEntity);
+ } else {
+ currind--;
+ Sentence prevSent = sentList.get(currind);
+ String tweenSents = doctext.substring(prevSent.getEnd(), sentStart);
+ if (tweenSents.indexOf("\n") != -1) {
+ // there is a newline between this sentence and prior sentence
+ argInHistSection = isInHistSection(sEntity);
+ } else if (currind == 0) {
+ argInHistSection = isInHistSection(prevSent);
+ } else {
+ while (currind > 0) {
+ Sentence currSent = prevSent;
+ currind--;
+ prevSent = sentList.get(currind);
+
+ sentStart = currSent.getBegin();
+ int prevSentEnd = prevSent.getEnd();
+
+ try {
+ tweenSents = doctext.substring(prevSentEnd, sentStart);
+ } catch (StringIndexOutOfBoundsException e) {
+ System.err.println("wtf");
+ }
+
+ if (tweenSents.indexOf("\n") != -1 || currind == 0) {
+ argInHistSection = isInHistSection(currSent);
+ break;
+ } else if (currind == 0) {
+ argInHistSection = isInHistSection(prevSent);
+ break;
+ }
+ }
+ }
+ }
+
+ // and here do something with argInHistSection.
+ // ie, create the feature
+ vfeat.put(IN_HIST_SECTION, argInHistSection);
+ }
+
+ }
// 2) some other identified annotation subsumes this one?
List<IdentifiedAnnotation> lsmentions = JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, arg.getBegin(), arg.getEnd());
@@ -157,15 +295,14 @@ public class HistoryAttributeClassifier
for (Chunk chunk : lschunks) {
if ( chunk.getBegin()>arg.getBegin()) {
break;
- } else {
- if ( chunk.getEnd()<arg.getEnd()) {
- continue;
- } else if ( !DependencyUtility.equalCoverage(
- DependencyUtility.getNominalHeadNode(jCas, chunk),
- DependencyUtility.getNominalHeadNode(jCas, arg)) ) {
- // the case that annot is a superset
- vfeat.put(SUBSUMED_CHUNK, true);
- }
+ }
+ if ( chunk.getEnd()<arg.getEnd()) {
+ continue;
+ } else if ( !DependencyUtility.equalCoverage(
+ DependencyUtility.getNominalHeadNode(jCas, chunk),
+ DependencyUtility.getNominalHeadNode(jCas, arg)) ) {
+ // the case that annot is a superset
+ vfeat.put(SUBSUMED_CHUNK, true);
}
}
}
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java?rev=1520631&r1=1520630&r2=1520631&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/subject/SubjectAttributeClassifier.java Fri Sep 6 17:00:43 2013
@@ -211,7 +211,6 @@ public class SubjectAttributeClassifier
return vfeat;
}
-
public static String classifyWithLogic(HashMap<String, Boolean> vfeat) {
if (vfeat==null) {