You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/01/08 03:45:51 UTC
svn commit: r1850705 [2/2] - in
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed:
./ classifier/ context/ context/feature/ context/feature/extractor/
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,340 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.classifier;
+
+
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.jcas.tcas.DocumentAnnotation;
+
+import java.util.*;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedHistoryAttributeClassifier {
+
+ private static final String POSTCOORD_NMOD = "donor_srlarg"; // feature key: head node's dependency relation is one of the modifier relations tested in extract()
+ private static final String DISCUSSION_DEPPATH = "discussion_deppath"; // feature key: a discussion word lies on the head node's dependency path to the top
+ private static final String SUBSUMED_CHUNK = "other_token"; // feature key: a Chunk with a different head node spans the annotation
+ private static final String SUBSUMED_ANNOT = "other_deppath"; // feature key: an entity/event mention with a different head node spans the annotation
+ private static final String IN_HIST_SECTION = "in_history_section"; // feature key: paragraph starts with a history section name; NOT part of FeatureIndex
+ public static ArrayList<String> FeatureIndex = new ArrayList<String>(); // the feature keys that extract() presets to false
+
+ static {
+ FeatureIndex.add( POSTCOORD_NMOD );
+ FeatureIndex.add( DISCUSSION_DEPPATH );
+ FeatureIndex.add( SUBSUMED_CHUNK );
+ FeatureIndex.add( SUBSUMED_ANNOT );
+ }
+
+ // Section names that mark the start of a history paragraph.
+ // Only enter lower-case strings here: isInHistSection lower-cases the start of the sentence before comparing.
+ private static final String[] GHC_HIST_SECTIONS =
+ {
+ "fh",
+ "sh",
+ //"hpi", // based on 8/30 review of errors, this is a pretty lousy indicator of history
+ "pmh", // missed a bunch of these in 8/20 run reviewed on 8/30. am i forgetting lowercase?
+ "psh",
+ "social history:",
+ "family history",
+ "past medical history",
+ "pmh/psh" // missed a bunch of these in 8/20 run reviewed on 8/30. am i forgetting lowercase?
+ };
+
+ /**
+  * Classifies one mention as historical or not: the boolean features are extracted with
+  * {@link #extract} and then judged by the hand-written rules of {@link #classifyWithLogic}.
+  * (Original note: currently goes from entityMention to Sentence to SemanticArgument.)
+  *
+  * @param jCas          the jcas of the document
+  * @param sentences     list of sentences that {@code sentenceIndex} refers to
+  * @param sentence      the sentence handed to feature extraction; presumably the one holding the mention
+  * @param sentenceIndex index of {@code sentence} within {@code sentences}
+  * @param mention       the annotation to classify
+  * @return the verdict of {@link #classifyWithLogic} on the extracted features
+  */
+ public static Boolean getHistory( JCas jCas,
+       final List<Sentence> sentences,
+       final Sentence sentence,
+       final int sentenceIndex,
+       IdentifiedAnnotation mention ) {
+    return classifyWithLogic( extract( jCas, sentences, sentence, sentenceIndex, mention ) );
+ }
+
+ /**
+  * Placeholder for a check of whether {@code arg} is a token preceded by "h/o".
+  * The check is not implemented: both arguments are ignored and the answer is always false.
+  *
+  * @param jCas - the jcas of the document (currently unused)
+  * @param arg  - the node getting features added to it (currently unused)
+  * @return always {@code false}
+  */
+ public static Boolean precededByH_O( JCas jCas, Annotation arg ) {
+    return Boolean.FALSE;
+ }
+
+
+ /**
+  * Logic to identify history cases; may be replaced by learned classification.
+  * The answer is true when at least one of the discussion-path, subsuming-annotation or
+  * modifier-relation features is set.  The subsuming-chunk feature is currently not counted
+  * (its contribution was commented out in the original logic).
+  * A feature that is missing from the map, or mapped to null, counts as false.
+  *
+  * @param vfeat feature key to feature value, as produced by {@link #extract}
+  * @return true if any counted feature is set, false otherwise
+  */
+ public static Boolean classifyWithLogic( HashMap<String, Boolean> vfeat ) {
+    // Boolean.TRUE.equals( .. ) instead of auto-unboxing: a missing entry must not throw a NullPointerException.
+    final boolean subsumed = Boolean.TRUE.equals( vfeat.get( SUBSUMED_ANNOT ) )
+          || Boolean.TRUE.equals( vfeat.get( POSTCOORD_NMOD ) );
+    return subsumed || Boolean.TRUE.equals( vfeat.get( DISCUSSION_DEPPATH ) );
+ }
+
+ /*
+ * SRH adding 8/19/13
+ * Idea is that I want to know if I am in a "sentence" that starts with
+ * a GH history section name.
+ * There's some work to be done here.
+ * Let's define paragraphs as what's delimited by \n in GH docs
+ * Then we can define these sections as I've seen them by what's in a
+ * paragraph.
+ * But a paragraph may have more than one sentence in it.
+ * So I have to actually not find the first part of the sentence that
+ * contains the thing, but the paragraph.
+ * So actually I have to start from the sentence and search backwards
+ * for a newline.
+ * So what's written below works (untested/unerified) in the case that I have
+ * the starting sentence of a paragraph.
+ * But I still have to find that first sentence.
+ */
+ private static boolean isInHistSection( Sentence s ) {
+ // We want to trim the covered text before attempting substring, otherwise the substring call indices can be out of bounds
+ String sText = s.getCoveredText().trim();
+
+ for ( String secStart : GHC_HIST_SECTIONS ) {
+ int slen = secStart.length();
+
+ if ( sText.length() >= slen ) {
+ String sentStart = sText.substring( 0, slen ).toLowerCase();
+ if ( sentStart.equals( secStart ) ) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+
+ /**
+  * Compares two Annotations by location for purposes of sorting.
+  * Annotations are equal in location iff begin and end locations are equal.
+  * Otherwise the annotation with the earlier begin sorts first; if the begins are equal
+  * but the ends are not, the annotation with the earlier end sorts first.
+  * Only the sign of the result is meaningful.
+  */
+ public static class SpanComparator implements Comparator<Annotation> {
+    @Override
+    public int compare( Annotation a1, Annotation a2 ) {
+       // Integer.compare instead of subtraction: cannot overflow, whatever the offsets are.
+       final int byBegin = Integer.compare( a1.getBegin(), a2.getBegin() );
+       return byBegin != 0 ? byBegin : Integer.compare( a1.getEnd(), a2.getEnd() );
+    }
+ }
+
+
+ /**
+  * Extracts the boolean features used to decide whether an annotation is historical.
+  * Every key of FeatureIndex is present in the result (false unless set below).
+  * IN_HIST_SECTION is only present when {@code sentence} is not null, the CAS holds a
+  * DocumentAnnotation and the sentence does not begin at offset 0.
+  * The two "subsumed" features are only computed when {@code sentence} is not null;
+  * the dependency features are computed in every case.
+  *
+  * @param jCas          the jcas of the document
+  * @param sentences     list of sentences that {@code sentenceIndex} refers to
+  * @param sentence      the sentence used for the history-section test; may be null
+  * @param sentenceIndex index of {@code sentence} within {@code sentences}
+  * @param arg           the annotation to extract features for
+  * @return map of feature key to feature value
+  */
+ public static HashMap<String, Boolean> extract( JCas jCas,
+       final List<Sentence> sentences,
+       final Sentence sentence,
+       final int sentenceIndex,
+       Annotation arg ) {
+    final HashMap<String, Boolean> vfeat = new HashMap<>();
+    for ( String feat : FeatureIndex ) {
+       vfeat.put( feat, false );
+    }
+
+    if ( sentence != null ) {
+       // 0) does the paragraph that holds the sentence start with a history section name?
+       final Collection<DocumentAnnotation> docAnnots = JCasUtil.select( jCas, DocumentAnnotation.class );
+       if ( !docAnnots.isEmpty() && sentence.getBegin() > 0 ) {
+          final String doctext = docAnnots.iterator().next().getCoveredText();
+          vfeat.put( IN_HIST_SECTION, startsHistParagraph( doctext, sentences, sentence, sentenceIndex ) );
+       }
+
+       // 2) some other identified annotation subsumes this one?
+       // Get all IdentifiedAnnotations covering the boundaries of the annotation, ordered by span.
+       final List<IdentifiedAnnotation> covering = JCasUtil.selectCovering( jCas,
+             IdentifiedAnnotation.class, arg.getBegin(), arg.getEnd() );
+       Collections.sort( covering, new SpanComparator() );
+       for ( IdentifiedAnnotation annot : covering ) {
+          if ( annot.getBegin() > arg.getBegin() ) {
+             // sorted by begin: nothing from here on can start at or before arg
+             break;
+          }
+          if ( annot.getEnd() < arg.getEnd() ) {
+             // annot ends before arg does, so it does not span arg
+             continue;
+          }
+          // annot spans arg; it only counts if it has a different head node
+          // and is an EntityMention or EventMention
+          final boolean sameHead = DependencyUtility.equalCoverage(
+                DependencyUtility.getNominalHeadNode( jCas, annot ),
+                DependencyUtility.getNominalHeadNode( jCas, arg ) );
+          if ( !sameHead && ((annot instanceof EntityMention) || (annot instanceof EventMention)) ) {
+             vfeat.put( SUBSUMED_ANNOT, true );
+             break; // no reason to keep checking
+          }
+       }
+
+       // 3) some chunk subsumes this?
+       // NOTE(review): the candidates are the 5 chunks preceding and the 5 chunks following arg.
+       // If selectPreceding/selectFollowing only return chunks lying entirely before/after arg,
+       // no candidate can span arg and this feature never becomes true - confirm whether
+       // selectCovering was intended.
+       final List<Chunk> chunks = JCasUtil.selectPreceding( jCas, Chunk.class, arg, 5 );
+       chunks.addAll( JCasUtil.selectFollowing( jCas, Chunk.class, arg, 5 ) );
+       for ( Chunk chunk : chunks ) {
+          if ( chunk.getBegin() > arg.getBegin() ) {
+             break;
+          }
+          if ( chunk.getEnd() < arg.getEnd() ) {
+             continue;
+          }
+          if ( !DependencyUtility.equalCoverage(
+                DependencyUtility.getNominalHeadNode( jCas, chunk ),
+                DependencyUtility.getNominalHeadNode( jCas, arg ) ) ) {
+             // the case that chunk is a superset
+             vfeat.put( SUBSUMED_CHUNK, true );
+          }
+       }
+    }
+
+    final List<ConllDependencyNode> depnodes = JCasUtil.selectCovered( jCas, ConllDependencyNode.class, arg );
+    if ( !depnodes.isEmpty() ) {
+       final ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode( depnodes );
+
+       // 1) check if the head node of the entity mention is really just part of a larger noun phrase
+       if ( depnode.getDeprel().matches( "(NMOD|amod|nmod|det|predet|nn|poss|possessive|infmod|partmod|rcmod)" ) ) {
+          vfeat.put( POSTCOORD_NMOD, true );
+       }
+
+       // 4) search dependency paths for discussion context
+       for ( ConllDependencyNode dn : DependencyUtility.getPathToTop( jCas, depnode ) ) {
+          if ( isDiscussionContext( dn ) ) {
+             vfeat.put( DISCUSSION_DEPPATH, true );
+          }
+       }
+    }
+    return vfeat;
+ }
+
+ /**
+  * Walks backwards from {@code sentence} to the first sentence of its paragraph (paragraphs are
+  * delimited by a newline between two consecutive sentences) and tests that sentence with
+  * {@link #isInHistSection}.  When no newline is found, the very first sentence is tested;
+  * the previous implementation tested the second sentence in that case because the branch
+  * meant for the first sentence was unreachable.
+  */
+ private static boolean startsHistParagraph( final String doctext,
+       final List<Sentence> sentences,
+       final Sentence sentence,
+       final int sentenceIndex ) {
+    Sentence paragraphStart = sentence;
+    for ( int i = sentenceIndex - 1; i >= 0; i-- ) {
+       final Sentence previous = sentences.get( i );
+       if ( hasNewlineBetween( doctext, previous.getEnd(), paragraphStart.getBegin() ) ) {
+          // a newline separates the two sentences: paragraphStart opens the paragraph
+          break;
+       }
+       paragraphStart = previous;
+    }
+    return isInHistSection( paragraphStart );
+ }
+
+ /**
+  * Tests whether {@code text} holds a newline in [from, to).  Offsets that do not describe a
+  * valid range of the text yield false; this is of no consequence for the caller.
+  */
+ private static boolean hasNewlineBetween( final String text, final int from, final int to ) {
+    if ( from < 0 || from > to || to > text.length() ) {
+       return false;
+    }
+    final int newline = text.indexOf( '\n', from );
+    return newline != -1 && newline < to;
+ }
+
+
+ private static boolean isDonorTerm( Annotation arg ) {
+ return arg.getCoveredText().toLowerCase()
+ .matches( "(donor).*" );
+ }
+
+
+ /**
+  * Tests whether the covered text of an annotation begins, ignoring case, with a word stem that
+  * signals discussion (discuss, ask, understand, tell, mention, talk, speak, address, ...).
+  *
+  * @param arg the annotation whose text is tested
+  * @return true if the text matches one of the stems
+  */
+ private static boolean isDiscussionContext( Annotation arg ) {
+    // Locale.ROOT: the stems are ASCII; default-locale lower-casing (e.g. Turkish dotless i)
+    // would keep "DISCUSS" or "MENTION" from matching.
+    return arg.getCoveredText().toLowerCase( Locale.ROOT )
+          .matches( "(discuss|ask|understand|understood|tell|told|mention|talk|speak|spoke|address).*" );
+ }
+
+
+ /**
+  * A main method for regex testing: prints "match" or "no match" for a fixed sample string.
+  *
+  * @param args ignored
+  */
+ public static void main( String[] args ) {
+    final String sample = "steps";
+    final boolean matched = sample.toLowerCase().matches( ".*(in-law|stepc|stepd|stepso|stepf|stepm|step-).*" );
+    System.out.println( matched ? "match" : "no match" );
+ }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractLeftToRightContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractLeftToRightContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractLeftToRightContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractLeftToRightContext.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,62 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.WindowedContextFeature;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+import org.cleartk.ml.feature.extractor.NamedFeatureExtractor1;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * A windowed context whose positions are numbered from left to right: position {@code begin}
+ * takes the first selected annotation that lies at or beyond {@code begin}, and so on up to,
+ * but not including, {@code end}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractLeftToRightContext extends AbstractWindowedContext {
+
+   public AbstractLeftToRightContext( int begin, int end ) {
+      super( begin, end );
+   }
+
+   /**
+    * Produces one group of features per position in [begin, end).  A position whose annotation
+    * exists and lies within {@code bounds} gets the features of {@code extractor}; every other
+    * position gets a single out-of-bounds feature with an increasing out-of-bounds counter.
+    *
+    * @param jCas            the jcas of the document
+    * @param focusAnnotation the annotation the context is relative to
+    * @param bounds          annotations outside these bounds are reported as out of bounds
+    * @param annotationClass the type of annotation to select
+    * @param extractor       extracts the features of each selected annotation
+    * @return the features of all positions, in position order
+    * @throws CleartkExtractorException if {@code extractor} fails
+    */
+   public <SEARCH_T extends Annotation> List<Feature> extract( JCas jCas, Annotation focusAnnotation,
+         CleartkExtractor.Bounds bounds,
+         Class<SEARCH_T> annotationClass,
+         FeatureExtractor1<SEARCH_T> extractor ) throws CleartkExtractorException {
+      String featureName = null;
+      if ( extractor instanceof NamedFeatureExtractor1 ) {
+         featureName = ((NamedFeatureExtractor1<SEARCH_T>)extractor).getFeatureName();
+      }
+
+      final List<SEARCH_T> selected = this.select( jCas, focusAnnotation, annotationClass, this.end );
+      final List<SEARCH_T> window;
+      int oobPos;
+      if ( this.begin > selected.size() ) {
+         // the whole window lies beyond the selected annotations
+         oobPos = this.begin - selected.size() + 1;
+         window = new ArrayList<>();
+      } else {
+         oobPos = 1;
+         window = selected.subList( this.begin, selected.size() );
+      }
+
+      final List<Feature> features = new ArrayList<>();
+      final Iterator<SEARCH_T> remaining = window.iterator();
+      int pos = this.begin;
+      while ( pos < this.end ) {
+         final SEARCH_T ann = remaining.hasNext() ? remaining.next() : null;
+         if ( ann == null || !bounds.contains( ann ) ) {
+            features.add( new WindowedContextFeature( this.getName(), pos, oobPos, featureName ) );
+            oobPos++;
+         } else {
+            for ( Feature feature : extractor.extract( jCas, ann ) ) {
+               features.add( new WindowedContextFeature( this.getName(), pos, feature ) );
+            }
+         }
+         pos++;
+      }
+      return features;
+   }
+
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractRightToLeftContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractRightToLeftContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractRightToLeftContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractRightToLeftContext.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,71 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.WindowedContextFeature;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+import org.cleartk.ml.feature.extractor.NamedFeatureExtractor1;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * A windowed context whose positions are numbered from right to left: position 0 takes the last
+ * selected annotation, position 1 the one before it, and so on.  Features are produced from the
+ * highest position ({@code end - 1}) down to {@code begin}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractRightToLeftContext extends AbstractWindowedContext {
+
+   public AbstractRightToLeftContext( int begin, int end ) {
+      super( begin, end );
+   }
+
+   /**
+    * Produces one group of features per position in [begin, end).  A position whose annotation
+    * exists and lies within {@code bounds} gets the features of {@code extractor}; every other
+    * position gets a single out-of-bounds feature with a decreasing out-of-bounds counter.
+    *
+    * @param jCas            the jcas of the document
+    * @param focusAnnotation the annotation the context is relative to
+    * @param bounds          annotations outside these bounds are reported as out of bounds
+    * @param annotationClass the type of annotation to select
+    * @param extractor       extracts the features of each selected annotation
+    * @return the features of all positions, from position {@code end - 1} down to {@code begin}
+    * @throws CleartkExtractorException if {@code extractor} fails
+    */
+   public <SEARCH_T extends Annotation> List<Feature> extract( JCas jCas, Annotation focusAnnotation,
+         CleartkExtractor.Bounds bounds,
+         Class<SEARCH_T> annotationClass,
+         FeatureExtractor1<SEARCH_T> extractor )
+         throws CleartkExtractorException {
+      String featureName = null;
+      if ( extractor instanceof NamedFeatureExtractor1 ) {
+         featureName = ((NamedFeatureExtractor1<SEARCH_T>)extractor).getFeatureName();
+      }
+
+      // slice the appropriate annotations from the window
+      final List<SEARCH_T> selected = this.select( jCas, focusAnnotation, annotationClass, this.end );
+      final int missing = this.end - selected.size();
+      final List<SEARCH_T> window = selected.subList( 0, Math.max( 0, selected.size() - this.begin ) );
+
+      // the out-of-bounds counter starts at the total number of out-of-bounds positions
+      int oobPos = missing;
+      for ( SEARCH_T candidate : window ) {
+         if ( !bounds.contains( candidate ) ) {
+            oobPos++;
+         }
+      }
+
+      // extract features at each position, highest position first
+      final List<Feature> features = new ArrayList<Feature>();
+      int pos = this.end - 1;
+      while ( pos >= this.begin ) {
+         // positions that reach before the first selected annotation have a negative index
+         final int index = this.end - 1 - pos - missing;
+         SEARCH_T ann = null;
+         if ( index >= 0 ) {
+            ann = window.get( index );
+         }
+         if ( ann == null || !bounds.contains( ann ) ) {
+            // absent or out of bounds: add an out-of-bounds feature
+            features.add( new WindowedContextFeature( this.getName(), pos, oobPos, featureName ) );
+            oobPos--;
+         } else {
+            for ( Feature feature : extractor.extract( jCas, ann ) ) {
+               features.add( new WindowedContextFeature( this.getName(), pos, feature ) );
+            }
+         }
+         pos--;
+      }
+      return features;
+   }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,67 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Base class of the windowed contexts.  Instead of querying the CAS, subclasses pick their
+ * annotations from a window of annotations that the caller supplies with {@link #setWindow}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractWindowedContext implements CleartkExtractor.Context {
+
+   protected int begin;
+   protected int end;
+   private final String name;
+
+   /** The annotations of the current window; replaced by every call of {@link #setWindow}. */
+   protected List<Annotation> _windowCovered = new ArrayList<>();
+
+   /**
+    * Replaces the window with a copy of the given annotations.
+    *
+    * @param windowCovered the annotations that make up the new window
+    */
+   public <T extends Annotation> void setWindow( final List<T> windowCovered ) {
+      _windowCovered.clear();
+      _windowCovered.addAll( windowCovered );
+   }
+
+
+   /**
+    * @param begin first position of the context
+    * @param end   end position of the context; must not be smaller than {@code begin}
+    * @throws IllegalArgumentException if {@code begin} is greater than {@code end}
+    */
+   public AbstractWindowedContext( int begin, int end ) {
+      if ( begin > end ) {
+         // begin == end is accepted, so the message states "<=" (it used to claim "<")
+         throw new IllegalArgumentException(
+               String.format( "expected begin <= end, found begin=%d end=%d", begin, end ) );
+      }
+      this.begin = begin;
+      this.end = end;
+      this.name = Feature.createName( this.getClass().getSimpleName(),
+            String.valueOf( this.begin ), String.valueOf( this.end ) );
+   }
+
+   /**
+    * @return the name built from the simple class name, begin and end
+    */
+   @Override
+   public String getName() {
+      return this.name;
+   }
+
+   /**
+    * Collects, in window order, the window annotations of the wanted type whose span lies
+    * within the span of {@code covering}.
+    *
+    * @param covering     the annotation whose span must contain the result annotations
+    * @param coveredClass the wanted annotation type
+    * @return the matching window annotations; empty if there are none
+    */
+   protected <T extends Annotation> List<T> selectCovered( final Annotation covering, final Class<T> coveredClass ) {
+      final List<T> covered = new ArrayList<>();
+      for ( Annotation annotation : _windowCovered ) {
+         if ( coveredClass.isInstance( annotation )
+              && annotation.getBegin() >= covering.getBegin()
+              && annotation.getEnd() <= covering.getEnd() ) {
+            // Class.cast is checked, unlike a (T) cast, and is safe after isInstance
+            covered.add( coveredClass.cast( annotation ) );
+         }
+      }
+      return covered;
+   }
+
+   /**
+    * Selects the annotations this context works on.  The visible subclasses return at most
+    * {@code var4} annotations of type {@code var3}, chosen relative to the focus annotation
+    * {@code var2}, in window order.
+    */
+   protected abstract <T extends Annotation> List<T> select( JCas var1, Annotation var2, Class<T> var3, int var4 );
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/FollowingContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/FollowingContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/FollowingContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/FollowingContext.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,68 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A {@link org.cleartk.ml.feature.extractor.CleartkExtractor.Context} for extracting annotations appearing after the focus annotation.
+ * Only annotations of the current window are considered.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class FollowingContext extends AbstractLeftToRightContext {
+
+   /**
+    * Constructs a context that will extract features over the following N annotations.
+    *
+    * @param end The number of annotations to extract.
+    */
+   public FollowingContext( int end ) {
+      super( 0, end );
+   }
+
+   /**
+    * Constructs a context that will extract features over a slice of the following N annotations.
+    * <p>
+    * The {@code begin} and {@code end} indexes count from 0, where index 0 identifies the
+    * annotation immediately following the focus annotation. If either index is greater than the
+    * index of the last possible annotation, special "out of bounds" features will be added for
+    * each annotation that was requested but absent.
+    *
+    * @param begin The index of the first annotation to include.
+    * @param end   The index just past the last annotation to include. Must not be smaller than {@code begin}.
+    */
+   public FollowingContext( int begin, int end ) {
+      super( begin, end );
+   }
+
+   /**
+    * Returns up to {@code count} window annotations of the wanted type that begin at or after
+    * the end of the focus annotation, in window order (the window is presumably in document
+    * order - TODO confirm).  Takes the place of {@code JCasUtil.selectFollowing}.
+    */
+   @Override
+   protected <T extends Annotation> List<T> select( final JCas jCas,
+         final Annotation focusAnnotation,
+         final Class<T> annotationClass,
+         final int count ) {
+      final List<T> following = new ArrayList<>( count );
+      final int focusEnd = focusAnnotation.getEnd();
+      for ( Annotation candidate : _windowCovered ) {
+         if ( !annotationClass.isInstance( candidate ) || candidate.getBegin() < focusEnd ) {
+            continue;
+         }
+         following.add( annotationClass.cast( candidate ) );
+         if ( following.size() == count ) {
+            break;
+         }
+      }
+      return following;
+   }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/LastCoveredContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/LastCoveredContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/LastCoveredContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/LastCoveredContext.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,32 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import java.util.List;
+
+/**
+ * A context over the last annotations of the window that lie within the span of the focus annotation.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class LastCoveredContext extends AbstractRightToLeftContext {
+
+   /**
+    * @param end the number of covered annotations to extract, counted from the last one
+    */
+   public LastCoveredContext( int end ) {
+      super( 0, end );
+   }
+
+   /**
+    * @param begin first position to include; position 0 is the last covered annotation
+    * @param end   position just past the last one to include
+    */
+   public LastCoveredContext( int begin, int end ) {
+      super( begin, end );
+   }
+
+
+   /**
+    * Returns the last {@code count} window annotations of the wanted type that lie within the
+    * span of the focus annotation, or all of them if there are fewer.
+    */
+   protected <T extends Annotation> List<T> select( final JCas jCas,
+         final Annotation focusAnnotation,
+         final Class<T> annotationClass,
+         final int count ) {
+      final List<T> covered = selectCovered( focusAnnotation, annotationClass );
+      final int size = covered.size();
+      final int from = Math.max( size - count, 0 );
+      return covered.subList( from, size );
+   }
+
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/PrecedingContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/PrecedingContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/PrecedingContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/PrecedingContext.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,45 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class PrecedingContext extends AbstractRightToLeftContext {
+
+ public PrecedingContext( int end ) {
+ super( 0, end );
+ }
+
+ public PrecedingContext( int begin, int end ) {
+ super( begin, end );
+ }
+
+ protected <T extends Annotation> List<T> select( final JCas jCas,
+ final Annotation focusAnnotation,
+ final Class<T> annotationClass,
+ final int count ) {
+ final List<T> preceding = new ArrayList<>( count );
+
+ for ( int i = _windowCovered.size() - 1; i >= 0; i-- ) {
+ if ( annotationClass.isInstance( _windowCovered.get( i ) )
+ && _windowCovered.get( i ).getEnd() <= focusAnnotation.getBegin() ) {
+ preceding.add( (T)_windowCovered.get( i ) );
+ if ( preceding.size() == count ) {
+ break;
+ }
+ }
+ }
+ Collections.reverse( preceding );
+ return preceding;
+ }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/WindowedBag.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/WindowedBag.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/WindowedBag.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/WindowedBag.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,74 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.WindowedContextFeature;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A {@link CleartkExtractor.Context} that aggregates the features of other contexts into a "bag" where position
+ * information of each individual feature is no longer maintained. Position information is not
+ * entirely lost - the span of the bag is encoded as part of the feature name that is shared by
+ * all of the features within the bag.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedBag implements CleartkExtractor.Context {
+   private CleartkExtractor.Context[] contexts;
+
+   private String name;
+
+   /**
+    * Constructs a {@link CleartkExtractor.Context} which converts the features extracted by the argument contexts
+    * into a bag of features where all features have the same name.
+    * The bag name is built from "Bag" followed by the names of the contexts.
+    *
+    * @param contexts The contexts which should be combined into a bag.
+    */
+   public WindowedBag( CleartkExtractor.Context... contexts ) {
+      this.contexts = contexts;
+      final String[] nameParts = new String[ contexts.length + 1 ];
+      nameParts[ 0 ] = "Bag";
+      int slot = 1;
+      for ( CleartkExtractor.Context context : contexts ) {
+         nameParts[ slot ] = context.getName();
+         slot++;
+      }
+      this.name = Feature.createName( nameParts );
+   }
+
+   @Override
+   public String getName() {
+      return this.name;
+   }
+
+   /**
+    * Runs every wrapped context and re-labels each of its features with the name of the bag.
+    * Every feature returned by a wrapped context is cast to {@link WindowedContextFeature},
+    * so the wrapped contexts are expected to produce only such features.
+    *
+    * @return the re-labelled features of all wrapped contexts, in context order
+    * @throws CleartkExtractorException if a wrapped context fails
+    */
+   @Override
+   public <SEARCH_T extends Annotation> List<Feature> extract(
+         JCas jCas,
+         Annotation focusAnnotation,
+         CleartkExtractor.Bounds bounds,
+         Class<SEARCH_T> annotationClass,
+         FeatureExtractor1<SEARCH_T> extractor ) throws CleartkExtractorException {
+      final List<Feature> bagged = new ArrayList<>();
+      for ( CleartkExtractor.Context context : this.contexts ) {
+         final List<Feature> positioned
+               = context.extract( jCas, focusAnnotation, bounds, annotationClass, extractor );
+         for ( Feature feature : positioned ) {
+            // drop the position: keep only the name of the wrapped feature and the value
+            final Feature wrapped = ((WindowedContextFeature)feature).feature;
+            final Feature unpositioned = new Feature( wrapped.getName(), feature.getValue() );
+            bagged.add( new WindowedContextFeature( this.getName(), unpositioned ) );
+         }
+      }
+      return bagged;
+   }
+
+}
\ No newline at end of file
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/WindowedContextFeature.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/WindowedContextFeature.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/WindowedContextFeature.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/WindowedContextFeature.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,34 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature;
+
+import org.cleartk.ml.Feature;
+
+import java.util.Locale;
+
+/**
+ * A feature produced by a windowed context.  It wraps the feature of the underlying extractor
+ * and prefixes its name with the name of the context and, where given, the position.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedContextFeature extends Feature {
+   private static final long serialVersionUID = 1L;
+   /** The wrapped feature. */
+   public Feature feature;
+
+   /**
+    * Wraps a feature without position information.
+    *
+    * @param baseName name of the context
+    * @param feature  the feature to wrap; supplies the name suffix and the value
+    */
+   public WindowedContextFeature( String baseName, Feature feature ) {
+      final String fullName = Feature.createName( baseName, feature.getName() );
+      this.feature = feature;
+      this.setName( fullName );
+      this.setValue( feature.getValue() );
+   }
+
+   /**
+    * Wraps a feature found at a position of the context.
+    *
+    * @param baseName name of the context
+    * @param position position of the annotation within the context
+    * @param feature  the feature to wrap; supplies the name suffix and the value
+    */
+   public WindowedContextFeature( String baseName, int position, Feature feature ) {
+      final String fullName = Feature.createName( baseName, String.valueOf( position ), feature.getName() );
+      this.feature = feature;
+      this.setName( fullName );
+      this.setValue( feature.getValue() );
+   }
+
+   /**
+    * Creates an out-of-bounds feature whose value is "OOB" followed by the out-of-bounds position.
+    *
+    * @param baseName    name of the context
+    * @param position    position within the context
+    * @param oobPosition out-of-bounds counter that becomes part of the value
+    * @param featureName name of the feature that could not be extracted; may be null
+    */
+   public WindowedContextFeature( String baseName, int position, int oobPosition, String featureName ) {
+      final String oobValue = String.format( Locale.ROOT, "OOB%d", oobPosition );
+      this.feature = new Feature( featureName, oobValue );
+      this.setName( Feature.createName( baseName, String.valueOf( position ), featureName ) );
+      this.setValue( this.feature.getValue() );
+   }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractTreeFragmentFeatureExtractor1.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractTreeFragmentFeatureExtractor1.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractTreeFragmentFeatureExtractor1.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractTreeFragmentFeatureExtractor1.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,62 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+
+import org.apache.ctakes.assertion.util.SemanticClasses;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.utils.tree.FragmentUtils;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.util.CleartkInitializationException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Scanner;
+
+/**
+ * Base class for windowed extractors that emit features based on a set of tree fragments.
+ * On construction it loads the fragments (one per line) from a resource and loads the semantic
+ * classes from {@code org/apache/ctakes/assertion/all_cues.txt}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractTreeFragmentFeatureExtractor1
+      extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+   public static final String PARAM_OUTPUTDIR = "outputDir";
+   public static final String PARAM_SEMDIR = "semDir";
+   /** Tree fragments parsed from the fragment resource; empty if the resource could not be read. */
+   protected HashSet<SimpleTree> frags = null;
+   /** Semantic classes loaded from the cue resource. */
+   protected SemanticClasses sems = null;
+   /** Prefix supplied by the subclass; subclasses use it when naming features. */
+   protected String prefix = null;
+
+   /**
+    * @param prefix           prefix stored for use by subclasses
+    * @param resourceFilename location of the tree fragment resource, resolved with {@link FileLocator}
+    * @throws CleartkInitializationException if the semantic classes resource cannot be loaded
+    */
+   public AbstractTreeFragmentFeatureExtractor1( String prefix, String resourceFilename )
+         throws CleartkInitializationException {
+      initializeFrags( resourceFilename );
+      this.prefix = prefix;
+      try {
+         // NOTE(review): this stream is handed to SemanticClasses and not closed here; confirm
+         // that SemanticClasses consumes and closes it.
+         sems = new SemanticClasses( FileLocator.getAsStream( "org/apache/ctakes/assertion/all_cues.txt" ) );
+      } catch ( Exception e ) {
+         throw new CleartkInitializationException( e, "org/apache/ctakes/assertion/all_cues.txt", "Could not find semantic classes resource.", new Object[] {} );
+      }
+   }
+
+   /**
+    * Reads the fragment resource line by line and parses each trimmed line into a tree.
+    * Loading is best-effort: an I/O problem is reported on stderr and leaves {@link #frags}
+    * holding whatever was parsed so far.
+    *
+    * @param resourceFilename location of the tree fragment resource
+    */
+   private void initializeFrags( String resourceFilename ) {
+      frags = new HashSet<>();
+      // try-with-resources closes the stream and scanner even when parsing a fragment throws.
+      try ( InputStream fragsFilestream = FileLocator.getAsStream( resourceFilename );
+            Scanner scanner = new Scanner( fragsFilestream ) ) {
+         while ( scanner.hasNextLine() ) {
+            frags.add( FragmentUtils.frag2tree( scanner.nextLine().trim() ) );
+         }
+         // Scanner swallows read errors and simply stops; surface them instead of silently
+         // working with a truncated fragment set.
+         final IOException readError = scanner.ioException();
+         if ( readError != null ) {
+            System.err.println( "Trouble with tree fragment file: " + readError );
+         }
+      } catch ( IOException e ) {
+         System.err.println( "Trouble with tree fragment file: " + e );
+      }
+   }
+
+   /**
+    * @param jcas       cas containing the annotation
+    * @param annotation annotation to extract features for
+    * @return features for the annotation
+    */
+   @Override
+   public abstract List<Feature> extract( JCas jcas, IdentifiedAnnotation annotation );
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractWindowedFeatureExtractor1.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractWindowedFeatureExtractor1.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractWindowedFeatureExtractor1.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractWindowedFeatureExtractor1.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,33 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+
+import java.util.List;
+
+
+/**
+ * Base class for feature extractors that are handed their sentence/token window up front
+ * through {@link #setSentences(List)} and {@link #setWindow(Sentence, int, List)}, and keep it
+ * in protected fields for subclasses to read.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractWindowedFeatureExtractor1<T extends Annotation> implements FeatureExtractor1<T> {
+
+   /** Sentences most recently supplied through {@link #setSentences(List)}. */
+   protected List<Sentence> _sentences;
+   /** Sentence of the current window. */
+   protected Sentence _sentence;
+   /** Index supplied with the current sentence; presumably its position in {@link #_sentences}. */
+   protected int _sentenceIndex;
+   /** Base tokens supplied with the current window. */
+   protected List<BaseToken> _baseTokens;
+
+   /**
+    * Stores the sentence list for later use by subclasses.
+    *
+    * @param sentences sentences to remember; the reference is stored as-is, not copied
+    */
+   public void setSentences( final List<Sentence> sentences ) {
+      this._sentences = sentences;
+   }
+
+   /**
+    * Stores the current window for later use by subclasses.
+    *
+    * @param sentence      sentence of the window
+    * @param sentenceIndex index associated with the sentence
+    * @param baseTokens    base tokens of the window; the reference is stored as-is, not copied
+    */
+   public void setWindow( final Sentence sentence, final int sentenceIndex, final List<BaseToken> baseTokens ) {
+      this._baseTokens = baseTokens;
+      this._sentenceIndex = sentenceIndex;
+      this._sentence = sentence;
+   }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedAssertionDependencyTreeExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedAssertionDependencyTreeExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedAssertionDependencyTreeExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedAssertionDependencyTreeExtractor.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,73 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import com.google.common.collect.Lists;
+import org.apache.ctakes.assertion.pipelines.GenerateDependencyRepresentation;
+import org.apache.ctakes.assertion.util.AssertionDepUtils;
+import org.apache.ctakes.assertion.util.AssertionTreeUtils;
+import org.apache.ctakes.assertion.util.SemanticClasses;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.TreeFeature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.util.CleartkInitializationException;
+
+import java.util.List;
+
+
+/**
+ * Produces a single tree-kernel feature ({@code TK_DW}) holding the dependency tree around a
+ * mention, built from the dependency nodes covered by the current window sentence, with
+ * dependency words replaced by semantic classes.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedAssertionDependencyTreeExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+   /** Resource holding the semantic classes; also reported when loading fails. */
+   private static final String SEMANTIC_CLASSES_PATH = "org/apache/ctakes/assertion/all_cues.txt";
+   /** Tree string emitted when no dependency tree can be built. */
+   private static final String NO_PARSE_TREE = "(S (no parse))";
+
+   protected SemanticClasses sems = null;
+
+   /**
+    * @throws CleartkInitializationException if the semantic classes resource cannot be loaded
+    */
+   public WindowedAssertionDependencyTreeExtractor() throws CleartkInitializationException {
+      try {
+         sems = new SemanticClasses( FileLocator.getAsStream( SEMANTIC_CLASSES_PATH ) );
+      } catch ( Exception e ) {
+         // Report the resource that was actually requested, not an unrelated path.
+         throw new CleartkInitializationException( e, SEMANTIC_CLASSES_PATH, "Could not find semantic classes resource.", new Object[] {} );
+      }
+   }
+
+   /**
+    * @param jCas cas containing the mention
+    * @param arg1 mention to build the tree feature for
+    * @return a list holding exactly one {@link TreeFeature} named {@code TK_DW}
+    * @throws CleartkExtractorException declared by the extractor contract
+    */
+   @Override
+   public List<Feature> extract( JCas jCas, IdentifiedAnnotation arg1 )
+         throws CleartkExtractorException {
+      final List<Feature> feats = Lists.newArrayList();
+      feats.add( new TreeFeature( "TK_DW", buildTreeString( jCas, arg1 ) ) );
+      return feats;
+   }
+
+   /**
+    * Builds the tree string for the mention, falling back to {@link #NO_PARSE_TREE} when there is
+    * no window sentence or no tree could be built.
+    */
+   private String buildTreeString( JCas jCas, IdentifiedAnnotation mention ) {
+      final Sentence sent = _sentence;
+      if ( sent == null ) {
+         // No window was set: emit the "no parse" placeholder rather than failing with an NPE.
+         return NO_PARSE_TREE;
+      }
+      final List<ConllDependencyNode> nodes = JCasUtil.selectCovered( ConllDependencyNode.class, sent );
+      final SimpleTree tree
+            = AssertionDepUtils.getTokenTreeString( jCas, nodes, mention, GenerateDependencyRepresentation.UP_NODES );
+      if ( tree == null ) {
+         return NO_PARSE_TREE;
+      }
+      AssertionTreeUtils.replaceDependencyWordsWithSemanticClasses( tree, sems );
+      return tree.toString();
+   }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedContextWordWindowExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedContextWordWindowExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedContextWordWindowExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedContextWordWindowExtractor.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,123 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedContextWordWindowExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+ private HashMap<String, Double> termVals = null;
+ private static final Pattern linePatt = Pattern.compile( "^([^ ]+) : (.+)$" );
+ private static double[] weights = new double[ 50 ];
+
+ static {
+ weights[ 0 ] = 1.0;
+ for ( int i = 1; i < weights.length; i++ ) {
+ weights[ i ] = 1.0 / i;
+ }
+ }
+
+ public WindowedContextWordWindowExtractor( String resourceFilename ) {
+ termVals = new HashMap<>();
+ InputStream is = getClass().getClassLoader().getResourceAsStream( resourceFilename );
+
+ Scanner scanner = new Scanner( is );
+ Matcher m = null;
+ double max = 0.0;
+ double maxNeg = 0.0;
+ while ( scanner.hasNextLine() ) {
+ String line = scanner.nextLine().trim();
+ m = linePatt.matcher( line );
+ if ( m.matches() ) {
+ double val = Double.parseDouble( m.group( 2 ) );
+ termVals.put( m.group( 1 ), val );
+ if ( Math.abs( val ) > max ) {
+ max = Math.abs( val );
+ }
+ if ( val < maxNeg ) {
+ maxNeg = val;
+ }
+ }
+ }
+ try {
+ is.close();
+ } catch ( IOException e ) {
+ e.printStackTrace();
+ }
+ max = max - maxNeg;
+ for ( String key : termVals.keySet() ) {
+ termVals.put( key, (termVals.get( key ) - maxNeg) / max );
+ }
+ }
+
+ @Override
+ public List<Feature> extract( JCas view, IdentifiedAnnotation mention )
+ throws CleartkExtractorException {
+ ArrayList<Feature> feats = new ArrayList<>();
+
+ final List<BaseToken> tokens = _baseTokens;
+
+ int startIndex = -1;
+ int endIndex = -1;
+
+ for ( int i = 0; i < tokens.size(); i++ ) {
+ if ( tokens.get( i ).getBegin() == mention.getBegin() ) {
+ startIndex = i;
+ }
+ if ( tokens.get( i ).getEnd() == mention.getEnd() ) {
+ endIndex = i;
+ }
+ }
+
+ double score = 0.0;
+ double z = 0.0;
+ String key = null;
+ double weight;
+ for ( int i = 0; i < tokens.size(); i++ ) {
+ key = tokens.get( i ).getCoveredText().toLowerCase();
+ int dist = Math.min( Math.abs( startIndex - i ), Math.abs( endIndex - i ) );
+ weight = weightFunction( dist );
+ z += weight;
+ if ( termVals.containsKey( key ) ) {
+ score += (weight * termVals.get( key ));
+ }
+ }
+
+ score /= z; // weight by actual amount of context so we don't penalize begin/end of sentence.
+ feats.add( new Feature( "WORD_SCORE", score ) );
+ return feats;
+ }
+
+ private static final double weightFunction( int dist ) {
+ if ( dist >= weights.length ) {
+ return 0.0;
+ }
+
+ // quick decay
+// return 1.0 / dist;
+
+ // linear decay
+// return 1.0 - dist * (1.0/50.0);
+
+ // no decay:
+ return 1.0;
+ }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedDependencyWordsFragmentExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedDependencyWordsFragmentExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedDependencyWordsFragmentExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedDependencyWordsFragmentExtractor.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,59 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.assertion.pipelines.GenerateDependencyRepresentation;
+import org.apache.ctakes.assertion.util.AssertionDepUtils;
+import org.apache.ctakes.assertion.util.AssertionTreeUtils;
+import org.apache.ctakes.constituency.parser.util.TreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.util.CleartkInitializationException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Emits one {@code TreeFrag_<prefix>} feature for every known tree fragment contained in the
+ * mention's dependency tree, which is built from the dependency nodes covered by the current
+ * window sentence.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedDependencyWordsFragmentExtractor extends AbstractTreeFragmentFeatureExtractor1 {
+
+   /**
+    * @param prefix    suffix used in the generated feature name
+    * @param fragsPath location of the tree fragment resource
+    * @throws CleartkInitializationException if the parent class fails to initialize
+    */
+   public WindowedDependencyWordsFragmentExtractor( String prefix, String fragsPath )
+         throws CleartkInitializationException {
+      super( prefix, fragsPath );
+   }
+
+   /**
+    * @param jCas    cas containing the mention
+    * @param mention mention to extract fragment features for
+    * @return one feature per matching fragment; empty when no tree could be built
+    */
+   @Override
+   public List<Feature> extract( JCas jCas, IdentifiedAnnotation mention ) {
+      final List<Feature> matches = new ArrayList<>();
+
+      final List<ConllDependencyNode> windowNodes = JCasUtil.selectCovered( ConllDependencyNode.class, _sentence );
+      final SimpleTree depTree
+            = AssertionDepUtils.getTokenTreeString( jCas, windowNodes, mention, GenerateDependencyRepresentation.UP_NODES );
+      if ( depTree == null ) {
+         System.err.println( "Tree is null!" );
+         return matches;
+      }
+
+      // Fragments are compared against the tree after its words were mapped to semantic classes.
+      AssertionTreeUtils.replaceDependencyWordsWithSemanticClasses( depTree, sems );
+      final String featureName = "TreeFrag_" + prefix;
+      for ( SimpleTree candidate : frags ) {
+         if ( TreeUtils.containsDepFragIgnoreCase( depTree, candidate ) ) {
+            matches.add( new Feature( featureName, candidate.toString() ) );
+         }
+      }
+      return matches;
+   }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedGenericFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedGenericFeaturesExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedGenericFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedGenericFeaturesExtractor.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,54 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.classifier.WindowedGenericAttributeClassifier;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+
+import java.util.*;
+
+/**
+ * Collects features for the generic attribute: the mention's nominal dependency head, every
+ * boolean input computed by {@link WindowedGenericAttributeClassifier}, and that classifier's
+ * rule-based decision.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedGenericFeaturesExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+   /**
+    * @param jCas cas containing the mention
+    * @param arg  mention to extract features for
+    * @return head features (when a head node exists), rule inputs and the rule-based result
+    */
+   @Override
+   public List<Feature> extract( JCas jCas, IdentifiedAnnotation arg ) {
+      final List<Feature> collected = new ArrayList<>();
+
+      // General dependency-based features -- candidates for a separate extractor.
+      final ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode( jCas, arg );
+      if ( headNode != null ) {
+         collected.add( new Feature( "DEPENDENCY_HEAD", headNode.getCoveredText() ) );
+         collected.add( new Feature( "DEPENDENCY_HEAD_deprel", headNode.getDeprel() ) );
+      }
+
+      // Inputs of the rule-based module, followed by the module's own verdict.
+      final HashMap<String, Boolean> ruleInputs = WindowedGenericAttributeClassifier.extract( jCas, _sentence, arg );
+      collected.addAll( hashToFeatureList( ruleInputs ) );
+      collected.add( new Feature( "GENERIC_CLASSIFIER_LOGIC", WindowedGenericAttributeClassifier.classifyWithLogic( ruleInputs ) ) );
+
+      return collected;
+   }
+
+   /**
+    * Turns each map entry into a feature named after the key and valued with the entry's boolean.
+    */
+   private static Collection<? extends Feature> hashToFeatureList(
+         HashMap<String, Boolean> featsIn ) {
+      final Collection<Feature> converted = new HashSet<>();
+      for ( Map.Entry<String, Boolean> entry : featsIn.entrySet() ) {
+         converted.add( new Feature( entry.getKey(), entry.getValue() ) );
+      }
+      return converted;
+   }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedHistoryFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedHistoryFeatureExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedHistoryFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedHistoryFeatureExtractor.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,62 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.classifier.WindowedHistoryAttributeClassifier;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+
+import java.util.*;
+
+
+/**
+ * Collects features for the history attribute: the mention's nominal dependency head, every
+ * boolean input computed by {@link WindowedHistoryAttributeClassifier}, and that classifier's
+ * rule-based decision.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedHistoryFeatureExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+   /**
+    * @param jCas cas containing the mention
+    * @param arg  mention to extract features for
+    * @return head features (when a head node exists), rule inputs and the rule-based result
+    */
+   @Override
+   public List<Feature> extract( JCas jCas, IdentifiedAnnotation arg ) {
+      final List<Feature> collected = new ArrayList<>();
+
+      // General dependency-based features -- candidates for a separate extractor.
+      // Only the head's word and dependency relation are used.
+      final ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode( jCas, arg );
+      if ( headNode != null ) {
+         collected.add( new Feature( "DEPENDENCY_HEAD_word", headNode.getCoveredText() ) );
+         collected.add( new Feature( "DEPENDENCY_HEAD_deprel", headNode.getDeprel() ) );
+      }
+
+      // Inputs of the rule-based module, followed by the module's own verdict.
+      final HashMap<String, Boolean> ruleInputs
+            = WindowedHistoryAttributeClassifier.extract( jCas, _sentences, _sentence, _sentenceIndex, arg );
+      collected.addAll( hashToFeatureList( ruleInputs ) );
+      collected.add( new Feature( "HISTORY_CLASSIFIER_LOGIC", WindowedHistoryAttributeClassifier.classifyWithLogic( ruleInputs ) ) );
+
+      return collected;
+   }
+
+   /**
+    * Turns each map entry into a feature named after the key and valued with the entry's boolean.
+    */
+   private static Collection<? extends Feature> hashToFeatureList(
+         HashMap<String, Boolean> featsIn ) {
+      final Collection<Feature> converted = new HashSet<>();
+      for ( Map.Entry<String, Boolean> entry : featsIn.entrySet() ) {
+         converted.add( new Feature( entry.getKey(), entry.getValue() ) );
+      }
+      return converted;
+   }
+
+}
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedNegationDependencyFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedNegationDependencyFeatureExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedNegationDependencyFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedNegationDependencyFeatureExtractor.java Tue Jan 8 03:45:51 2019
@@ -0,0 +1,51 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.assertion.util.NegationManualDepContextAnalyzer;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Emits one {@code DepPath_<regexName>} feature for every negation dependency pattern that
+ * {@link NegationManualDepContextAnalyzer} reports for the mention's nominal head within the
+ * current window sentence.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedNegationDependencyFeatureExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+   /** Analyzer that evaluates the negation patterns. */
+   NegationManualDepContextAnalyzer conAnal = null;
+
+   public WindowedNegationDependencyFeatureExtractor() {
+      this.conAnal = new NegationManualDepContextAnalyzer();
+   }
+
+   /**
+    * @param jcas            cas containing the mention
+    * @param focusAnnotation mention to extract negation features for
+    * @return one feature per matching pattern; empty when none match
+    * @throws CleartkExtractorException wrapping any exception raised while evaluating the patterns
+    */
+   @Override
+   public List<Feature> extract( JCas jcas, IdentifiedAnnotation focusAnnotation )
+         throws CleartkExtractorException {
+      final List<Feature> negationFeatures = new ArrayList<>();
+
+      final List<ConllDependencyNode> sentenceNodes = DependencyUtility.getDependencyNodes( jcas, _sentence );
+      final ConllDependencyNode head = DependencyUtility.getNominalHeadNode( jcas, focusAnnotation );
+      try {
+         final boolean[] matched = conAnal.findNegationContext( sentenceNodes, head );
+         int patternIndex = 0;
+         for ( boolean hit : matched ) {
+            if ( hit ) {
+               negationFeatures.add( new Feature( "DepPath_" + conAnal.getRegexName( patternIndex ) ) );
+            }
+            patternIndex++;
+         }
+      } catch ( Exception e ) {
+         e.printStackTrace();
+         throw new CleartkExtractorException( e );
+      }
+      return negationFeatures;
+   }
+
+}