You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/01/08 03:45:51 UTC

svn commit: r1850705 [2/2] - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed: ./ classifier/ context/ context/feature/ context/feature/extractor/

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/classifier/WindowedHistoryAttributeClassifier.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,340 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.classifier;
+
+
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.jcas.tcas.DocumentAnnotation;
+
+import java.util.*;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedHistoryAttributeClassifier {
+
+   // Keys of the boolean feature map produced by extract(..) and read by classifyWithLogic(..).
+   // NOTE(review): several string values do not resemble their constant names
+   // ("donor_srlarg", "other_token", "other_deppath"); they look carried over from a different
+   // classifier. They are only used as map keys within this class - confirm before renaming.
+   private static final String POSTCOORD_NMOD = "donor_srlarg";
+   private static final String DISCUSSION_DEPPATH = "discussion_deppath";
+   private static final String SUBSUMED_CHUNK = "other_token";
+   private static final String SUBSUMED_ANNOT = "other_deppath";
+   // Not registered in FeatureIndex below, so it has no default "false" entry in the feature map;
+   // extract(..) only adds it when it could actually be computed.
+   private static final String IN_HIST_SECTION = "in_history_section";
+   // Names of the features that extract(..) initializes to false before computing them.
+   public static ArrayList<String> FeatureIndex = new ArrayList<String>();
+
+   // Register the features that always get a default value.
+   static {
+      FeatureIndex.add( POSTCOORD_NMOD );
+      FeatureIndex.add( DISCUSSION_DEPPATH );
+      FeatureIndex.add( SUBSUMED_CHUNK );
+      FeatureIndex.add( SUBSUMED_ANNOT );
+   }
+
+   // Section-heading prefixes that mark a history section.
+   // Only enter lower-case strings here for comparison with sentences in isInHistSection
+   private static final String[] GHC_HIST_SECTIONS =
+         {
+               "fh",
+               "sh",
+               //"hpi",  // based on 8/30 review of errors, this is a pretty lousy indicator of history
+               "pmh", // missed a bunch of these in 8/20 run reviewed on 8/30. am i forgetting lowercase?
+               "psh",
+               "social history:",
+               "family history",
+               "past medical history",
+               "pmh/psh" // missed a bunch of these in 8/20 run reviewed on 8/30. am i forgetting lowercase?
+         };
+
+   /**
+    * Classifies a mention by extracting its boolean features and applying the rule logic to them.
+    *
+    * @param jCas          the jcas of the document
+    * @param sentences     the sentence list handed on to the feature extraction
+    * @param sentence      the sentence handed on to the feature extraction
+    * @param sentenceIndex the index handed on to the feature extraction, where it is used to look up
+    *                      neighbours of {@code sentence} in {@code sentences}
+    * @param mention       the annotation to classify
+    * @return the outcome of the rule logic for the extracted features
+    */
+   public static Boolean getHistory( JCas jCas,
+                                     final List<Sentence> sentences,
+                                     final Sentence sentence,
+                                     final int sentenceIndex,
+                                     IdentifiedAnnotation mention ) {
+      return classifyWithLogic( extract( jCas, sentences, sentence, sentenceIndex, mention ) );
+   }
+
+   /**
+    * Placeholder for a check of whether an annotation is preceded by "h/o".
+    * The check is not implemented: both arguments are ignored and the answer is always false.
+    *
+    * @param jCas - the jcas of the document (unused)
+    * @param arg  - the annotation in question (unused)
+    * @return always false
+    */
+   public static Boolean precededByH_O( JCas jCas, Annotation arg ) {
+      return Boolean.FALSE;
+   }
+
+
+   /**
+    * Rule-based decision over the extracted features; may be replaced by learned classification.
+    * The answer is true when the discussion-context feature, the subsuming-annotation feature or
+    * the noun-modifier feature is set.
+    * <p>
+    * The subsuming-chunk feature does not contribute: its increment was already disabled in the
+    * earlier version of this method, so it is simply not read here.
+    *
+    * @param vfeat feature map as produced by {@code extract}; a missing entry counts as false
+    * @return true if any of the contributing features is true
+    */
+   public static Boolean classifyWithLogic( HashMap<String, Boolean> vfeat ) {
+      // Boolean.TRUE.equals(..) is null-safe, so an absent key no longer fails on auto-unboxing.
+      return Boolean.TRUE.equals( vfeat.get( DISCUSSION_DEPPATH ) )
+             || Boolean.TRUE.equals( vfeat.get( SUBSUMED_ANNOT ) )
+             || Boolean.TRUE.equals( vfeat.get( POSTCOORD_NMOD ) );
+   }
+
+   /*
+    * SRH adding 8/19/13
+    * Idea is that I want to know if I am in a "sentence" that starts with
+    * a GH history section name.
+    * There's some work to be done here.
+    * Let's define paragraphs as what's delimited by \n in GH docs
+    * Then we can define these sections as I've seen them by what's in a
+    * paragraph.
+    * But a paragraph may have more than one sentence in it.
+    * So I have to actually not find the first part of the sentence that
+    * contains the thing, but the paragraph.
+    * So actually I have to start from the sentence and search backwards
+    * for a newline.
+    * So what's written below works (untested/unerified) in the case that I have
+    * the starting sentence of a paragraph.
+    * But I still have to find that first sentence.
+    */
+   private static boolean isInHistSection( Sentence s ) {
+      // We want to trim the covered text before attempting substring, otherwise the substring call indices can be out of bounds
+      String sText = s.getCoveredText().trim();
+
+      for ( String secStart : GHC_HIST_SECTIONS ) {
+         int slen = secStart.length();
+
+         if ( sText.length() >= slen ) {
+            String sentStart = sText.substring( 0, slen ).toLowerCase();
+            if ( sentStart.equals( secStart ) ) {
+               return true;
+            }
+         }
+      }
+
+      return false;
+   }
+
+
+   /*
+    * This comparator compares two Annotations for location for purposes of
+    * sorting. Annotations are equal in location iff begin and end locations are equal.
+    * Otherwise, the annotation that has the earlier begin sorts above later begin.
+    * If begins are equal but ends are not, then that with earlier end sorts higher.
+    *
+    * Integer.compare is used rather than subtraction so the result can never be distorted
+    * by integer overflow; only the sign of the result is meaningful.
+    */
+   public static class SpanComparator implements Comparator<Annotation> {
+      @Override
+      public int compare( Annotation a1, Annotation a2 ) {
+         final int byBegin = Integer.compare( a1.getBegin(), a2.getBegin() );
+         if ( byBegin != 0 ) {
+            return byBegin;
+         }
+         return Integer.compare( a1.getEnd(), a2.getEnd() );
+      }
+   }
+
+
+   /**
+    * Computes the boolean features for one annotation.
+    * <p>
+    * Every feature named in {@code FeatureIndex} starts out false.  When {@code sentence} is not
+    * null, the history-section, subsuming-annotation and subsuming-chunk features are computed;
+    * the dependency-based features are computed regardless of the sentence.
+    * The history-section entry is only added when a DocumentAnnotation exists and the sentence
+    * does not begin at offset 0; otherwise the map has no entry for it.
+    *
+    * @param jCas          the jcas of the document
+    * @param sentences     sentences in which neighbours of {@code sentence} are looked up by index
+    * @param sentence      sentence used for the history-section test; may be null
+    * @param sentenceIndex index of {@code sentence} within {@code sentences}
+    * @param arg           the annotation to compute features for
+    * @return map from feature name to feature value
+    */
+   public static HashMap<String, Boolean> extract( JCas jCas,
+                                                   final List<Sentence> sentences,
+                                                   final Sentence sentence,
+                                                   final int sentenceIndex,
+                                                   Annotation arg ) {
+      final HashMap<String, Boolean> vfeat = new HashMap<>();
+      for ( String feat : FeatureIndex ) {
+         vfeat.put( feat, false );
+      }
+
+      if ( sentence != null ) {
+
+         // 0) does the paragraph holding the sentence start with a history section name?
+         final Collection<DocumentAnnotation> docAnnots = JCasUtil.select( jCas, DocumentAnnotation.class );
+         if ( !docAnnots.isEmpty() && sentence.getBegin() > 0 ) {
+            final String doctext = docAnnots.iterator().next().getCoveredText();
+            vfeat.put( IN_HIST_SECTION,
+                  isParagraphInHistSection( doctext, sentences, sentence, sentenceIndex ) );
+         }
+
+         // 2) some other identified annotation subsumes this one?
+         final List<IdentifiedAnnotation> lsmentions = JCasUtil.selectCovering( jCas,
+               IdentifiedAnnotation.class, arg.getBegin(),
+               arg.getEnd() );
+         Collections.sort( lsmentions, new SpanComparator() );
+
+         for ( IdentifiedAnnotation annot : lsmentions ) {
+            if ( annot.getBegin() > arg.getBegin() ) {
+               // sorted by begin: nothing further on can start at or before arg
+               break;
+            }
+            if ( annot.getEnd() < arg.getEnd() ) {
+               // annot stops before arg does, so it does not span arg
+               continue;
+            }
+            // annot spans arg; it counts only if it has a different nominal head
+            // and is an EntityMention or an EventMention
+            if ( !DependencyUtility.equalCoverage(
+                  DependencyUtility.getNominalHeadNode( jCas, annot ),
+                  DependencyUtility.getNominalHeadNode( jCas, arg ) )
+                 && ( (annot instanceof EntityMention) || (annot instanceof EventMention) ) ) {
+               vfeat.put( SUBSUMED_ANNOT, true );
+               break; // no reason to keep checking
+            }
+         }
+
+         // 3) some chunk subsumes this?
+         // NOTE(review): the candidates are the 5 chunks selected as preceding and the 5 selected as
+         // following arg.  If those selections exclude chunks overlapping arg, no candidate can
+         // span arg and this feature stays false - confirm whether a covering selection was meant.
+         final List<Chunk> lschunks = JCasUtil.selectPreceding( jCas, Chunk.class, arg, 5 );
+         lschunks.addAll( JCasUtil.selectFollowing( jCas, Chunk.class, arg, 5 ) );
+         for ( Chunk chunk : lschunks ) {
+            if ( chunk.getBegin() > arg.getBegin() ) {
+               break;
+            }
+            if ( chunk.getEnd() < arg.getEnd() ) {
+               continue;
+            }
+            if ( !DependencyUtility.equalCoverage(
+                  DependencyUtility.getNominalHeadNode( jCas, chunk ),
+                  DependencyUtility.getNominalHeadNode( jCas, arg ) ) ) {
+               // the chunk spans arg and has a different nominal head
+               vfeat.put( SUBSUMED_CHUNK, true );
+            }
+         }
+      }
+
+      final List<ConllDependencyNode> depnodes = JCasUtil.selectCovered( jCas, ConllDependencyNode.class, arg );
+      if ( !depnodes.isEmpty() ) {
+         final ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode( depnodes );
+
+         // 1) check if the head node of the entity mention is really just part of a larger noun phrase
+         if ( depnode.getDeprel().matches( "(NMOD|amod|nmod|det|predet|nn|poss|possessive|infmod|partmod|rcmod)" ) ) {
+            vfeat.put( POSTCOORD_NMOD, true );
+         }
+
+         // 4) search dependency paths for discussion context
+         for ( ConllDependencyNode dn : DependencyUtility.getPathToTop( jCas, depnode ) ) {
+            if ( isDiscussionContext( dn ) ) {
+               vfeat.put( DISCUSSION_DEPPATH, true );
+               break; // one hit is enough
+            }
+         }
+      }
+      return vfeat;
+   }
+
+   /**
+    * Finds the first sentence of the paragraph holding {@code sentence} and tests whether it starts
+    * with a history section name.
+    * <p>
+    * Starting at {@code sentenceIndex} the sentences are walked backwards.  The walk stops at the
+    * first sentence that is separated from its predecessor by a newline, or at index 0.
+    * Reaching index 0 without a newline means the very first sentence starts the paragraph, so
+    * that sentence is tested.  (Previously the second sentence was tested in that situation
+    * whenever the walk took more than one step, because the branch handling it was unreachable.)
+    */
+   private static boolean isParagraphInHistSection( final String doctext,
+                                                    final List<Sentence> sentences,
+                                                    final Sentence sentence,
+                                                    final int sentenceIndex ) {
+      Sentence paragraphStart = sentence;
+      for ( int index = sentenceIndex; index > 0; index-- ) {
+         final Sentence previous = sentences.get( index - 1 );
+         if ( hasNewlineBetween( doctext, previous.getEnd(), paragraphStart.getBegin() ) ) {
+            break;
+         }
+         paragraphStart = previous;
+      }
+      return isInHistSection( paragraphStart );
+   }
+
+   /**
+    * Tells whether {@code text} has a newline at an offset in [from, to).
+    * Offsets that do not describe a valid range of the text count as "no newline".
+    */
+   private static boolean hasNewlineBetween( final String text, final int from, final int to ) {
+      if ( from < 0 || from > to || to > text.length() ) {
+         return false;
+      }
+      final int newline = text.indexOf( '\n', from );
+      return newline != -1 && newline < to;
+   }
+
+
+   /**
+    * Tells whether the covered text of an annotation starts with "donor", ignoring case.
+    * Lower-casing uses Locale.ROOT so the result does not depend on the default locale.
+    *
+    * @param arg the annotation whose covered text is tested
+    * @return true if the lower-cased covered text matches "(donor).*"
+    */
+   private static boolean isDonorTerm( Annotation arg ) {
+      return arg.getCoveredText().toLowerCase( Locale.ROOT )
+                .matches( "(donor).*" );
+   }
+
+
+   /**
+    * Tells whether the covered text of an annotation starts with one of the discussion verbs
+    * listed in the pattern, ignoring case.
+    * Lower-casing uses Locale.ROOT so the result does not depend on the default locale
+    * (with a Turkish default locale "DISCUSS" would otherwise not match).
+    *
+    * @param arg the annotation whose covered text is tested
+    * @return true if the lower-cased covered text matches the discussion pattern
+    */
+   private static boolean isDiscussionContext( Annotation arg ) {
+      return arg.getCoveredText().toLowerCase( Locale.ROOT )
+                .matches( "(discuss|ask|understand|understood|tell|told|mention|talk|speak|spoke|address).*" );
+   }
+
+
+   // Scratch entry point for trying out a regex: prints "match" or "no match" for a fixed sample.
+   public static void main( String[] args ) {
+      final String sample = "steps";
+      final boolean matched
+            = sample.toLowerCase().matches( ".*(in-law|stepc|stepd|stepso|stepf|stepm|step-).*" );
+      System.out.println( matched ? "match" : "no match" );
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractLeftToRightContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractLeftToRightContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractLeftToRightContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractLeftToRightContext.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,62 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.WindowedContextFeature;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+import org.cleartk.ml.feature.extractor.NamedFeatureExtractor1;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Windowed context that emits its features in ascending position order,
+ * from position {@code begin} up to (but not including) position {@code end}.
+ * Positions without a usable annotation produce an out-of-bounds feature instead.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractLeftToRightContext extends AbstractWindowedContext {
+
+   public AbstractLeftToRightContext( int begin, int end ) {
+      super( begin, end );
+   }
+
+   /**
+    * Extracts one group of features per position in the window.
+    *
+    * @param jCas            the jcas of the document
+    * @param focusAnnotation the annotation the window is positioned relative to
+    * @param bounds          annotations not contained in these bounds are treated as out of bounds
+    * @param annotationClass the type of annotation to select
+    * @param extractor       produces the features of each in-bounds annotation
+    * @return the features in ascending position order
+    * @throws CleartkExtractorException if the wrapped extractor fails
+    */
+   @Override
+   public <SEARCH_T extends Annotation> List<Feature> extract( JCas jCas, Annotation focusAnnotation,
+                                                               CleartkExtractor.Bounds bounds,
+                                                               Class<SEARCH_T> annotationClass,
+                                                               FeatureExtractor1<SEARCH_T> extractor ) throws
+                                                                                                       CleartkExtractorException {
+      // only named extractors can label the out-of-bounds features
+      String featureName = null;
+      if ( extractor instanceof NamedFeatureExtractor1 ) {
+         featureName = ((NamedFeatureExtractor1<SEARCH_T>)extractor).getFeatureName();
+      }
+
+      // select up to "end" annotations, then drop the ones before "begin"
+      final List<SEARCH_T> selected = this.select( jCas, focusAnnotation, annotationClass, this.end );
+      final int selectedCount = selected.size();
+      final boolean windowReachable = this.begin <= selectedCount;
+      final List<SEARCH_T> window = windowReachable
+                                    ? selected.subList( this.begin, selectedCount )
+                                    : new ArrayList<SEARCH_T>();
+      // when the window starts past the selection, the out-of-bounds count starts higher
+      int oobPos = windowReachable ? 1 : this.begin - selectedCount + 1;
+
+      final List<Feature> features = new ArrayList<>();
+      final Iterator<SEARCH_T> remaining = window.iterator();
+      for ( int pos = this.begin; pos < this.end; pos++ ) {
+         final SEARCH_T ann = remaining.hasNext() ? remaining.next() : null;
+         if ( ann == null || !bounds.contains( ann ) ) {
+            features.add( new WindowedContextFeature( this.getName(), pos, oobPos, featureName ) );
+            oobPos++;
+            continue;
+         }
+         for ( Feature feature : extractor.extract( jCas, ann ) ) {
+            features.add( new WindowedContextFeature( this.getName(), pos, feature ) );
+         }
+      }
+      return features;
+   }
+
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractRightToLeftContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractRightToLeftContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractRightToLeftContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractRightToLeftContext.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,71 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.WindowedContextFeature;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+import org.cleartk.ml.feature.extractor.NamedFeatureExtractor1;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Windowed context that emits its features in descending position order,
+ * from position {@code end - 1} down to position {@code begin}.
+ * Positions without a usable annotation produce an out-of-bounds feature instead.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractRightToLeftContext extends AbstractWindowedContext {
+
+   public AbstractRightToLeftContext( int begin, int end ) {
+      super( begin, end );
+   }
+
+   /**
+    * Extracts one group of features per position in the window.
+    *
+    * @param jCas            the jcas of the document
+    * @param focusAnnotation the annotation the window is positioned relative to
+    * @param bounds          annotations not contained in these bounds are treated as out of bounds
+    * @param annotationClass the type of annotation to select
+    * @param extractor       produces the features of each in-bounds annotation
+    * @return the features in descending position order
+    * @throws CleartkExtractorException if the wrapped extractor fails
+    */
+   @Override
+   public <SEARCH_T extends Annotation> List<Feature> extract( JCas jCas, Annotation focusAnnotation,
+                                                               CleartkExtractor.Bounds bounds,
+                                                               Class<SEARCH_T> annotationClass,
+                                                               FeatureExtractor1<SEARCH_T> extractor )
+         throws CleartkExtractorException {
+      // only named extractors can label the out-of-bounds features
+      String featureName = null;
+      if ( extractor instanceof NamedFeatureExtractor1 ) {
+         featureName = ((NamedFeatureExtractor1<SEARCH_T>)extractor).getFeatureName();
+      }
+
+      // select up to "end" annotations, then cut off the last "begin" of them
+      final List<SEARCH_T> selected = this.select( jCas, focusAnnotation, annotationClass, this.end );
+      final int missing = this.end - selected.size();
+      final int keep = Math.max( 0, selected.size() - this.begin );
+      final List<SEARCH_T> window = selected.subList( 0, keep );
+
+      // out-of-bounds total: positions with no annotation plus annotations outside the bounds
+      int oobPos = missing;
+      for ( SEARCH_T candidate : window ) {
+         if ( !bounds.contains( candidate ) ) {
+            oobPos++;
+         }
+      }
+
+      final List<Feature> features = new ArrayList<Feature>();
+      for ( int pos = this.end - 1; pos >= this.begin; pos-- ) {
+         // the highest positions come first; the first "missing" of them have no annotation
+         final int windowIndex = this.end - 1 - pos - missing;
+         final SEARCH_T ann = windowIndex < 0 ? null : window.get( windowIndex );
+         if ( ann == null || !bounds.contains( ann ) ) {
+            features.add( new WindowedContextFeature( this.getName(), pos, oobPos, featureName ) );
+            oobPos--;
+            continue;
+         }
+         for ( Feature feature : extractor.extract( jCas, ann ) ) {
+            features.add( new WindowedContextFeature( this.getName(), pos, feature ) );
+         }
+      }
+      return features;
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/AbstractWindowedContext.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,67 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractWindowedContext implements CleartkExtractor.Context {
+
+   protected int begin;
+   protected int end;
+   private String name;
+
+   protected List<Annotation> _windowCovered = new ArrayList<>();
+
+   public <T extends Annotation> void setWindow( final List<T> windowCovered ) {
+      _windowCovered.clear();
+      _windowCovered.addAll( windowCovered );
+   }
+
+
+   public AbstractWindowedContext( int begin, int end ) {
+      if ( begin > end ) {
+         String message = "expected begin < end, found begin=%d end=%d";
+         throw new IllegalArgumentException( String.format( message, begin, end ) );
+      } else {
+         this.begin = begin;
+         this.end = end;
+         this.name = Feature.createName( this.getClass().getSimpleName(),
+               String.valueOf( this.begin ), String.valueOf( this.end ) );
+      }
+   }
+
+   public String getName() {
+      return this.name;
+   }
+
+   protected <T extends Annotation> List<T> selectCovered( final Annotation covering, final Class<T> coveredClass ) {
+//      System.out.print( "LastCoveredContext focusAnnotation "
+//                          + covering.getClass().getName() + " " + covering.getCoveredText()
+//                          + "   want covering " + coveredClass.getName() );
+      final List<T> covered = new ArrayList<>();
+      for ( Annotation annotation : _windowCovered ) {
+         if ( coveredClass.isInstance( annotation )
+              && annotation.getBegin() >= covering.getBegin()
+              && annotation.getEnd() <= covering.getEnd() ) {
+//            System.out.print( "  Yes " + annotation.getClass().getName() + " " + annotation.getCoveredText() );
+            covered.add( (T)annotation );
+         }
+//         System.out.print( "  No " + annotation.getClass().getName() + " " + annotation.getCoveredText() );
+      }
+//      System.out.println();
+      return covered;
+   }
+
+   protected abstract <T extends Annotation> List<T> select( JCas var1, Annotation var2, Class<T> var3, int var4 );
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/FollowingContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/FollowingContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/FollowingContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/FollowingContext.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,68 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A {@link org.cleartk.ml.feature.extractor.CleartkExtractor.Context} built from the window
+ * annotations that start at or after the end of the focus annotation.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class FollowingContext extends AbstractLeftToRightContext {
+
+   /**
+    * Constructs a context over the following N annotations.
+    *
+    * @param end The number of annotations to extract.
+    */
+   public FollowingContext( int end ) {
+      super( 0, end );
+   }
+
+   /**
+    * Constructs a context over a slice of the following annotations.
+    * <p>
+    * Indexes count from 0, where index 0 identifies the first annotation following the focus
+    * annotation. Features are produced for the indexes from {@code begin} (inclusive) to
+    * {@code end} (exclusive); an index for which no annotation is available yields an
+    * "out of bounds" feature.
+    *
+    * @param begin The index of the first annotation to include.
+    * @param end   The index just past the last annotation to include. Must not be less than
+    *              {@code begin}.
+    */
+   public FollowingContext( int begin, int end ) {
+      super( begin, end );
+   }
+
+   /**
+    * Scans the window front to back and keeps the annotations of the wanted type that begin at
+    * or after the end of the focus annotation. Scanning stops once {@code count} have been kept.
+    */
+   @Override
+   protected <T extends Annotation> List<T> select( final JCas jCas,
+                                                    final Annotation focusAnnotation,
+                                                    final Class<T> annotationClass,
+                                                    final int count ) {
+      final List<T> following = new ArrayList<>( count );
+      final int focusEnd = focusAnnotation.getEnd();
+
+      for ( Annotation candidate : _windowCovered ) {
+         if ( !annotationClass.isInstance( candidate ) || candidate.getBegin() < focusEnd ) {
+            continue;
+         }
+         following.add( annotationClass.cast( candidate ) );
+         if ( following.size() == count ) {
+            break;
+         }
+      }
+      return following;
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/LastCoveredContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/LastCoveredContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/LastCoveredContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/LastCoveredContext.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,32 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import java.util.List;
+
+/**
+ * Context built from the last window annotations that lie within the span of the focus annotation.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class LastCoveredContext extends AbstractRightToLeftContext {
+   public LastCoveredContext( int end ) {
+      super( 0, end );
+   }
+
+   public LastCoveredContext( int begin, int end ) {
+      super( begin, end );
+   }
+
+
+   /**
+    * Returns the trailing {@code count} annotations of the wanted type covered by the focus
+    * annotation, or all of them when there are fewer than {@code count}.
+    */
+   @Override
+   protected <T extends Annotation> List<T> select( final JCas jCas,
+                                                    final Annotation focusAnnotation,
+                                                    final Class<T> annotationClass,
+                                                    final int count ) {
+      final List<T> covered = selectCovered( focusAnnotation, annotationClass );
+      final int size = covered.size();
+      final int from = Math.max( size - count, 0 );
+      return covered.subList( from, size );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/PrecedingContext.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/PrecedingContext.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/PrecedingContext.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/PrecedingContext.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,45 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Context built from the window annotations that end at or before the begin of the focus annotation.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class PrecedingContext extends AbstractRightToLeftContext {
+
+   public PrecedingContext( int end ) {
+      super( 0, end );
+   }
+
+   public PrecedingContext( int begin, int end ) {
+      super( begin, end );
+   }
+
+   /**
+    * Scans the window back to front and keeps the annotations of the wanted type that end at or
+    * before the begin of the focus annotation. Scanning stops once {@code count} have been kept.
+    * The result is returned in window order.
+    */
+   @Override
+   protected <T extends Annotation> List<T> select( final JCas jCas,
+                                                    final Annotation focusAnnotation,
+                                                    final Class<T> annotationClass,
+                                                    final int count ) {
+      final List<T> preceding = new ArrayList<>( count );
+      final int focusBegin = focusAnnotation.getBegin();
+
+      for ( int i = _windowCovered.size() - 1; i >= 0; i-- ) {
+         final Annotation candidate = _windowCovered.get( i );
+         if ( !annotationClass.isInstance( candidate ) || candidate.getEnd() > focusBegin ) {
+            continue;
+         }
+         preceding.add( annotationClass.cast( candidate ) );
+         if ( preceding.size() == count ) {
+            break;
+         }
+      }
+      // collected nearest-first; flip back into window order
+      Collections.reverse( preceding );
+      return preceding;
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/WindowedBag.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/WindowedBag.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/WindowedBag.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/WindowedBag.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,74 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context;
+
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.WindowedContextFeature;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A {@link CleartkExtractor.Context} that aggregates the features of other contexts into a "bag" where the
+ * position information of each individual feature is no longer maintained. Position information is not
+ * entirely lost - the span of the bag is encoded as part of the feature name that is shared by
+ * all of the features within the bag.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedBag implements CleartkExtractor.Context {
+   private final CleartkExtractor.Context[] contexts;
+
+   private final String name;
+
+   /**
+    * Constructs a {@link CleartkExtractor.Context} which converts the features extracted by the argument contexts
+    * into a bag of features where all features have the same name. The bag name is built from the literal
+    * "Bag" followed by the name of every wrapped context.
+    *
+    * @param contexts The contexts which should be combined into a bag.
+    */
+   public WindowedBag( CleartkExtractor.Context... contexts ) {
+      this.contexts = contexts;
+      final String[] nameParts = new String[ contexts.length + 1 ];
+      nameParts[ 0 ] = "Bag";
+      int slot = 1;
+      for ( CleartkExtractor.Context context : contexts ) {
+         nameParts[ slot++ ] = context.getName();
+      }
+      this.name = Feature.createName( nameParts );
+   }
+
+   /**
+    * @return the bag name computed at construction.
+    */
+   @Override
+   public String getName() {
+      return this.name;
+   }
+
+   /**
+    * Runs every wrapped context and re-wraps each resulting feature under the bag name.
+    * Every feature produced by a wrapped context is cast to {@link WindowedContextFeature}.
+    *
+    * @return one bag feature per feature produced by the wrapped contexts, in extraction order.
+    * @throws CleartkExtractorException if a wrapped context fails to extract.
+    */
+   @Override
+   public <SEARCH_T extends Annotation> List<Feature> extract(
+         JCas jCas,
+         Annotation focusAnnotation,
+         CleartkExtractor.Bounds bounds,
+         Class<SEARCH_T> annotationClass,
+         FeatureExtractor1<SEARCH_T> extractor ) throws CleartkExtractorException {
+      final List<Feature> bagged = new ArrayList<>();
+      for ( CleartkExtractor.Context context : this.contexts ) {
+         final List<Feature> extracted
+               = context.extract( jCas, focusAnnotation, bounds, annotationClass, extractor );
+         for ( Feature positioned : extracted ) {
+            final WindowedContextFeature windowed = (WindowedContextFeature)positioned;
+            // keep the inner feature's own name, dropping the positional part of the outer name
+            final Feature unpositioned = new Feature( windowed.feature.getName(), positioned.getValue() );
+            bagged.add( new WindowedContextFeature( this.getName(), unpositioned ) );
+         }
+      }
+      return bagged;
+   }
+
+}
\ No newline at end of file

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/WindowedContextFeature.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/WindowedContextFeature.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/WindowedContextFeature.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/WindowedContextFeature.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,34 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature;
+
+import org.cleartk.ml.Feature;
+
+import java.util.Locale;
+
+/**
+ * A {@link Feature} that wraps another feature and prefixes its name with the name of the context
+ * (and optionally a position) it was extracted from. The value is copied from the wrapped feature.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedContextFeature extends Feature {
+   private static final long serialVersionUID = 1L;
+   /** The wrapped feature. */
+   public Feature feature;
+
+   /**
+    * Wraps a feature under a base name, without position information.
+    *
+    * @param baseName name prefix.
+    * @param feature  feature to wrap.
+    */
+   public WindowedContextFeature( String baseName, Feature feature ) {
+      wrap( feature, Feature.createName( baseName, feature.getName() ) );
+   }
+
+   /**
+    * Wraps a feature under a base name and a position.
+    *
+    * @param baseName name prefix.
+    * @param position position placed between the base name and the feature name.
+    * @param feature  feature to wrap.
+    */
+   public WindowedContextFeature( String baseName, int position, Feature feature ) {
+      wrap( feature, Feature.createName( baseName, String.valueOf( position ), feature.getName() ) );
+   }
+
+   /**
+    * Creates an out-of-bounds placeholder whose value is {@code "OOB"} followed by {@code oobPosition}.
+    *
+    * @param baseName    name prefix.
+    * @param position    position placed between the base name and the feature name.
+    * @param oobPosition number appended to "OOB" to form the value.
+    * @param featureName name given to the wrapped placeholder feature.
+    */
+   public WindowedContextFeature( String baseName, int position, int oobPosition, String featureName ) {
+      final String oobValue = String.format( Locale.ROOT, "OOB%d", oobPosition );
+      wrap( new Feature( featureName, oobValue ),
+            Feature.createName( baseName, String.valueOf( position ), featureName ) );
+   }
+
+   /**
+    * Stores the wrapped feature, then sets this feature's name and copies the wrapped value.
+    */
+   private void wrap( final Feature wrapped, final String fullName ) {
+      this.feature = wrapped;
+      this.setName( fullName );
+      this.setValue( wrapped.getValue() );
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractTreeFragmentFeatureExtractor1.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractTreeFragmentFeatureExtractor1.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractTreeFragmentFeatureExtractor1.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractTreeFragmentFeatureExtractor1.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,62 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+
+import org.apache.ctakes.assertion.util.SemanticClasses;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.utils.tree.FragmentUtils;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.util.CleartkInitializationException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Scanner;
+
+/**
+ * Base class for windowed extractors that test a mention's tree against a set of tree fragments.
+ * The fragments are read, one per line, from a resource supplied at construction; the semantic classes are
+ * loaded from {@code org/apache/ctakes/assertion/all_cues.txt}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractTreeFragmentFeatureExtractor1
+      extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+   public static final String PARAM_OUTPUTDIR = "outputDir";
+   public static final String PARAM_SEMDIR = "semDir";
+   // Single definition of the cue resource so the load call and the failure report cannot drift apart.
+   private static final String CUES_RESOURCE = "org/apache/ctakes/assertion/all_cues.txt";
+   protected HashSet<SimpleTree> frags = null;
+   protected SemanticClasses sems = null;
+   protected String prefix = null;
+
+   /**
+    * @param prefix           stored in {@link #prefix} for use by subclasses.
+    * @param resourceFilename resource holding one tree fragment per line.
+    * @throws CleartkInitializationException if the semantic classes resource cannot be loaded.
+    */
+   public AbstractTreeFragmentFeatureExtractor1( String prefix, String resourceFilename )
+         throws CleartkInitializationException {
+      initializeFrags( resourceFilename );
+      this.prefix = prefix;
+      try {
+         sems = new SemanticClasses( FileLocator.getAsStream( CUES_RESOURCE ) );
+      } catch ( Exception e ) {
+         throw new CleartkInitializationException( e, CUES_RESOURCE, "Could not find semantic classes resource.", new Object[] {} );
+      }
+   }
+
+   /**
+    * Fills {@link #frags} with one tree per line of the given resource.
+    * Loading is best-effort: an {@link IOException} is reported on stderr and whatever fragments were read
+    * before the failure are kept.
+    *
+    * @param resourceFilename resource holding one tree fragment per line.
+    */
+   private void initializeFrags( String resourceFilename ) {
+      frags = new HashSet<SimpleTree>();
+      // try-with-resources closes the scanner and the stream even when reading or parsing a line throws
+      try ( InputStream fragsFilestream = FileLocator.getAsStream( resourceFilename );
+            Scanner scanner = new Scanner( fragsFilestream ) ) {
+         while ( scanner.hasNextLine() ) {
+            frags.add( FragmentUtils.frag2tree( scanner.nextLine().trim() ) );
+         }
+      } catch ( IOException e ) {
+         System.err.println( "Trouble with tree fragment file: " + e );
+      }
+   }
+
+   /**
+    * Extracts features for the given annotation.
+    *
+    * @param jcas       cas containing the annotation.
+    * @param annotation annotation to extract features for.
+    * @return extracted features.
+    */
+   @Override
+   public abstract List<Feature> extract( JCas jcas, IdentifiedAnnotation annotation );
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractWindowedFeatureExtractor1.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractWindowedFeatureExtractor1.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractWindowedFeatureExtractor1.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/AbstractWindowedFeatureExtractor1.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,33 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+
+import java.util.List;
+
+
+/**
+ * Base class for feature extractors that are handed their working window through setters.
+ * It only stores the supplied sentence list, current sentence, sentence index and base tokens in protected
+ * fields; subclasses read those fields in their own {@code extract} implementations.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+abstract public class AbstractWindowedFeatureExtractor1<T extends Annotation> implements FeatureExtractor1<T> {
+
+   /** Sentences supplied through {@link #setSentences(List)}. */
+   protected List<Sentence> _sentences;
+   /** Sentence supplied through {@link #setWindow(Sentence, int, List)}. */
+   protected Sentence _sentence;
+   /** Sentence index supplied through {@link #setWindow(Sentence, int, List)}. */
+   protected int _sentenceIndex;
+   /** Base tokens supplied through {@link #setWindow(Sentence, int, List)}. */
+   protected List<BaseToken> _baseTokens;
+
+   /**
+    * @param sentences sentence list to store; the reference is kept as-is.
+    */
+   public void setSentences( final List<Sentence> sentences ) {
+      this._sentences = sentences;
+   }
+
+   /**
+    * Stores the current window.
+    *
+    * @param sentence      sentence to store.
+    * @param sentenceIndex index to store alongside the sentence.
+    * @param baseTokens    base tokens to store; the reference is kept as-is.
+    */
+   public void setWindow( final Sentence sentence, final int sentenceIndex, final List<BaseToken> baseTokens ) {
+      this._baseTokens = baseTokens;
+      this._sentenceIndex = sentenceIndex;
+      this._sentence = sentence;
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedAssertionDependencyTreeExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedAssertionDependencyTreeExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedAssertionDependencyTreeExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedAssertionDependencyTreeExtractor.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,73 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import com.google.common.collect.Lists;
+import org.apache.ctakes.assertion.pipelines.GenerateDependencyRepresentation;
+import org.apache.ctakes.assertion.util.AssertionDepUtils;
+import org.apache.ctakes.assertion.util.AssertionTreeUtils;
+import org.apache.ctakes.assertion.util.SemanticClasses;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.TreeFeature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.util.CleartkInitializationException;
+
+import java.util.List;
+
+
+/**
+ * Windowed extractor that emits a single {@link TreeFeature} named "TK_DW" for a mention.
+ * The tree is built from the dependency nodes covered by the current window sentence, with words replaced
+ * by semantic classes loaded from {@code org/apache/ctakes/assertion/all_cues.txt}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedAssertionDependencyTreeExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+   // Same resource path is used for loading and for the failure report, matching the tree fragment extractor.
+   private static final String CUES_RESOURCE = "org/apache/ctakes/assertion/all_cues.txt";
+   // Tree used when no tree could be built for the mention.
+   private static final String NO_PARSE_TREE = "(S (no parse))";
+
+   protected SemanticClasses sems = null;
+
+   /**
+    * @throws CleartkInitializationException if the semantic classes resource cannot be loaded.
+    */
+   public WindowedAssertionDependencyTreeExtractor() throws CleartkInitializationException {
+      try {
+         sems = new SemanticClasses( FileLocator.getAsStream( CUES_RESOURCE ) );
+      } catch ( Exception e ) {
+         throw new CleartkInitializationException( e, CUES_RESOURCE, "Could not find semantic classes resource.", new Object[] {} );
+      }
+   }
+
+   /**
+    * Builds the token tree for the mention from the dependency nodes covered by {@code _sentence}.
+    *
+    * @param jCas ye olde ...
+    * @param arg1 mention to build the tree for.
+    * @return a list holding exactly one "TK_DW" tree feature; its value is "(S (no parse))" when no tree
+    * could be built.
+    * @throws CleartkExtractorException declared by the extractor contract.
+    */
+   @Override
+   public List<Feature> extract( JCas jCas, IdentifiedAnnotation arg1 )
+         throws CleartkExtractorException {
+      final List<ConllDependencyNode> nodes = JCasUtil.selectCovered( ConllDependencyNode.class, _sentence );
+      final SimpleTree tree
+            = AssertionDepUtils.getTokenTreeString( jCas, nodes, arg1, GenerateDependencyRepresentation.UP_NODES );
+
+      final String treeString;
+      if ( tree == null ) {
+         treeString = NO_PARSE_TREE;
+      } else {
+         AssertionTreeUtils.replaceDependencyWordsWithSemanticClasses( tree, sems );
+         treeString = tree.toString();
+      }
+
+      final List<Feature> feats = Lists.newArrayList();
+      feats.add( new TreeFeature( "TK_DW", treeString ) );
+      return feats;
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedContextWordWindowExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedContextWordWindowExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedContextWordWindowExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedContextWordWindowExtractor.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,123 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Windowed extractor that emits a single numeric "WORD_SCORE" feature for a mention.
+ * Term weights are read from a resource with lines of the form {@code term : value} and rescaled at
+ * construction; the score is the weighted average of the weights of the window tokens.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedContextWordWindowExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+   private static final Pattern linePatt = Pattern.compile( "^([^ ]+) : (.+)$" );
+   // Tokens this many positions (or more) away from the mention get weight 0.
+   private static final int MAX_DISTANCE = 50;
+
+   private HashMap<String, Double> termVals = null;
+
+   /**
+    * Loads and rescales the term weights.
+    * Each weight {@code v} becomes {@code (v - minNeg) / (maxAbs - minNeg)}, where {@code maxAbs} is the largest
+    * absolute weight and {@code minNeg} the most negative weight (0 if none is negative).
+    * Lines that do not match {@code term : value} are ignored.
+    *
+    * @param resourceFilename classpath resource holding the term weights.
+    * @throws IllegalArgumentException if the resource cannot be found on the classpath.
+    * @throws NumberFormatException    if a matching line has a value that is not a number.
+    */
+   public WindowedContextWordWindowExtractor( String resourceFilename ) {
+      termVals = new HashMap<>();
+      final InputStream is = getClass().getClassLoader().getResourceAsStream( resourceFilename );
+      if ( is == null ) {
+         // getResourceAsStream signals a missing resource with null; fail with the offending name
+         throw new IllegalArgumentException( "Could not find term weight resource: " + resourceFilename );
+      }
+
+      double max = 0.0;
+      double maxNeg = 0.0;
+      // closing the scanner also closes the underlying stream, even if parsing a value throws
+      try ( Scanner scanner = new Scanner( is ) ) {
+         while ( scanner.hasNextLine() ) {
+            final Matcher m = linePatt.matcher( scanner.nextLine().trim() );
+            if ( m.matches() ) {
+               final double val = Double.parseDouble( m.group( 2 ) );
+               termVals.put( m.group( 1 ), val );
+               max = Math.max( max, Math.abs( val ) );
+               maxNeg = Math.min( maxNeg, val );
+            }
+         }
+      }
+
+      final double range = max - maxNeg;
+      // range is 0 only when no weight was read or every weight is 0; dividing would turn them into NaN
+      if ( range > 0.0 ) {
+         for ( String key : termVals.keySet() ) {
+            termVals.put( key, (termVals.get( key ) - maxNeg) / range );
+         }
+      }
+   }
+
+   /**
+    * Scores the mention against the tokens of the current window ({@code _baseTokens}).
+    *
+    * @param view    ye olde ... (not read by this implementation).
+    * @param mention mention whose begin and end offsets are matched against token offsets.
+    * @return a list holding exactly one "WORD_SCORE" feature; the score is 0 when no token carries weight.
+    * @throws CleartkExtractorException declared by the extractor contract.
+    */
+   @Override
+   public List<Feature> extract( JCas view, IdentifiedAnnotation mention )
+         throws CleartkExtractorException {
+      final List<Feature> feats = new ArrayList<>();
+      final List<BaseToken> tokens = _baseTokens;
+
+      // NOTE(review): both stay -1 when the mention offsets do not line up with a token boundary;
+      // distances are then measured from position -1 - confirm that this is intended.
+      int startIndex = -1;
+      int endIndex = -1;
+      for ( int i = 0; i < tokens.size(); i++ ) {
+         if ( tokens.get( i ).getBegin() == mention.getBegin() ) {
+            startIndex = i;
+         }
+         if ( tokens.get( i ).getEnd() == mention.getEnd() ) {
+            endIndex = i;
+         }
+      }
+
+      double score = 0.0;
+      double z = 0.0;
+      for ( int i = 0; i < tokens.size(); i++ ) {
+         final String key = tokens.get( i ).getCoveredText().toLowerCase();
+         final int dist = Math.min( Math.abs( startIndex - i ), Math.abs( endIndex - i ) );
+         final double weight = weightFunction( dist );
+         z += weight;
+         final Double termVal = termVals.get( key );
+         if ( termVal != null ) {
+            score += weight * termVal;
+         }
+      }
+
+      // weight by actual amount of context so we don't penalize begin/end of sentence.
+      // Skipped when no token contributed weight, which would otherwise produce 0/0 = NaN.
+      if ( z > 0.0 ) {
+         score /= z;
+      }
+      feats.add( new Feature( "WORD_SCORE", score ) );
+      return feats;
+   }
+
+   /**
+    * @param dist token distance from the mention.
+    * @return 0 for distances of {@code MAX_DISTANCE} or more, otherwise 1 (no decay).
+    */
+   private static double weightFunction( int dist ) {
+      if ( dist >= MAX_DISTANCE ) {
+         return 0.0;
+      }
+      // no decay
+      return 1.0;
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedDependencyWordsFragmentExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedDependencyWordsFragmentExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedDependencyWordsFragmentExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedDependencyWordsFragmentExtractor.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,59 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.assertion.pipelines.GenerateDependencyRepresentation;
+import org.apache.ctakes.assertion.util.AssertionDepUtils;
+import org.apache.ctakes.assertion.util.AssertionTreeUtils;
+import org.apache.ctakes.constituency.parser.util.TreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.util.CleartkInitializationException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Windowed extractor that emits one feature per known tree fragment contained in the mention's
+ * dependency token tree. The tree is built from the dependency nodes covered by the current window sentence.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedDependencyWordsFragmentExtractor extends AbstractTreeFragmentFeatureExtractor1 {
+
+   /**
+    * @param prefix    suffix of the emitted feature name ("TreeFrag_" + prefix).
+    * @param fragsPath resource holding the tree fragments, passed to the superclass.
+    * @throws CleartkInitializationException if the superclass fails to initialize.
+    */
+   public WindowedDependencyWordsFragmentExtractor( String prefix, String fragsPath )
+         throws CleartkInitializationException {
+      super( prefix, fragsPath );
+   }
+
+   /**
+    * @param jCas    ye olde ...
+    * @param mention mention to build the tree for.
+    * @return one feature per fragment found in the tree; empty when no tree could be built, in which case
+    * a message is written to stderr.
+    */
+   @Override
+   public List<Feature> extract( JCas jCas, IdentifiedAnnotation mention ) {
+      final List<Feature> matched = new ArrayList<Feature>();
+
+      final List<ConllDependencyNode> sentenceNodes
+            = JCasUtil.selectCovered( ConllDependencyNode.class, _sentence );
+      final SimpleTree mentionTree = AssertionDepUtils.getTokenTreeString(
+            jCas, sentenceNodes, mention, GenerateDependencyRepresentation.UP_NODES );
+
+      if ( mentionTree == null ) {
+         System.err.println( "Tree is null!" );
+         return matched;
+      }
+
+      AssertionTreeUtils.replaceDependencyWordsWithSemanticClasses( mentionTree, sems );
+      final String featureName = "TreeFrag_" + prefix;
+      for ( SimpleTree fragment : frags ) {
+         if ( TreeUtils.containsDepFragIgnoreCase( mentionTree, fragment ) ) {
+            matched.add( new Feature( featureName, fragment.toString() ) );
+         }
+      }
+      return matched;
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedGenericFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedGenericFeaturesExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedGenericFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedGenericFeaturesExtractor.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,54 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.classifier.WindowedGenericAttributeClassifier;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+
+import java.util.*;
+
+/**
+ * Windowed extractor for the generic attribute. It combines dependency head features of the mention with
+ * the boolean features and the verdict of {@link WindowedGenericAttributeClassifier}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedGenericFeaturesExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+
+   /**
+    * @param jCas ye olde ...
+    * @param arg  mention to extract features for.
+    * @return the head features (only when a nominal head node is found), then the rule-based boolean
+    * features, then the "GENERIC_CLASSIFIER_LOGIC" feature.
+    */
+   @Override
+   public List<Feature> extract( JCas jCas, IdentifiedAnnotation arg ) {
+      final List<Feature> collected = new ArrayList<>();
+
+      // General dependency-based features of the nominal head, when there is one
+      final ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode( jCas, arg );
+      if ( headNode != null ) {
+         collected.add( new Feature( "DEPENDENCY_HEAD", headNode.getCoveredText() ) );
+         collected.add( new Feature( "DEPENDENCY_HEAD_deprel", headNode.getDeprel() ) );
+      }
+
+      final HashMap<String, Boolean> ruleFeatures
+            = WindowedGenericAttributeClassifier.extract( jCas, _sentence, arg );
+
+      // Every feature used by the rule-based module ...
+      collected.addAll( hashToFeatureList( ruleFeatures ) );
+      // ... followed by the result of the rule-based module itself
+      collected.add( new Feature( "GENERIC_CLASSIFIER_LOGIC",
+            WindowedGenericAttributeClassifier.classifyWithLogic( ruleFeatures ) ) );
+
+      return collected;
+   }
+
+   /**
+    * Turns every map entry into a feature named after the key and valued with the entry value.
+    *
+    * @param featsIn boolean features keyed by name.
+    * @return the features, collected in a {@link HashSet}.
+    */
+   private static Collection<? extends Feature> hashToFeatureList(
+         HashMap<String, Boolean> featsIn ) {
+
+      final Collection<Feature> featsOut = new HashSet<>();
+      for ( Map.Entry<String, Boolean> entry : featsIn.entrySet() ) {
+         featsOut.add( new Feature( entry.getKey(), entry.getValue() ) );
+      }
+
+      return featsOut;
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedHistoryFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedHistoryFeatureExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedHistoryFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedHistoryFeatureExtractor.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,62 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.assertion.medfacts.cleartk.windowed.classifier.WindowedHistoryAttributeClassifier;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+
+import java.util.*;
+
+
+/**
+ * Windowed extractor for the history attribute. It combines dependency head features of the mention with
+ * the boolean features and the verdict of {@link WindowedHistoryAttributeClassifier}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedHistoryFeatureExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+
+   /**
+    * @param jCas ye olde ...
+    * @param arg  mention to extract features for.
+    * @return the head features (only when a nominal head node is found), then the rule-based boolean
+    * features, then the "HISTORY_CLASSIFIER_LOGIC" feature.
+    */
+   @Override
+   public List<Feature> extract( JCas jCas, IdentifiedAnnotation arg ) {
+      final List<Feature> collected = new ArrayList<>();
+
+      // General dependency-based features of the nominal head, when there is one
+      final ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode( jCas, arg );
+      if ( headNode != null ) {
+         collected.add( new Feature( "DEPENDENCY_HEAD_word", headNode.getCoveredText() ) );
+         collected.add( new Feature( "DEPENDENCY_HEAD_deprel", headNode.getDeprel() ) );
+      }
+
+      final HashMap<String, Boolean> ruleFeatures = WindowedHistoryAttributeClassifier.extract(
+            jCas, _sentences, _sentence, _sentenceIndex, arg );
+
+      // Every feature used by the rule-based module ...
+      collected.addAll( hashToFeatureList( ruleFeatures ) );
+      // ... followed by the result of the rule-based module itself
+      collected.add( new Feature( "HISTORY_CLASSIFIER_LOGIC",
+            WindowedHistoryAttributeClassifier.classifyWithLogic( ruleFeatures ) ) );
+
+      return collected;
+   }
+
+   /**
+    * Turns every map entry into a feature named after the key and valued with the entry value.
+    *
+    * @param featsIn boolean features keyed by name.
+    * @return the features, collected in a {@link HashSet}.
+    */
+   private static Collection<? extends Feature> hashToFeatureList(
+         HashMap<String, Boolean> featsIn ) {
+
+      final Collection<Feature> featsOut = new HashSet<>();
+      for ( Map.Entry<String, Boolean> entry : featsIn.entrySet() ) {
+         featsOut.add( new Feature( entry.getKey(), entry.getValue() ) );
+      }
+
+      return featsOut;
+   }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedNegationDependencyFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedNegationDependencyFeatureExtractor.java?rev=1850705&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedNegationDependencyFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/windowed/context/feature/extractor/WindowedNegationDependencyFeatureExtractor.java Tue Jan  8 03:45:51 2019
@@ -0,0 +1,51 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.windowed.context.feature.extractor;
+
+import org.apache.ctakes.assertion.util.NegationManualDepContextAnalyzer;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Windowed extractor that emits one feature for every negation dependency pattern reported as present by
+ * {@link NegationManualDepContextAnalyzer} for the mention's nominal head within the current window sentence.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/26/2018
+ */
+public class WindowedNegationDependencyFeatureExtractor extends AbstractWindowedFeatureExtractor1<IdentifiedAnnotation> {
+
+   NegationManualDepContextAnalyzer conAnal = null;
+
+   public WindowedNegationDependencyFeatureExtractor() {
+      conAnal = new NegationManualDepContextAnalyzer();
+   }
+
+   /**
+    * @param jcas            ye olde ...
+    * @param focusAnnotation mention whose nominal head node is analyzed.
+    * @return one feature built from "DepPath_" plus the pattern name for every pattern flagged true.
+    * @throws CleartkExtractorException wrapping any exception raised while analyzing the negation context.
+    */
+   @Override
+   public List<Feature> extract( JCas jcas, IdentifiedAnnotation focusAnnotation )
+         throws CleartkExtractorException {
+      final List<Feature> feats = new ArrayList<>();
+
+      final List<ConllDependencyNode> nodes = DependencyUtility.getDependencyNodes( jcas, _sentence );
+      final ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode( jcas, focusAnnotation );
+      try {
+         final boolean[] regexFeats = conAnal.findNegationContext( nodes, headNode );
+         for ( int j = 0; j < regexFeats.length; j++ ) {
+            if ( regexFeats[ j ] ) {
+               feats.add( new Feature( "DepPath_" + conAnal.getRegexName( j ) ) );
+            }
+         }
+      } catch ( Exception e ) {
+         // The cause travels with the rethrown exception; printing the stack trace here as well
+         // would report the same failure twice.
+         throw new CleartkExtractorException( e );
+      }
+      return feats;
+   }
+
+}