You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/06/21 20:10:51 UTC

svn commit: r1495529 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java

Author: tmill
Date: Fri Jun 21 18:10:51 2013
New Revision: 1495529

URL: http://svn.apache.org/r1495529
Log:
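Add new features to all assertion engines: the closest cue phrase within the covering sentence (word, phrase family, phrase category) and the entity type ID.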

Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1495529&r1=1495528&r2=1495529&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Fri Jun 21 18:10:51 2013
@@ -32,6 +32,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -48,8 +49,6 @@ import org.cleartk.classifier.feature.ex
 import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
 import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
-import org.cleartk.classifier.feature.function.FeatureFunctionExtractor;
-import org.cleartk.classifier.feature.function.LowerCaseFeatureFunction;
 import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.ConfigurationParameterFactory;
@@ -116,7 +115,6 @@ public abstract class AssertionCleartkAn
   protected List<CleartkExtractor> tokenContextFeatureExtractors;
   protected List<CleartkExtractor> tokenCleartkExtractors;
   protected List<SimpleFeatureExtractor> entityFeatureExtractors;
-
   protected CleartkExtractor cuePhraseInWindowExtractor;
   
   @Override
@@ -190,14 +188,17 @@ public abstract class AssertionCleartkAn
           );
     
     cuePhraseInWindowExtractor = new CleartkExtractor(
-          AssertionCuePhraseAnnotation.class,
-          baseExtractorCuePhraseCategory,
-          new CleartkExtractor.Bag(new CleartkExtractor.Preceding(3)),
-          new CleartkExtractor.Bag(new CleartkExtractor.Following(3)),
-          new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
-          new CleartkExtractor.Bag(new CleartkExtractor.Following(5)),
-          new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
-          new CleartkExtractor.Bag(new CleartkExtractor.Following(10))
+        BaseToken.class,
+        new CoveredTextExtractor(),
+        new CleartkExtractor.Bag(new CleartkExtractor.Covered())
+//          AssertionCuePhraseAnnotation.class,
+//          baseExtractorCuePhraseCategory,
+//          new CleartkExtractor.Bag(new CleartkExtractor.Preceding(3)),
+//          new CleartkExtractor.Bag(new CleartkExtractor.Following(3)),
+//          new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
+//          new CleartkExtractor.Bag(new CleartkExtractor.Following(5)),
+//          new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
+//          new CleartkExtractor.Bag(new CleartkExtractor.Following(10))
           );
     
   }
@@ -245,8 +246,10 @@ public abstract class AssertionCleartkAn
 
     Map<IdentifiedAnnotation, Collection<Zone>> coveringZoneMap =
         JCasUtil.indexCovering(jCas, IdentifiedAnnotation.class, Zone.class);
+//    Map<IdentifiedAnnotation, Collection<Sentence>> coveringSents =
+//        JCasUtil.indexCovering(jCas, IdentifiedAnnotation.class, Sentence.class);
     
-    List<Instance<String>> instances = new ArrayList<Instance<String>>();
+//    List<Instance<String>> instances = new ArrayList<Instance<String>>();
     // generate a list of training instances for each sentence in the document
     Collection<IdentifiedAnnotation> entities = JCasUtil.select(identifiedAnnotationView, IdentifiedAnnotation.class);
     for (IdentifiedAnnotation identifiedAnnotation : entities)
@@ -296,17 +299,38 @@ public abstract class AssertionCleartkAn
     	  instance.addAll(extractor.extract(identifiedAnnotationView, entityOrEventMention));
         }
       
-      List<Feature> cuePhraseFeatures = null;
+//      List<Feature> cuePhraseFeatures = null;
 //          cuePhraseInWindowExtractor.extract(jCas, entityOrEventMention);
           //cuePhraseInWindowExtractor.extractWithin(jCas, entityMention, firstCoveringSentence);
-      
-      if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
-      {
-        instance.addAll(cuePhraseFeatures);
+//      List<Sentence> sents = new ArrayList<Sentence>(coveringSents.get(entityOrEventMention));
+      List<Sentence> sents = new ArrayList<Sentence>(JCasUtil.selectCovering(jCas, Sentence.class, entityOrEventMention.getBegin(), entityOrEventMention.getEnd()));
+      if(sents.size() > 0){
+        Sentence sentence = sents.get(0);
+        List<AssertionCuePhraseAnnotation> cues = JCasUtil.selectCovered(AssertionCuePhraseAnnotation.class, sentence);
+        int closest = Integer.MAX_VALUE;
+        AssertionCuePhraseAnnotation closestCue = null;
+        for(AssertionCuePhraseAnnotation cue : cues){
+          List<BaseToken> tokens = JCasUtil.selectBetween(BaseToken.class, cue, entityOrEventMention);
+          if(tokens.size() < closest){
+            closestCue = cue;
+            closest = tokens.size();
+          }
+//          instance.addAll(cuePhraseInWindowExtractor.extractBetween(jCas, cue, entityOrEventMention));
+        }
+        if(closestCue != null){
+          instance.add(new Feature("ClosestCue_Word", closestCue.getCoveredText()));
+//          instance.add(new Feature("ClosestCue_Phrase", closestCue.getCuePhrase()));
+          instance.add(new Feature("ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily()));
+          instance.add(new Feature("ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory()));
+        }
       }
+//      if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
+//      {
+//        instance.addAll(cuePhraseFeatures);
+//      }
 
 
-        
+      instance.add(new Feature("ENTITY_TYPE", entityOrEventMention.getTypeID()));
       
       for (SimpleFeatureExtractor extractor : this.entityFeatureExtractors) {
         instance.addAll(extractor.extract(jCas, entityOrEventMention));