You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2015/11/20 23:04:09 UTC

svn commit: r1715429 - /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/

Author: tmill
Date: Fri Nov 20 22:04:09 2015
New Revision: 1715429

URL: http://svn.apache.org/viewvc?rev=1715429&view=rev
Log:
Made a bunch of feature extractors implement both the relation extractor and mention extractor interfaces, in order to separate out mention features from pair features.

Added:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java
Removed:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeAgreementFeaturesExtractor.java
Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -3,57 +3,38 @@ package org.apache.ctakes.coreference.ae
 import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.getGender;
 import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.isDefinite;
 import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.isDemonstrative;
-import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.isNegated;
 import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.numberSingular;
 
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.ctakes.core.util.ListIterable;
-import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
-import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
-import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
-import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
-import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
-import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
 import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 
-public class MentionClusterAgreementFeaturesExtractor implements RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation> {
+public class MentionClusterAgreementFeaturesExtractor implements RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>, FeatureExtractor1<Markable> {
 
   public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
       IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
     List<Feature> features = new ArrayList<>();
     
-    ConllDependencyNode mentionHead = DependencyUtility.getNominalHeadNode(jCas, mention);
-    
     String s = mention.getCoveredText().toLowerCase();
     boolean isDem = isDemonstrative(s);
     boolean isDef = isDefinite(s);
-    features.add(new Feature("MC_MENTION_DEM", isDem));
-    features.add(new Feature("MC_MENTION_DEF", isDef));
-    
     String gender = getGender(s);
-    features.add(new Feature("MC_MENTION_GENDER", gender));
-
     boolean singular = numberSingular(jCas, mention, s);
-    features.add(new Feature("MC_MENTION_NUMBER", singular));
-        
-    boolean mentionTimex = isTimex(mention);
-    features.add(new Feature("MC_MENTION_TIMEX", mentionTimex));
-    
+
     boolean matchDem = false;
     boolean matchDef = false;
     boolean matchGender = false;
     boolean matchNumber = false;
-    boolean clusterTimex = false;  // if any cluster member is timex
     
     for(IdentifiedAnnotation member : new ListIterable<IdentifiedAnnotation>(cluster.getMembers())){
       if(member == null){
@@ -77,76 +58,33 @@ public class MentionClusterAgreementFeat
       if(!matchNumber && numberSingular(jCas, member, m) == singular){
         matchNumber = true;
       }
-      if(isTimex(member)){
-        clusterTimex = true;
-      }
     }
     
     features.add(new Feature("MC_AGREE_DEM", matchDem));
     features.add(new Feature("MC_AGREE_DEF", matchDef));
     features.add(new Feature("MC_AGREE_GEN", matchGender));
     features.add(new Feature("MC_AGREE_NUM", matchNumber));
-    features.add(new Feature("MC_AGREE_TIMEX", clusterTimex == mentionTimex));
-    
-    /// check attributes like location/degree/negation/uncertainty
-    /*
-    Set<String> mentionSites = new HashSet<>();
     
+    return features;
+  }
+
+  @Override
+  public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+    List<Feature> features = new ArrayList<>();
+
+    String s = mention.getCoveredText().toLowerCase();
+
+    boolean isDem = isDemonstrative(s);
+    boolean isDef = isDefinite(s);
+    features.add(new Feature("MC_MENTION_DEM", isDem));
+    features.add(new Feature("MC_MENTION_DEF", isDef));
     
-    if(mentionHead != null){
-      for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, mentionHead)){
-        LocationOfTextRelation rel = getLocation(annot);
-        if(rel != null){
-          AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
-          for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
-            mentionSites.add(concept.getCui());
-          }
-        }
-      }
-    }
+    String gender = getGender(s);
+    features.add(new Feature("MC_MENTION_GENDER", gender));
+
+    boolean singular = numberSingular(jCas, mention, s);
+    features.add(new Feature("MC_MENTION_NUMBER", singular));
 
-    if(mentionSites.size() > 0){
-      Set<String> memberSites = new HashSet<>();
-      for(Markable member : JCasUtil.select(cluster.getMembers(), Markable.class)){
-        if(mention.getBegin() <= member.getBegin()) break;
-        ConllDependencyNode memberHead = DependencyUtility.getNominalHeadNode(jCas, member);
-        if(memberHead == null) continue;
-        
-        for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, memberHead)){
-          LocationOfTextRelation rel = getLocation(annot);
-          if(rel != null){
-            boolean conflict = true;
-            AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
-            for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
-              memberSites.add(concept.getCui());
-              if(mentionSites.contains(concept.getCui())){
-                conflict = false;
-              }
-            }
-            if(conflict){
-              features.add(new Feature("MC_LOCATION_CONFLICT", true));
-            }
-          }
-        }
-      }
-    }
-    */
     return features;
   }
-  
-  private LocationOfTextRelation getLocation(IdentifiedAnnotation annot){
-    LocationOfTextRelation rel = null;
-    if(annot instanceof ProcedureMention){
-      rel = ((ProcedureMention)annot).getBodyLocation();
-    }else if(annot instanceof DiseaseDisorderMention){
-      rel = ((DiseaseDisorderMention)annot).getBodyLocation();
-    }else if(annot instanceof SignSymptomMention){
-      rel = ((SignSymptomMention)annot).getBodyLocation();
-    }
-    return rel;
-  }
-  
-  private boolean isTimex(Annotation a){
-    return JCasUtil.selectCovered(TimeMention.class, a).size() > 0;
-  }
 }

Added: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java?rev=1715429&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java (added)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -0,0 +1,137 @@
+package org.apache.ctakes.coreference.ae.features.cluster;
+
+import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.core.util.ListIterable;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
+import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+
+public class MentionClusterAttributeFeaturesExtractor implements
+    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
+
+  @Override
+  public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
+      IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
+    List<Feature> features = new ArrayList<>();
+    
+    boolean mentionNegated = isNegated(mention);
+    boolean mentionUnc = isUncertain(mention);
+    boolean mentionTimex = isTimex(mention);
+
+    boolean matchNeg = true;
+    boolean clusterTimex = false;  // if any cluster member is timex
+    boolean matchUnc = true;
+    
+    for(IdentifiedAnnotation member : new ListIterable<IdentifiedAnnotation>(cluster.getMembers())){
+      if(mentionNegated != isNegated(member)){
+        matchNeg = false;
+      }
+      if(mentionUnc != isUncertain(member)){
+        matchUnc = false;
+      }
+      if(isTimex(member)){
+        clusterTimex = true;
+      }
+    }
+    
+    features.add(new Feature("MC_AGREE_NEG", matchNeg));
+    features.add(new Feature("MC_AGREE_UNC", matchUnc));
+    
+    features.add(new Feature("MC_AGREE_TIMEX", clusterTimex == mentionTimex));
+
+    /// check attributes like location/degree/negation/uncertainty
+    /*
+    Set<String> mentionSites = new HashSet<>();
+    
+    
+    if(mentionHead != null){
+      for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, mentionHead)){
+        LocationOfTextRelation rel = getLocation(annot);
+        if(rel != null){
+          AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
+          for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
+            mentionSites.add(concept.getCui());
+          }
+        }
+      }
+    }
+
+    if(mentionSites.size() > 0){
+      Set<String> memberSites = new HashSet<>();
+      for(Markable member : JCasUtil.select(cluster.getMembers(), Markable.class)){
+        if(mention.getBegin() <= member.getBegin()) break;
+        ConllDependencyNode memberHead = DependencyUtility.getNominalHeadNode(jCas, member);
+        if(memberHead == null) continue;
+        
+        for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, memberHead)){
+          LocationOfTextRelation rel = getLocation(annot);
+          if(rel != null){
+            boolean conflict = true;
+            AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
+            for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
+              memberSites.add(concept.getCui());
+              if(mentionSites.contains(concept.getCui())){
+                conflict = false;
+              }
+            }
+            if(conflict){
+              features.add(new Feature("MC_LOCATION_CONFLICT", true));
+            }
+          }
+        }
+      }
+    }
+    */
+    return features;
+  }
+
+  @Override
+  public List<Feature> extract(JCas view, Markable mention)
+      throws CleartkExtractorException {
+    List<Feature> features = new ArrayList<>();
+    
+    boolean mentionNegated = isNegated(mention);
+    features.add(new Feature("MC_MENTION_NEGATED", mentionNegated));
+    
+    boolean mentionUncertain = isUncertain(mention);
+    features.add(new Feature("MC_MENTION_UNCERTAIN", mentionUncertain));
+    
+    boolean mentionTimex = isTimex(mention);
+    features.add(new Feature("MC_MENTION_TIMEX", mentionTimex));
+
+    return features;
+  }
+  
+  private boolean isTimex(Annotation a){
+    return JCasUtil.selectCovered(TimeMention.class, a).size() > 0;
+  }
+  
+  private LocationOfTextRelation getLocation(IdentifiedAnnotation annot){
+    LocationOfTextRelation rel = null;
+    if(annot instanceof ProcedureMention){
+      rel = ((ProcedureMention)annot).getBodyLocation();
+    }else if(annot instanceof DiseaseDisorderMention){
+      rel = ((DiseaseDisorderMention)annot).getBodyLocation();
+    }else if(annot instanceof SignSymptomMention){
+      rel = ((SignSymptomMention)annot).getBodyLocation();
+    }
+    return rel;
+  }
+}

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java Fri Nov 20 22:04:09 2015
@@ -17,9 +17,11 @@ import org.apache.ctakes.typesystem.type
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 
 public class MentionClusterDepHeadExtractor implements
-    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
 
   @Override
   public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
@@ -40,16 +42,16 @@ public class MentionClusterDepHeadExtrac
       }
 //      DependencyPath path = DependencyUtility.getPathToTop(jCas, memberHead);
     }
-    for(String headWord : memberHeads){
+//    for(String headWord : memberHeads){
 //      feats.add(new Feature("MemberHead", headWord));
-    }
+//    }
 //    for(String path : memberPaths){
 //      feats.add(new Feature("MemberRel", path));
 //    }
     
     if(mentionHead != null){
       String headWord = mentionHead.getCoveredText().toLowerCase();
-      feats.add(new Feature("MentionRel", mentionHead.getDeprel()));
+//      feats.add(new Feature("MentionRel", mentionHead.getDeprel()));
 //      feats.add(new Feature("MentionHead", headWord));
       if(memberHeads.contains(headWord) && !StringMatchingFeatureExtractor.isPronoun(mention)){
         feats.add(new Feature("ClusterHeadMatchesMentionHead", true));
@@ -57,6 +59,18 @@ public class MentionClusterDepHeadExtrac
     }
     
     return feats;
+  }
+
+  @Override
+  public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+    List<Feature> feats = new ArrayList<>();
+    ConllDependencyNode mentionHead = DependencyUtility.getNominalHeadNode(jCas, mention);
+
+    if(mentionHead != null){
+      feats.add(new Feature("MentionRel", mentionHead.getDeprel()));
+    }
+    
+    return feats;
   }
 
 }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -70,8 +70,6 @@ public class MentionClusterMentionFeatur
     
     feats.addAll(DependencyTreeFeaturesExtractor.extractForNode(view, focusAnnotation, "dep"));
     
-    feats.add(new Feature("MC_MENTION_SALIENCE", focusAnnotation.getConfidence()));
-
     return feats;
   }
 

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -11,9 +11,11 @@ import org.apache.ctakes.typesystem.type
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 
 public class MentionClusterSalienceFeaturesExtractor implements
-    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
 
   @Override
   public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
@@ -52,4 +54,13 @@ public class MentionClusterSalienceFeatu
     return feats;
   }
 
+  @Override
+  public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+    List<Feature> feats = new ArrayList<>();
+    
+    feats.add(new Feature("MC_MENTION_SALIENCE", mention.getConfidence()));
+
+    return feats;
+  }
+
 }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -16,9 +16,11 @@ import org.apache.uima.analysis_engine.A
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 
 public class MentionClusterSectionFeaturesExtractor implements
-    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
 
   @Override
   public List<Feature> extract(JCas jcas, CollectionTextRelation cluster,
@@ -60,12 +62,42 @@ public class MentionClusterSectionFeatur
     }
 
     feats.add(new Feature("AnteInHeader", parsWithAnteHeader.size() > 0));
-    feats.add(new Feature("AnaInHeader", anaInHeader));
     if(anteInHeader && parsWithAnteHeader.contains(anaPar-1)){
       feats.add(new Feature("AnteHeaderHeadsAna", true));      
     }
 
     return feats;
   }
+
+  @Override
+  public List<Feature> extract(JCas jcas, Markable mention) throws CleartkExtractorException {
+    List<Feature> feats = new ArrayList<>();
+    
+    boolean anaInHeader = false;
+    int anaPar = -1;
+
+    // Find section headers -- paragraphs 
+    List<Paragraph> pars = new ArrayList<>(JCasUtil.select(jcas, Paragraph.class));
+    for(int i = 0; i < pars.size(); i++){
+      Paragraph par = pars.get(i);
+      if(par.getBegin() > mention.getEnd()){
+        break;
+      }
+      // find the paragraph with the anaphor
+      if(mention.getBegin() >= par.getBegin() && mention.getEnd() <= par.getEnd()){
+        anaPar = i;
+      }
+      List<Sentence> coveredSents = JCasUtil.selectCovered(jcas, Sentence.class, par);
+      if(coveredSents != null && coveredSents.size() == 1){
+        if(anaPar == i){
+          anaInHeader = true;
+          break;
+        }
+      }
+    }
+    feats.add(new Feature("AnaInHeader", anaInHeader));
+
+    return feats;
+  }
 
 }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java Fri Nov 20 22:04:09 2015
@@ -52,6 +52,7 @@ public class MentionClusterSemTypeDepPre
       if(semProbs == null) return feats;
 
       for(Markable m : JCasUtil.select(cluster.getMembers(), Markable.class)){
+        // FIXME -- I think this is a bug -- should be >
         if(mention.getBegin() < m.getEnd()){
           // during training this might happen -- see a member of a cluster that
           // is actually subsequent to the candidate mention

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java Fri Nov 20 22:04:09 2015
@@ -24,9 +24,11 @@ import org.apache.uima.fit.util.JCasUtil
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 
 public class MentionClusterUMLSFeatureExtractor implements
-    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
 
   String docId = null;
   Map<ConllDependencyNode,Collection<IdentifiedAnnotation>> coveringMap = null;
@@ -105,7 +107,7 @@ public class MentionClusterUMLSFeatureEx
         for(IdentifiedAnnotation ent2 : mentionEnts){
           HashSet<String> a2Tuis = new HashSet<>();
           String a2SemType = ent2.getClass().getSimpleName();
-          trueFeats.add("MentionSemType" + a2SemType);
+//          trueFeats.add("MentionSemType" + a2SemType);
           if(alias(ent1, ent2)){
             trueFeats.add("UMLS_ALIAS");
             break;
@@ -142,4 +144,44 @@ public class MentionClusterUMLSFeatureEx
     return feats;
   }
 
+  @Override
+  public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+    List<Feature> feats = new ArrayList<>();
+    Set<String> trueFeats = new HashSet<>();
+    
+    try{
+      if(docId == null || !getDocId(jCas).equals(docId)){
+        docId = getDocId(jCas);
+        coveringMap = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
+      }
+    }catch(AnalysisEngineProcessException e){
+      throw new CleartkExtractorException(e);
+    }
+    
+    ConllDependencyNode head = DependencyUtility.getNominalHeadNode(jCas, mention);
+    
+    List<IdentifiedAnnotation> rmList = new ArrayList<>();
+    // get the entities covering this markable:
+    List<IdentifiedAnnotation> mentionEnts = new ArrayList<>(coveringMap.get(head)); //JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, head1.getBegin(), head1.getEnd());'
+    for(IdentifiedAnnotation ann : mentionEnts){
+      if(!(ann instanceof EntityMention || ann instanceof EventMention)){
+        rmList.add(ann);
+      }
+    }
+    for(IdentifiedAnnotation toRm : rmList){
+      mentionEnts.remove(toRm);
+    }
+
+    for(IdentifiedAnnotation ent : mentionEnts){
+      String a2SemType = ent.getClass().getSimpleName();
+      trueFeats.add("MentionSemType" + a2SemType);
+    }
+    
+    for(String feat : trueFeats){
+      feats.add(new Feature(feat, true));
+    }
+
+    return feats;
+  }
+
 }