You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2015/11/20 23:04:09 UTC
svn commit: r1715429 -
/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/
Author: tmill
Date: Fri Nov 20 22:04:09 2015
New Revision: 1715429
URL: http://svn.apache.org/viewvc?rev=1715429&view=rev
Log:
Made a number of the feature extractors implement both the relation-extractor and the mention-extractor interfaces, in order to separate out mention features from pair features.
Added:
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java
Removed:
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeAgreementFeaturesExtractor.java
Modified:
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -3,57 +3,38 @@ package org.apache.ctakes.coreference.ae
import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.getGender;
import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.isDefinite;
import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.isDemonstrative;
-import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.isNegated;
import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.numberSingular;
import java.util.ArrayList;
import java.util.List;
import org.apache.ctakes.core.util.ListIterable;
-import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
-import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
-import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
-import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
-import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
-import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
-public class MentionClusterAgreementFeaturesExtractor implements RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation> {
+public class MentionClusterAgreementFeaturesExtractor implements RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>, FeatureExtractor1<Markable> {
public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
List<Feature> features = new ArrayList<>();
- ConllDependencyNode mentionHead = DependencyUtility.getNominalHeadNode(jCas, mention);
-
String s = mention.getCoveredText().toLowerCase();
boolean isDem = isDemonstrative(s);
boolean isDef = isDefinite(s);
- features.add(new Feature("MC_MENTION_DEM", isDem));
- features.add(new Feature("MC_MENTION_DEF", isDef));
-
String gender = getGender(s);
- features.add(new Feature("MC_MENTION_GENDER", gender));
-
boolean singular = numberSingular(jCas, mention, s);
- features.add(new Feature("MC_MENTION_NUMBER", singular));
-
- boolean mentionTimex = isTimex(mention);
- features.add(new Feature("MC_MENTION_TIMEX", mentionTimex));
-
+
boolean matchDem = false;
boolean matchDef = false;
boolean matchGender = false;
boolean matchNumber = false;
- boolean clusterTimex = false; // if any cluster member is timex
for(IdentifiedAnnotation member : new ListIterable<IdentifiedAnnotation>(cluster.getMembers())){
if(member == null){
@@ -77,76 +58,33 @@ public class MentionClusterAgreementFeat
if(!matchNumber && numberSingular(jCas, member, m) == singular){
matchNumber = true;
}
- if(isTimex(member)){
- clusterTimex = true;
- }
}
features.add(new Feature("MC_AGREE_DEM", matchDem));
features.add(new Feature("MC_AGREE_DEF", matchDef));
features.add(new Feature("MC_AGREE_GEN", matchGender));
features.add(new Feature("MC_AGREE_NUM", matchNumber));
- features.add(new Feature("MC_AGREE_TIMEX", clusterTimex == mentionTimex));
-
- /// check attributes like location/degree/negation/uncertainty
- /*
- Set<String> mentionSites = new HashSet<>();
+ return features;
+ }
+
+ @Override
+ public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+ List<Feature> features = new ArrayList<>();
+
+ String s = mention.getCoveredText().toLowerCase();
+
+ boolean isDem = isDemonstrative(s);
+ boolean isDef = isDefinite(s);
+ features.add(new Feature("MC_MENTION_DEM", isDem));
+ features.add(new Feature("MC_MENTION_DEF", isDef));
- if(mentionHead != null){
- for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, mentionHead)){
- LocationOfTextRelation rel = getLocation(annot);
- if(rel != null){
- AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
- for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
- mentionSites.add(concept.getCui());
- }
- }
- }
- }
+ String gender = getGender(s);
+ features.add(new Feature("MC_MENTION_GENDER", gender));
+
+ boolean singular = numberSingular(jCas, mention, s);
+ features.add(new Feature("MC_MENTION_NUMBER", singular));
- if(mentionSites.size() > 0){
- Set<String> memberSites = new HashSet<>();
- for(Markable member : JCasUtil.select(cluster.getMembers(), Markable.class)){
- if(mention.getBegin() <= member.getBegin()) break;
- ConllDependencyNode memberHead = DependencyUtility.getNominalHeadNode(jCas, member);
- if(memberHead == null) continue;
-
- for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, memberHead)){
- LocationOfTextRelation rel = getLocation(annot);
- if(rel != null){
- boolean conflict = true;
- AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
- for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
- memberSites.add(concept.getCui());
- if(mentionSites.contains(concept.getCui())){
- conflict = false;
- }
- }
- if(conflict){
- features.add(new Feature("MC_LOCATION_CONFLICT", true));
- }
- }
- }
- }
- }
- */
return features;
}
-
- private LocationOfTextRelation getLocation(IdentifiedAnnotation annot){
- LocationOfTextRelation rel = null;
- if(annot instanceof ProcedureMention){
- rel = ((ProcedureMention)annot).getBodyLocation();
- }else if(annot instanceof DiseaseDisorderMention){
- rel = ((DiseaseDisorderMention)annot).getBodyLocation();
- }else if(annot instanceof SignSymptomMention){
- rel = ((SignSymptomMention)annot).getBodyLocation();
- }
- return rel;
- }
-
- private boolean isTimex(Annotation a){
- return JCasUtil.selectCovered(TimeMention.class, a).size() > 0;
- }
}
Added: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java?rev=1715429&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java (added)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -0,0 +1,137 @@
+package org.apache.ctakes.coreference.ae.features.cluster;
+
+import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.core.util.ListIterable;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
+import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+
+public class MentionClusterAttributeFeaturesExtractor implements
+ RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
+
+ @Override
+ public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
+ IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
+ List<Feature> features = new ArrayList<>();
+
+ boolean mentionNegated = isNegated(mention);
+ boolean mentionUnc = isUncertain(mention);
+ boolean mentionTimex = isTimex(mention);
+
+ boolean matchNeg = true;
+ boolean clusterTimex = false; // if any cluster member is timex
+ boolean matchUnc = true;
+
+ for(IdentifiedAnnotation member : new ListIterable<IdentifiedAnnotation>(cluster.getMembers())){
+ if(mentionNegated != isNegated(member)){
+ matchNeg = false;
+ }
+ if(mentionUnc != isUncertain(member)){
+ matchUnc = false;
+ }
+ if(isTimex(member)){
+ clusterTimex = true;
+ }
+ }
+
+ features.add(new Feature("MC_AGREE_NEG", matchNeg));
+ features.add(new Feature("MC_AGREE_UNC", matchUnc));
+
+ features.add(new Feature("MC_AGREE_TIMEX", clusterTimex == mentionTimex));
+
+ /// check attributes like location/degree/negation/uncertainty
+ /*
+ Set<String> mentionSites = new HashSet<>();
+
+
+ if(mentionHead != null){
+ for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, mentionHead)){
+ LocationOfTextRelation rel = getLocation(annot);
+ if(rel != null){
+ AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
+ for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
+ mentionSites.add(concept.getCui());
+ }
+ }
+ }
+ }
+
+ if(mentionSites.size() > 0){
+ Set<String> memberSites = new HashSet<>();
+ for(Markable member : JCasUtil.select(cluster.getMembers(), Markable.class)){
+ if(mention.getBegin() <= member.getBegin()) break;
+ ConllDependencyNode memberHead = DependencyUtility.getNominalHeadNode(jCas, member);
+ if(memberHead == null) continue;
+
+ for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, memberHead)){
+ LocationOfTextRelation rel = getLocation(annot);
+ if(rel != null){
+ boolean conflict = true;
+ AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
+ for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
+ memberSites.add(concept.getCui());
+ if(mentionSites.contains(concept.getCui())){
+ conflict = false;
+ }
+ }
+ if(conflict){
+ features.add(new Feature("MC_LOCATION_CONFLICT", true));
+ }
+ }
+ }
+ }
+ }
+ */
+ return features;
+ }
+
+ @Override
+ public List<Feature> extract(JCas view, Markable mention)
+ throws CleartkExtractorException {
+ List<Feature> features = new ArrayList<>();
+
+ boolean mentionNegated = isNegated(mention);
+ features.add(new Feature("MC_MENTION_NEGATED", mentionNegated));
+
+ boolean mentionUncertain = isUncertain(mention);
+ features.add(new Feature("MC_MENTION_UNCERTAIN", mentionUncertain));
+
+ boolean mentionTimex = isTimex(mention);
+ features.add(new Feature("MC_MENTION_TIMEX", mentionTimex));
+
+ return features;
+ }
+
+ private boolean isTimex(Annotation a){
+ return JCasUtil.selectCovered(TimeMention.class, a).size() > 0;
+ }
+
+ private LocationOfTextRelation getLocation(IdentifiedAnnotation annot){
+ LocationOfTextRelation rel = null;
+ if(annot instanceof ProcedureMention){
+ rel = ((ProcedureMention)annot).getBodyLocation();
+ }else if(annot instanceof DiseaseDisorderMention){
+ rel = ((DiseaseDisorderMention)annot).getBodyLocation();
+ }else if(annot instanceof SignSymptomMention){
+ rel = ((SignSymptomMention)annot).getBodyLocation();
+ }
+ return rel;
+ }
+}
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java Fri Nov 20 22:04:09 2015
@@ -17,9 +17,11 @@ import org.apache.ctakes.typesystem.type
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
public class MentionClusterDepHeadExtractor implements
- RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+ RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
@Override
public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
@@ -40,16 +42,16 @@ public class MentionClusterDepHeadExtrac
}
// DependencyPath path = DependencyUtility.getPathToTop(jCas, memberHead);
}
- for(String headWord : memberHeads){
+// for(String headWord : memberHeads){
// feats.add(new Feature("MemberHead", headWord));
- }
+// }
// for(String path : memberPaths){
// feats.add(new Feature("MemberRel", path));
// }
if(mentionHead != null){
String headWord = mentionHead.getCoveredText().toLowerCase();
- feats.add(new Feature("MentionRel", mentionHead.getDeprel()));
+// feats.add(new Feature("MentionRel", mentionHead.getDeprel()));
// feats.add(new Feature("MentionHead", headWord));
if(memberHeads.contains(headWord) && !StringMatchingFeatureExtractor.isPronoun(mention)){
feats.add(new Feature("ClusterHeadMatchesMentionHead", true));
@@ -57,6 +59,18 @@ public class MentionClusterDepHeadExtrac
}
return feats;
+ }
+
+ @Override
+ public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+ List<Feature> feats = new ArrayList<>();
+ ConllDependencyNode mentionHead = DependencyUtility.getNominalHeadNode(jCas, mention);
+
+ if(mentionHead != null){
+ feats.add(new Feature("MentionRel", mentionHead.getDeprel()));
+ }
+
+ return feats;
}
}
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -70,8 +70,6 @@ public class MentionClusterMentionFeatur
feats.addAll(DependencyTreeFeaturesExtractor.extractForNode(view, focusAnnotation, "dep"));
- feats.add(new Feature("MC_MENTION_SALIENCE", focusAnnotation.getConfidence()));
-
return feats;
}
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -11,9 +11,11 @@ import org.apache.ctakes.typesystem.type
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
public class MentionClusterSalienceFeaturesExtractor implements
- RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+ RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
@Override
public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
@@ -52,4 +54,13 @@ public class MentionClusterSalienceFeatu
return feats;
}
+ @Override
+ public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+ List<Feature> feats = new ArrayList<>();
+
+ feats.add(new Feature("MC_MENTION_SALIENCE", mention.getConfidence()));
+
+ return feats;
+ }
+
}
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSectionFeaturesExtractor.java Fri Nov 20 22:04:09 2015
@@ -16,9 +16,11 @@ import org.apache.uima.analysis_engine.A
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
public class MentionClusterSectionFeaturesExtractor implements
- RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+ RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
@Override
public List<Feature> extract(JCas jcas, CollectionTextRelation cluster,
@@ -60,12 +62,42 @@ public class MentionClusterSectionFeatur
}
feats.add(new Feature("AnteInHeader", parsWithAnteHeader.size() > 0));
- feats.add(new Feature("AnaInHeader", anaInHeader));
if(anteInHeader && parsWithAnteHeader.contains(anaPar-1)){
feats.add(new Feature("AnteHeaderHeadsAna", true));
}
return feats;
}
+
+ @Override
+ public List<Feature> extract(JCas jcas, Markable mention) throws CleartkExtractorException {
+ List<Feature> feats = new ArrayList<>();
+
+ boolean anaInHeader = false;
+ int anaPar = -1;
+
+ // Find section headers -- paragraphs
+ List<Paragraph> pars = new ArrayList<>(JCasUtil.select(jcas, Paragraph.class));
+ for(int i = 0; i < pars.size(); i++){
+ Paragraph par = pars.get(i);
+ if(par.getBegin() > mention.getEnd()){
+ break;
+ }
+ // find the paragraph with the anaphor
+ if(mention.getBegin() >= par.getBegin() && mention.getEnd() <= par.getEnd()){
+ anaPar = i;
+ }
+ List<Sentence> coveredSents = JCasUtil.selectCovered(jcas, Sentence.class, par);
+ if(coveredSents != null && coveredSents.size() == 1){
+ if(anaPar == i){
+ anaInHeader = true;
+ break;
+ }
+ }
+ }
+ feats.add(new Feature("AnaInHeader", anaInHeader));
+
+ return feats;
+ }
}
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSemTypeDepPrefsFeatureExtractor.java Fri Nov 20 22:04:09 2015
@@ -52,6 +52,7 @@ public class MentionClusterSemTypeDepPre
if(semProbs == null) return feats;
for(Markable m : JCasUtil.select(cluster.getMembers(), Markable.class)){
+ // FIXME -- I think this is a bug -- should be >
if(mention.getBegin() < m.getEnd()){
// during training this might happen -- see a member of a cluster that
// is actually subsequent to the candidate mention
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java?rev=1715429&r1=1715428&r2=1715429&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java Fri Nov 20 22:04:09 2015
@@ -24,9 +24,11 @@ import org.apache.uima.fit.util.JCasUtil
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
public class MentionClusterUMLSFeatureExtractor implements
- RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+ RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
String docId = null;
Map<ConllDependencyNode,Collection<IdentifiedAnnotation>> coveringMap = null;
@@ -105,7 +107,7 @@ public class MentionClusterUMLSFeatureEx
for(IdentifiedAnnotation ent2 : mentionEnts){
HashSet<String> a2Tuis = new HashSet<>();
String a2SemType = ent2.getClass().getSimpleName();
- trueFeats.add("MentionSemType" + a2SemType);
+// trueFeats.add("MentionSemType" + a2SemType);
if(alias(ent1, ent2)){
trueFeats.add("UMLS_ALIAS");
break;
@@ -142,4 +144,44 @@ public class MentionClusterUMLSFeatureEx
return feats;
}
+ @Override
+ public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+ List<Feature> feats = new ArrayList<>();
+ Set<String> trueFeats = new HashSet<>();
+
+ try{
+ if(docId == null || !getDocId(jCas).equals(docId)){
+ docId = getDocId(jCas);
+ coveringMap = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
+ }
+ }catch(AnalysisEngineProcessException e){
+ throw new CleartkExtractorException(e);
+ }
+
+ ConllDependencyNode head = DependencyUtility.getNominalHeadNode(jCas, mention);
+
+ List<IdentifiedAnnotation> rmList = new ArrayList<>();
+ // get the entities covering this markable:
+ List<IdentifiedAnnotation> mentionEnts = new ArrayList<>(coveringMap.get(head)); //JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, head1.getBegin(), head1.getEnd());'
+ for(IdentifiedAnnotation ann : mentionEnts){
+ if(!(ann instanceof EntityMention || ann instanceof EventMention)){
+ rmList.add(ann);
+ }
+ }
+ for(IdentifiedAnnotation toRm : rmList){
+ mentionEnts.remove(toRm);
+ }
+
+ for(IdentifiedAnnotation ent : mentionEnts){
+ String a2SemType = ent.getClass().getSimpleName();
+ trueFeats.add("MentionSemType" + a2SemType);
+ }
+
+ for(String feat : trueFeats){
+ feats.add(new Feature(feat, true));
+ }
+
+ return feats;
+ }
+
}