You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2015/11/30 23:06:27 UTC

svn commit: r1717336 - in /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference: ae/ ae/features/ ae/features/cluster/ eval/

Author: tmill
Date: Mon Nov 30 22:06:27 2015
New Revision: 1717336

URL: http://svn.apache.org/viewvc?rev=1717336&view=rev
Log:
Added other features to coref attribute extractors.

Added:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeVectorExtractor.java
Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java?rev=1717336&r1=1717335&r2=1717336&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java Mon Nov 30 22:06:27 2015
@@ -15,6 +15,7 @@ import java.util.Set;
 import org.apache.ctakes.core.util.ListFactory;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAgreementFeaturesExtractor;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAttributeFeaturesExtractor;
+import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAttributeVectorExtractor;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterDepHeadExtractor;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterDistSemExtractor;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterMentionFeaturesExtractor;
@@ -118,7 +119,9 @@ public class MentionClusterCoreferenceAn
     extractors.add(new MentionClusterDepHeadExtractor());
     extractors.add(new MentionClusterStackFeaturesExtractor());
     extractors.add(new MentionClusterSalienceFeaturesExtractor());
-//    extractors.add(new MentionClusterAttributeFeaturesExtractor());
+    extractors.add(new MentionClusterAttributeFeaturesExtractor());
+    extractors.add(new MentionClusterAttributeVectorExtractor());
+    
 //    extractors.add(new MentionClusterDistanceFeaturesExtractor());
     
     try {
@@ -141,7 +144,7 @@ public class MentionClusterCoreferenceAn
     extractors.add(new MentionClusterSalienceFeaturesExtractor());
 
     extractors.add(new MentionClusterMentionFeaturesExtractor());
-//    extractors.add(new MentionClusterAttributeFeaturesExtractor());
+    extractors.add(new MentionClusterAttributeFeaturesExtractor());
 
     return extractors;
   }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java?rev=1717336&r1=1717335&r2=1717336&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java Mon Nov 30 22:06:27 2015
@@ -152,4 +152,37 @@ public class TokenFeatureExtractor imple
 	public static boolean isUncertain(IdentifiedAnnotation mention){
 	  return mention.getUncertainty() == CONST.NE_UNCERTAINTY_PRESENT;
 	}
+	
+	/**
+	 * Tests whether a mention's generic attribute is set.
+	 *
+	 * @param mention the annotation to inspect
+	 * @return true if the mention's generic attribute equals CONST.NE_GENERIC_TRUE
+	 */
+	public static boolean isGeneric(IdentifiedAnnotation mention){
+	  return mention.getGeneric() == CONST.NE_GENERIC_TRUE;
+	}
+	
+	/**
+	 * Tests whether the subject attribute of a mention is the patient.
+	 *
+	 * @param mention the annotation to inspect
+	 * @return true if the mention's subject attribute equals CONST.ATTR_SUBJECT_PATIENT;
+	 *         false otherwise, including when no subject has been set
+	 */
+	public static boolean isPatient(IdentifiedAnnotation mention){
+	  // The subject attribute is a String, so compare by value rather than by reference;
+	  // calling equals on the constant also keeps this safe when the subject is null.
+	  return CONST.ATTR_SUBJECT_PATIENT.equals(mention.getSubject());
+	}
+	
+	/**
+	 * Tests whether a mention's history-of attribute is set to present.
+	 *
+	 * @param mention the annotation to inspect
+	 * @return true if the mention's historyOf attribute equals CONST.NE_HISTORY_OF_PRESENT
+	 */
+	public static boolean isHistory(IdentifiedAnnotation mention){
+	  return mention.getHistoryOf() == CONST.NE_HISTORY_OF_PRESENT;
+	}
 }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java?rev=1717336&r1=1717335&r2=1717336&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeFeaturesExtractor.java Mon Nov 30 22:06:27 2015
@@ -33,11 +33,18 @@ public class MentionClusterAttributeFeat
     
     boolean mentionNegated = isNegated(mention);
     boolean mentionUnc = isUncertain(mention);
+    boolean mentionGen = isGeneric(mention);
+    boolean mentionSubj = isPatient(mention);
+    boolean mentionHist = isHistory(mention);
+    
     boolean mentionTimex = isTimex(mention);
 
     boolean matchNeg = true;
     boolean clusterTimex = false;  // if any cluster member is timex
     boolean matchUnc = true;
+    boolean matchGen = true;
+    boolean matchSubj = true;
+    boolean matchHist = true;
     
     for(Markable member : JCasUtil.select(cluster.getMembers(), Markable.class)){
       if(member.getBegin() > mention.getEnd()){
@@ -49,6 +56,15 @@ public class MentionClusterAttributeFeat
       if(mentionUnc != isUncertain(member)){
         matchUnc = false;
       }
+      if(mentionGen != isGeneric(member)){
+        matchGen = false;
+      }
+      if(mentionSubj != isPatient(member)){
+        matchSubj = false;
+      }
+      if(mentionHist != isHistory(member)){
+        matchHist = false;
+      }
       if(isTimex(member)){
         clusterTimex = true;
       }
@@ -56,6 +72,9 @@ public class MentionClusterAttributeFeat
     
     features.add(new Feature("MC_AGREE_NEG", matchNeg));
     features.add(new Feature("MC_AGREE_UNC", matchUnc));
+    features.add(new Feature("MC_AGREE_GEN", matchGen));
+    features.add(new Feature("MC_AGREE_SUBJ", matchSubj));
+    features.add(new Feature("MC_AGREE_HIST", matchHist));
     
     features.add(new Feature("MC_AGREE_TIMEX", clusterTimex == mentionTimex));
 
@@ -116,6 +135,15 @@ public class MentionClusterAttributeFeat
     boolean mentionUncertain = isUncertain(mention);
     features.add(new Feature("MC_MENTION_UNCERTAIN", mentionUncertain));
     
+    boolean mentionGen = isGeneric(mention);
+    features.add(new Feature("MC_MENTION_GENERIC", mentionGen));
+    
+    boolean mentionSubj = isPatient(mention);
+    features.add(new Feature("MC_MENTION_PATIENT", mentionSubj));
+    
+    boolean mentionHist = isHistory(mention);
+    features.add(new Feature("MC_MENTION_HISTORY", mentionHist));
+
     boolean mentionTimex = isTimex(mention);
     features.add(new Feature("MC_MENTION_TIMEX", mentionTimex));
 

Added: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeVectorExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeVectorExtractor.java?rev=1717336&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeVectorExtractor.java (added)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAttributeVectorExtractor.java Mon Nov 30 22:06:27 2015
@@ -0,0 +1,37 @@
+package org.apache.ctakes.coreference.ae.features.cluster;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+
+/**
+ * Placeholder feature extractor for a (cluster, mention) pair; it does not produce
+ * any features yet.
+ */
+public class MentionClusterAttributeVectorExtractor implements 
+  RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+
+  /**
+   * Extracts features for a candidate cluster/mention pair.
+   *
+   * @param jCas the CAS containing the annotations
+   * @param arg1 the cluster (collection relation) being considered
+   * @param arg2 the mention being considered
+   * @return an empty, mutable list until the extractor is implemented; never null
+   * @throws AnalysisEngineProcessException declared by the interface; not thrown here
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, CollectionTextRelation arg1, IdentifiedAnnotation arg2)
+      throws AnalysisEngineProcessException {
+    // TODO implement attribute-vector features. Return an empty list rather than null so
+    // callers that aggregate extractor results do not hit a NullPointerException.
+    return new ArrayList<>();
+  }
+
+}

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java?rev=1717336&r1=1717335&r2=1717336&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java Mon Nov 30 22:06:27 2015
@@ -17,7 +17,10 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
 import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
@@ -251,7 +254,10 @@ public class EvaluationOfEventCoreferenc
       AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
       aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
       aggregateBuilder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());
-      
+      aggregateBuilder.add(GenericCleartkAnalysisEngine.createAnnotatorDescription());
+      aggregateBuilder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription());
+      aggregateBuilder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription());
+
       aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ViewCreatorAnnotator.class, ViewCreatorAnnotator.PARAM_VIEW_NAME, "Baseline"));
       aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DocumentIDPrinter.class));
       aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphAnnotator.class));
@@ -331,6 +337,9 @@ public class EvaluationOfEventCoreferenc
     AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
     aggregateBuilder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
     aggregateBuilder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());
+    aggregateBuilder.add(GenericCleartkAnalysisEngine.createAnnotatorDescription());
+    aggregateBuilder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription());
+    aggregateBuilder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription());
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(DocumentIDPrinter.class));
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphAnnotator.class));
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ParagraphVectorAnnotator.class));