You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/04/08 18:06:54 UTC

svn commit: r1465671 - /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java

Author: dligach
Date: Mon Apr  8 16:06:54 2013
New Revision: 1465671

URL: http://svn.apache.org/r1465671
Log:
Added fine-grained stats (e.g. counts of as-ss entity pairs)

Modified:
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java?rev=1465671&r1=1465670&r2=1465671&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java Mon Apr  8 16:06:54 2013
@@ -18,14 +18,17 @@
  */
 package org.apache.ctakes.relationextractor.data;
 
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 
+import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
 import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -53,6 +56,7 @@ public class GoldAnnotationStatsCalculat
 	public int entityMentionCount;
 	public int entityMentionPairCount;
 	public Multiset<String> relationTypes;
+	public Multiset<String> entityMentionPairTypes;
 	
 	@Override
   public void initialize(UimaContext context) throws ResourceInitializationException {
@@ -62,6 +66,7 @@ public class GoldAnnotationStatsCalculat
 	  entityMentionCount = 0;
 	  entityMentionPairCount = 0;
 	  relationTypes = HashMultiset.create();
+	  entityMentionPairTypes = HashMultiset.create();
 	}
   
 	@Override
@@ -74,6 +79,11 @@ public class GoldAnnotationStatsCalculat
 	  System.out.format("%-30s%d\n", "entity mention pair count", entityMentionPairCount);
 	  System.out.format("%-30s%d\n", "location_of count", relationTypes.count("location_of"));
 	  System.out.format("%-30s%d\n", "degree_of count", relationTypes.count("degree_of"));
+	  
+	  System.out.println();
+	  System.out.format("%-30s%d\n", "as-disorder", entityMentionPairTypes.count("as-dd"));
+	  System.out.format("%-30s%d\n", "as-ss", entityMentionPairTypes.count("as-ss"));
+	  System.out.format("%-30s%d\n", "as-procedure", entityMentionPairTypes.count("as-procedure"));
   }
   
 	@Override
@@ -89,7 +99,8 @@ public class GoldAnnotationStatsCalculat
     countTokens(jCas); // tokens exist in system view (not in gold)
     countSentences(jCas);
     countEntities(goldView);
-    countEntityMentionPairs(jCas, goldView); // TODO: need gold view?
+    countEntityMentionPairs(jCas, goldView); 
+    countEntityMentionPairTypes(jCas, goldView);
     countRelationTypes(goldView); 
   }
 	
@@ -112,7 +123,29 @@ public class GoldAnnotationStatsCalculat
       entityMentionPairCount += pairs.size();
     }
   }
-	
+
+  private void countEntityMentionPairTypes(JCas jCas, JCas goldView) {
+    // Tallies candidate argument pairs by entity-type combination (keys such as "as-ss") into entityMentionPairTypes.
+    for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) { // sentences are read from jCas; pairs are looked up in goldView
+      EntityMentionPairRelationExtractorAnnotator emPairAnnot = new EntityMentionPairRelationExtractorAnnotator();
+      DegreeOfRelationExtractorAnnotator degreeOfAnnot = new DegreeOfRelationExtractorAnnotator(); // NOTE(review): only referenced by the commented-out lines below
+      // Only the entity-mention-pair annotator contributes candidates; the degree-of source is currently disabled.
+      List<IdentifiedAnnotationPair> pairs1 = emPairAnnot.getCandidateRelationArgumentPairs(goldView, sentence);
+      // List<IdentifiedAnnotationPair> pairs2 = degreeOfAnnot.getCandidateRelationArgumentPairs(goldView, sentence);
+      List<IdentifiedAnnotationPair> pairs = new ArrayList<IdentifiedAnnotationPair>();
+      pairs.addAll(pairs1);
+      // pairs.addAll(pairs2);
+      // Key is "<arg1 label>-<arg2 label>", so argument order matters ("as-dd" and "dd-as" are distinct keys).
+      for(IdentifiedAnnotationPair pair : pairs) {
+        IdentifiedAnnotation arg1 = pair.getArg1();
+        IdentifiedAnnotation arg2 = pair.getArg2();
+        String type1 = getEntityType(arg1.getTypeID()); // numeric type id -> short label
+        String type2 = getEntityType(arg2.getTypeID());
+        entityMentionPairTypes.add(type1 + "-" + type2); // Multiset.add increments the count for this key
+      }
+    }
+  }
+
 	private void countRelationTypes(JCas jCas) {
 	  
     for(BinaryTextRelation binaryTextRelation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
@@ -126,4 +159,30 @@ public class GoldAnnotationStatsCalculat
 	  Collection<EntityMention> entityMentions = JCasUtil.select(jCas, EntityMention.class);
 	  entityMentionCount += entityMentions.size();
 	}
+	
+	private String getEntityType(int typeId) { // maps a numeric type id to the short label used in pair-type keys
+	  // Ids 0-6 have fixed labels; any other id yields "n/a". Label meanings are not defined in this file.
+	  if(typeId == 0) {
+      return "modifier";
+    }
+	  if(typeId == 1) {
+	    return "drug";
+	  }
+	  if(typeId == 2) {
+	    return "dd"; // the "as-dd" count is printed under the label "as-disorder", so presumably disorder — TODO confirm
+	  } 
+	  if(typeId == 3) {
+      return "ss"; // presumably sign/symptom — TODO confirm
+    }
+	  if(typeId == 4) {
+      return "none";
+    }
+	  if(typeId == 5) {
+      return "procedure";
+    }
+	  if(typeId == 6) {
+      return "as"; // presumably anatomical site — TODO confirm
+    }
+	  return "n/a"; // fallback for ids outside 0-6
+	}
 }