You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/04/08 18:06:54 UTC
svn commit: r1465671 -
/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java
Author: dligach
Date: Mon Apr 8 16:06:54 2013
New Revision: 1465671
URL: http://svn.apache.org/r1465671
Log:
added fine grain stats (e.g. counts of as-ss entity pairs)
Modified:
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java?rev=1465671&r1=1465670&r2=1465671&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java Mon Apr 8 16:06:54 2013
@@ -18,14 +18,17 @@
*/
package org.apache.ctakes.relationextractor.data;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -53,6 +56,7 @@ public class GoldAnnotationStatsCalculat
public int entityMentionCount;
public int entityMentionPairCount;
public Multiset<String> relationTypes;
+ public Multiset<String> entityMentionPairTypes;
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
@@ -62,6 +66,7 @@ public class GoldAnnotationStatsCalculat
entityMentionCount = 0;
entityMentionPairCount = 0;
relationTypes = HashMultiset.create();
+ entityMentionPairTypes = HashMultiset.create();
}
@Override
@@ -74,6 +79,11 @@ public class GoldAnnotationStatsCalculat
System.out.format("%-30s%d\n", "entity mention pair count", entityMentionPairCount);
System.out.format("%-30s%d\n", "location_of count", relationTypes.count("location_of"));
System.out.format("%-30s%d\n", "degree_of count", relationTypes.count("degree_of"));
+
+ System.out.println();
+ System.out.format("%-30s%d\n", "as-disorder", entityMentionPairTypes.count("as-dd"));
+ System.out.format("%-30s%d\n", "as-ss", entityMentionPairTypes.count("as-ss"));
+ System.out.format("%-30s%d\n", "as-procedure", entityMentionPairTypes.count("as-procedure"));
}
@Override
@@ -89,7 +99,8 @@ public class GoldAnnotationStatsCalculat
countTokens(jCas); // tokens exist in system view (not in gold)
countSentences(jCas);
countEntities(goldView);
- countEntityMentionPairs(jCas, goldView); // TODO: need gold view?
+ countEntityMentionPairs(jCas, goldView);
+ countEntityMentionPairTypes(jCas, goldView);
countRelationTypes(goldView);
}
@@ -112,7 +123,29 @@ public class GoldAnnotationStatsCalculat
entityMentionPairCount += pairs.size();
}
}
-
+
+ /**
+  * Tallies the entity-type combination of every candidate relation argument pair.
+  *
+  * For each {@code Sentence} selected from {@code jCas}, candidate pairs are requested from an
+  * {@code EntityMentionPairRelationExtractorAnnotator} using {@code goldView}; for every pair the
+  * key {@code type1-type2} (labels produced by {@code getEntityType}) is added to
+  * {@code entityMentionPairTypes}.
+  *
+  * @param jCas view the sentences are selected from
+  * @param goldView view handed to the annotator when generating candidate pairs
+  */
+ private void countEntityMentionPairTypes(JCas jCas, JCas goldView) {
+   // Built once and reused for all sentences; here it is only used to generate candidate pairs.
+   EntityMentionPairRelationExtractorAnnotator emPairAnnot = new EntityMentionPairRelationExtractorAnnotator();
+
+   for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+     // NOTE(review): candidate pairs from DegreeOfRelationExtractorAnnotator were disabled
+     // (commented out) in the original and are therefore not counted here - confirm intent.
+     List<IdentifiedAnnotationPair> pairs = emPairAnnot.getCandidateRelationArgumentPairs(goldView, sentence);
+
+     for(IdentifiedAnnotationPair pair : pairs) {
+       String type1 = getEntityType(pair.getArg1().getTypeID());
+       String type2 = getEntityType(pair.getArg2().getTypeID());
+       // Key is ordered: first argument's label, a dash, then the second argument's label.
+       entityMentionPairTypes.add(type1 + "-" + type2);
+     }
+   }
+ }
+
private void countRelationTypes(JCas jCas) {
for(BinaryTextRelation binaryTextRelation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
@@ -126,4 +159,30 @@ public class GoldAnnotationStatsCalculat
Collection<EntityMention> entityMentions = JCasUtil.select(jCas, EntityMention.class);
entityMentionCount += entityMentions.size();
}
+
+ /**
+  * Translates a numeric entity type id into the short label used when building pair-type keys.
+  *
+  * @param typeId numeric type id, as obtained from {@code getTypeID()} on an annotation
+  * @return the label for ids 0 through 6, or {@code "n/a"} for any other id
+  */
+ private String getEntityType(int typeId) {
+   switch(typeId) {
+     case 0:
+       return "modifier";
+     case 1:
+       return "drug";
+     case 2:
+       return "dd";
+     case 3:
+       return "ss";
+     case 4:
+       return "none";
+     case 5:
+       return "procedure";
+     case 6:
+       return "as";
+     default:
+       // Any id outside 0-6 has no known label.
+       return "n/a";
+   }
+ }
}