You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/01/07 20:31:12 UTC
svn commit: r1429977 - in
/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data:
GoldAnnotationAnalysis.java GoldAnnotationAnalysisPipeline.java
GoldAnnotationStats.java GoldAnnotationStatsCalculator.java
Author: dligach
Date: Mon Jan 7 19:31:12 2013
New Revision: 1429977
URL: http://svn.apache.org/viewvc?rev=1429977&view=rev
Log:
added token counts and relation counts
Added:
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java
- copied, changed from r1429860, incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java
- copied, changed from r1429860, incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java
Removed:
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java
Copied: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java (from r1429860, incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java)
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java?p2=incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java&p1=incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java&r1=1429860&r2=1429977&rev=1429977&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java Mon Jan 7 19:31:12 2013
@@ -38,7 +38,7 @@ import org.uimafit.pipeline.SimplePipeli
* @author dmitriy dligach
*
*/
-public class GoldAnnotationAnalysis {
+public class GoldAnnotationAnalysisPipeline {
public static class Options extends Options_ImplBase {
@@ -57,10 +57,10 @@ public class GoldAnnotationAnalysis {
List<File> trainFiles = Arrays.asList(options.inputDirectory.listFiles());
CollectionReader collectionReader = getCollectionReader(trainFiles);
- AnalysisEngine relationExtractorConsumer = AnalysisEngineFactory.createPrimitive(
- GoldAnnotationStats.class);
+ AnalysisEngine goldAnnotationStatsCalculator = AnalysisEngineFactory.createPrimitive(
+ GoldAnnotationStatsCalculator.class);
- SimplePipeline.runPipeline(collectionReader, relationExtractorConsumer);
+ SimplePipeline.runPipeline(collectionReader, goldAnnotationStatsCalculator);
}
private static CollectionReader getCollectionReader(List<File> items) throws Exception {
Copied: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java (from r1429860, incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java)
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java?p2=incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java&p1=incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java&r1=1429860&r2=1429977&rev=1429977&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java Mon Jan 7 19:31:12 2013
@@ -18,87 +18,74 @@
*/
package org.apache.ctakes.relationextractor.data;
-import java.util.List;
-
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.util.JCasUtil;
-import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Multiset;
/**
- * This is a sample relation annotation consumer.
- * Currently it displays the relation annotations.
- *
+ * Count various stats such as token and relation counts
+ * based on the gold standard data.
+ *
* @author dmitriy dligach
*
*/
-public class GoldAnnotationStats extends JCasAnnotator_ImplBase {
+public class GoldAnnotationStatsCalculator extends JCasAnnotator_ImplBase {
- // TODO: turn these into configuration parameters
- public final boolean displayEntities = false;
- public final boolean displayContext = false;
+ public static final String goldViewName = "GoldView";
+ public int tokenCount;
+ public Multiset<String> relationTypes;
@Override
+ public void initialize(UimaContext context) throws ResourceInitializationException {
+
+ tokenCount = 0;
+ relationTypes = HashMultiset.create();
+ }
+
+ @Override
+ public void collectionProcessComplete() throws AnalysisEngineProcessException {
+
+ System.out.println();
+ System.out.println("token count: " + tokenCount);
+ System.out.println("location_of count: " + relationTypes.count("location_of"));
+ System.out.println("degree_of count: " + relationTypes.count("degree_of"));
+ }
+
+ @Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
- JCas systemView;
+ JCas goldView;
try {
- systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+ goldView = jCas.getView(goldViewName);
} catch (CASException e) {
throw new AnalysisEngineProcessException(e);
}
+
+ countTokens(jCas); // tokens exist in system view (not in gold)
+ countRelationTypes(goldView); // gold relations are in gold view
+ }
+
+ private void countTokens(JCas jCas) {
- if(displayEntities) {
- System.out.println();
- for(IdentifiedAnnotation identifiedAnnotation : JCasUtil.select(systemView, IdentifiedAnnotation.class)) {
- String text = identifiedAnnotation.getCoveredText();
- int type = identifiedAnnotation.getTypeID();
- System.out.format("%s/%d\n", text, type);
- }
- System.out.println();
+ for(BaseToken baseToken : JCasUtil.select(jCas, BaseToken.class)) {
+ tokenCount++;
}
-
- // print relations
- System.out.println();
- for(BinaryTextRelation binaryTextRelation : JCasUtil.select(systemView, BinaryTextRelation.class)) {
-
- String category = binaryTextRelation.getCategory();
-
- IdentifiedAnnotation entity1; // entity whose role is "Argument"
- IdentifiedAnnotation entity2; // entity whose role is "Related_to"
-
- if(binaryTextRelation.getArg1().getRole().equals("Argument")) {
- entity1 = (IdentifiedAnnotation) binaryTextRelation.getArg1().getArgument();
- entity2 = (IdentifiedAnnotation) binaryTextRelation.getArg2().getArgument();
- } else {
- entity1 = (IdentifiedAnnotation) binaryTextRelation.getArg2().getArgument();
- entity2 = (IdentifiedAnnotation) binaryTextRelation.getArg1().getArgument();
- }
-
- String arg1 = entity1.getCoveredText();
- String arg2 = entity2.getCoveredText();
-
- int type1 = entity1.getTypeID();
- int type2 = entity2.getTypeID();
-
- // print relation and its arguments: location_of(colon/6, colon cancer/2)
- System.out.format("%s(%s/%d, %s/%d)\n", category, arg1, type1, arg2, type2);
-
- if(displayContext) {
- List<Sentence> list = JCasUtil.selectCovering(jCas, Sentence.class, entity1.getBegin(), entity1.getEnd());
-
- // print the sentence containing this instance
- for(Sentence s : list) {
- System.out.println(s.getCoveredText());
- }
- System.out.println();
- }
+ }
+
+ private void countRelationTypes(JCas jCas) {
+
+ for(BinaryTextRelation binaryTextRelation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
+ String category = binaryTextRelation.getCategory();
+ relationTypes.add(category);
}
- }
+ }
}