You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/01/07 20:31:12 UTC

svn commit: r1429977 - in /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data: GoldAnnotationAnalysis.java GoldAnnotationAnalysisPipeline.java GoldAnnotationStats.java GoldAnnotationStatsCalculator.java

Author: dligach
Date: Mon Jan  7 19:31:12 2013
New Revision: 1429977

URL: http://svn.apache.org/viewvc?rev=1429977&view=rev
Log:
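Added token counts and relation counts to the gold annotation statistics; renamed GoldAnnotationAnalysis to GoldAnnotationAnalysisPipeline and GoldAnnotationStats to GoldAnnotationStatsCalculator.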

Added:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java
      - copied, changed from r1429860, incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java
      - copied, changed from r1429860, incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java
Removed:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java

Copied: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java (from r1429860, incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java)
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java?p2=incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java&p1=incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java&r1=1429860&r2=1429977&rev=1429977&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysis.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationAnalysisPipeline.java Mon Jan  7 19:31:12 2013
@@ -38,7 +38,7 @@ import org.uimafit.pipeline.SimplePipeli
  * @author dmitriy dligach
  *
  */
-public class GoldAnnotationAnalysis {
+public class GoldAnnotationAnalysisPipeline {
 
   public static class Options extends Options_ImplBase {
 
@@ -57,10 +57,10 @@ public class GoldAnnotationAnalysis {
 		List<File> trainFiles = Arrays.asList(options.inputDirectory.listFiles());
     CollectionReader collectionReader = getCollectionReader(trainFiles);
 		
-    AnalysisEngine relationExtractorConsumer = AnalysisEngineFactory.createPrimitive(
-    		GoldAnnotationStats.class);
+    AnalysisEngine goldAnnotationStatsCalculator = AnalysisEngineFactory.createPrimitive(
+    		GoldAnnotationStatsCalculator.class);
     		
-		SimplePipeline.runPipeline(collectionReader, relationExtractorConsumer);
+		SimplePipeline.runPipeline(collectionReader, goldAnnotationStatsCalculator);
 	}
 	
   private static CollectionReader getCollectionReader(List<File> items) throws Exception {

Copied: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java (from r1429860, incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java)
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java?p2=incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java&p1=incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java&r1=1429860&r2=1429977&rev=1429977&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStats.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java Mon Jan  7 19:31:12 2013
@@ -18,87 +18,74 @@
  */
 package org.apache.ctakes.relationextractor.data;
 
-import java.util.List;
-
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.util.JCasUtil;
 
-import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Multiset;
 
 /**
- * This is a sample relation annotation consumer. 
- * Currently it displays the relation annotations.
- * 
+ * Count various stats such as token and relation counts 
+ * based on the gold standard data.
+ *  
  * @author dmitriy dligach
  *
  */
-public class GoldAnnotationStats extends JCasAnnotator_ImplBase {
+public class GoldAnnotationStatsCalculator extends JCasAnnotator_ImplBase {
 
-	// TODO: turn these into configuration parameters
-	public final boolean displayEntities = false;
-	public final boolean displayContext = false;
+	public static final String goldViewName = "GoldView";
+	public int tokenCount;
+	public Multiset<String> relationTypes;
 	
 	@Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+	  
+	  tokenCount = 0;
+	  relationTypes = HashMultiset.create();
+	}
+  
+	@Override
+	public void collectionProcessComplete() throws AnalysisEngineProcessException {
+
+	  System.out.println();
+	  System.out.println("token count: " + tokenCount);
+	  System.out.println("location_of count: " + relationTypes.count("location_of"));
+	  System.out.println("degree_of count: " + relationTypes.count("degree_of"));
+  }
+  
+	@Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
 
-    JCas systemView;
+    JCas goldView;
     try {
-      systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+      goldView = jCas.getView(goldViewName);
     } catch (CASException e) {
       throw new AnalysisEngineProcessException(e);
     }	  
+
+    countTokens(jCas); // tokens exist in system view (not in gold)
+    countRelationTypes(goldView); // gold relations are in gold view
+  }
+	
+	private void countTokens(JCas jCas) {
     
-    if(displayEntities) {
-    	System.out.println();
-    	for(IdentifiedAnnotation identifiedAnnotation : JCasUtil.select(systemView, IdentifiedAnnotation.class)) {
-    		String text = identifiedAnnotation.getCoveredText();
-    		int type = identifiedAnnotation.getTypeID();
-    		System.out.format("%s/%d\n", text, type);
-    	}
-    	System.out.println();
+	  for(BaseToken baseToken : JCasUtil.select(jCas, BaseToken.class)) {
+	    tokenCount++;
     }
-
-    // print relations
-    System.out.println();
-    for(BinaryTextRelation binaryTextRelation : JCasUtil.select(systemView, BinaryTextRelation.class)) {
-    	    	
-    	String category = binaryTextRelation.getCategory();
-    	
-    	IdentifiedAnnotation entity1; // entity whose role is "Argument"
-    	IdentifiedAnnotation entity2; // entity whose role is "Related_to"
-    	
-    	if(binaryTextRelation.getArg1().getRole().equals("Argument")) {
-    		entity1 = (IdentifiedAnnotation) binaryTextRelation.getArg1().getArgument();
-    		entity2 = (IdentifiedAnnotation) binaryTextRelation.getArg2().getArgument();
-    	} else {
-    		entity1 = (IdentifiedAnnotation) binaryTextRelation.getArg2().getArgument();
-    		entity2 = (IdentifiedAnnotation) binaryTextRelation.getArg1().getArgument();
-    	}
-    	
-    	String arg1 = entity1.getCoveredText();
-    	String arg2 = entity2.getCoveredText();
-    	
-    	int type1 = entity1.getTypeID();
-    	int type2 = entity2.getTypeID();
-    	
-    	// print relation and its arguments: location_of(colon/6, colon cancer/2)
-    	System.out.format("%s(%s/%d, %s/%d)\n", category, arg1, type1, arg2, type2);
-
-    	if(displayContext) {
-    		List<Sentence> list = JCasUtil.selectCovering(jCas, Sentence.class, entity1.getBegin(), entity1.getEnd());
-    		
-    		// print the sentence containing this instance
-    		for(Sentence s : list) {
-    			System.out.println(s.getCoveredText());
-    		}
-    		System.out.println();
-    	}
+	}
+	
+	private void countRelationTypes(JCas jCas) {
+	  
+    for(BinaryTextRelation binaryTextRelation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
+      String category = binaryTextRelation.getCategory();
+      relationTypes.add(category);
     }
-  }
+	}
 }