You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2013/05/20 17:37:05 UTC

svn commit: r1484501 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java

Author: james-masanz
Date: Mon May 20 15:37:04 2013
New Revision: 1484501

URL: http://svn.apache.org/r1484501
Log:
Make use of MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.
Also, allow for different number of annotations in gold view and system-generated annotations.

Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1484501&r1=1484500&r2=1484501&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Mon May 20 15:37:04 2013
@@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -63,6 +64,7 @@ import org.apache.uima.analysis_engine.A
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
@@ -315,6 +317,7 @@ protected static Options options = new O
       List<File> testFiles;
       if (options.evalOnly) {
     	  testFiles = Arrays.asList(options.evaluationOutputDirectory.listFiles());
+    	  logger.debug("evalOnly using files in directory " + evaluationOutputDirectory.getName() + " aka " + evaluationOutputDirectory.getCanonicalPath());
       } else {
     	  testFiles = Arrays.asList(options.testDirectory.listFiles());
       }
@@ -323,6 +326,10 @@ protected static Options options = new O
     	  CollectionReader trainCollectionReader = evaluation.getCollectionReader(trainFiles);
     	  evaluation.train(trainCollectionReader, modelsDir);
       }
+      if (testFiles==null || testFiles.size()==0) {
+    	  throw new RuntimeException("testFiles = " + testFiles + " testFiles.size() = " + (testFiles==null ? "null": testFiles.size())) ;
+      }
+      logger.debug("testFiles.size() = " + testFiles.size());
       CollectionReader testCollectionReader = evaluation.getCollectionReader(testFiles);
       Map<String, AnnotationStatistics> stats = evaluation.test(testCollectionReader, modelsDir);
       
@@ -595,6 +602,8 @@ public static void printScore(Map<String
 
     AggregateBuilder builder = new AggregateBuilder();
     
+    // directory is such as /cTAKES/workspaces/Apache-cTAKES-trunk/ctakes/ctakes-assertion/sharp_data/model/eval.model
+    
     AnalysisEngineDescription goldCopierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class);
     builder.add(goldCopierAnnotator);
     
@@ -619,6 +628,11 @@ public static void printScore(Map<String
 	            NoOpAnnotator.class,
 	            typeSystemDescription);
     	builder.add(noOp);
+    	
+        AnalysisEngineDescription mergeGold =
+    		AnalysisEngineFactory.createPrimitiveDescription(org.apache.ctakes.assertion.eval.MergeGoldViewFromOneCasIntoInitialViewOfAnotherCas.class, typeSystemDescription);
+    	builder.add(mergeGold);
+    	
     } else if (evaluationOutputDirectory!=null)  {
         AnalysisEngineDescription xwriter =
     		AnalysisEngineFactory.createPrimitiveDescription(
@@ -634,7 +648,8 @@ public static void printScore(Map<String
     }
     
     //SimplePipeline.runPipeline(collectionReader,  builder.createAggregateDescription());
-    AnalysisEngineDescription aggregateDescription = builder.createAggregateDescription();
+    //AnalysisEngineDescription aggregateDescription = builder.createAggregateDescription();
+    
     AnalysisEngine aggregate = builder.createAggregate();
     
     AnnotationStatistics polarityStats = new AnnotationStatistics();
@@ -678,13 +693,17 @@ public static void printScore(Map<String
 
     // run on existing output that has both system (or instance gathering) and gold
     for (JCas jCas : new JCasIterable(collectionReader, aggregate)) {
+    	
+    	printViewNames("Views found by JCasIterable:", jCas);
+    	
       JCas goldView;
       try {
         goldView = jCas.getView(GOLD_VIEW_NAME);
       } catch (CASException e) {
+    	logger.info("jCas.getViewName() = " + jCas.getViewName());
         throw new AnalysisEngineProcessException(e);
       }
-      
+
       String documentId = DocumentIDAnnotationUtil.getDocumentID(jCas);
       System.out.format("document id: %s%n", documentId);
       
@@ -693,7 +712,17 @@ public static void printScore(Map<String
       goldEntitiesAndEvents.addAll(goldEntities);
       Collection<EventMention> goldEvents = JCasUtil.select(goldView, EventMention.class);
       goldEntitiesAndEvents.addAll(goldEvents);
-//      System.out.format("gold entities: %d%ngold events: %d%n%n", goldEntities.size(), goldEvents.size());
+      // System.out.format("gold entities: %d%ngold events: %d%n%n", goldEntities.size(), goldEvents.size());
+      
+      if (goldEntitiesAndEvents.size()==0) { 
+    	  // gold annotations might have been read in as just IdentifiedAnnotation annotations
+    	  // since no EventMentio or EntityMention annotations were found, ok to just try IdentifiedAnnotation
+    	  // without concern for using some twice.
+          Collection<IdentifiedAnnotation> identifiedAnnotations = JCasUtil.select(goldView, IdentifiedAnnotation.class);
+          goldEntitiesAndEvents.addAll(identifiedAnnotations);
+    	  
+    	  
+      }
       
       Collection<IdentifiedAnnotation> systemEntitiesAndEvents = new ArrayList<IdentifiedAnnotation>();
       Collection<EntityMention> systemEntities = JCasUtil.select(jCas, EntityMention.class);
@@ -767,7 +796,77 @@ public static void printScore(Map<String
     return map;
   }
 
-  private static void printErrors(JCas jCas,
+  private static boolean DEBUG = false;
+  private static void printViewNames(String message, JCas jcas) {
+	
+  	Iterator<JCas> viewIter;
+	try {
+		viewIter = jcas.getViewIterator();
+	} catch (CASException e) {
+		e.printStackTrace();
+		return;
+	}
+  	while (viewIter.hasNext()) {
+  		JCas view = viewIter.next();
+  		String viewName = view.getViewName();
+  		logger.debug(message + " View name " + viewName);
+  		int numIndexedAnnotations = view.getAnnotationIndex().size();
+  		logger.debug(message + "  has " + numIndexedAnnotations + " indexed annotations");
+  		if (viewName.toLowerCase().contains("gold")) {
+  	  	    if (DEBUG) printAnnotations(IdentifiedAnnotation.type, view);
+  		} else {
+  			if (DEBUG) printAnnotations(EventMention.type, view);
+  			if (DEBUG) printAnnotations(EntityMention.type, view);
+  		}
+  	}
+
+  }
+
+private static void printAnnotations(int uimaAnnotationType, JCas view) {
+	
+	AnnotationIndex<Annotation> index = view.getAnnotationIndex(uimaAnnotationType);
+	Iterator<Annotation> iter = index.iterator();
+	output("Printing annotations for view " + view.getViewName());
+	while (iter.hasNext()) {
+		Annotation a = iter.next();
+		printAnnotation(a);
+	}
+	
+	//// Temp debug code
+	//if (view.getViewName().equals("GoldView")) {
+	//	AnnotationIndex<Annotation> indexOfAll = view.getAnnotationIndex();
+	//	Iterator<Annotation> iterOverAll = indexOfAll.iterator();
+	//	output("Printing ALL annotations for view " + view.getViewName());
+	//	while (iterOverAll.hasNext()) {
+	//		Annotation a = iterOverAll.next();
+	//		printAnnotation(a);
+	//	}
+	//	
+	//}
+	
+}
+
+private static void printAnnotation(Annotation a) {
+	
+	String s = String.format(" (%d, %d) ", a.getBegin(), a.getEnd());
+	if (a instanceof IdentifiedAnnotation) {
+		s = s + ((IdentifiedAnnotation) a).getTypeID() + "=typeID, ";
+	}
+	s = s + "|" + a.getCoveredText() + "|";
+	s = s + a.getClass().getCanonicalName();
+	output(s);
+	
+}
+
+
+private static void output(Object o) {
+	if (o==null) {
+		System.out.println(o);
+	} else {
+		System.out.println(o.toString());
+	}
+}
+private static void printErrors(JCas jCas,
 		  Collection<IdentifiedAnnotation> goldEntitiesAndEvents,
 		  Collection<IdentifiedAnnotation> systemEntitiesAndEvents, String classifierType, Object trueCategory, Class<? extends Object> categoryClass) throws ResourceProcessException {
 	  Map<HashableAnnotation, IdentifiedAnnotation> goldMap = Maps.newHashMap();
@@ -784,26 +883,44 @@ public static void printScore(Map<String
 	  for (HashableAnnotation key : sorted) {
 		  IdentifiedAnnotation goldAnnotation = goldMap.get(key);
 		  IdentifiedAnnotation systemAnnotation = systemMap.get(key);
-		  Feature feature = goldAnnotation.getType().getFeatureByBaseName(classifierType);
-		  Object goldLabel = getFeatureValue(feature, categoryClass, goldAnnotation);
-//		  Integer goldLabel = goldAnnotation.getIntValue(feature);
-		  feature = systemAnnotation.getType().getFeatureByBaseName(classifierType);
-		  Object systemLabel = getFeatureValue(feature, categoryClass, systemAnnotation);
-//		  Integer systemLabel = systemAnnotation.getIntValue(feature);
-		  if(!goldLabel.equals(systemLabel)){
-			  if(trueCategory == null){
-				  // used for multi-class case:
-				  System.out.println("Incorrectly labeled as " + systemLabel + " when the example was " + goldLabel + ": " + formatError(jCas, goldAnnotation));
-			  }else if(systemLabel.equals(trueCategory)){
-				  System.out.println(classifierType+" FP: " + formatError(jCas, systemAnnotation));
-			  }else{
-				  System.out.println(classifierType+" FN: " + formatError(jCas, goldAnnotation));
-			  }
-		  }else{
-			  if(systemLabel.equals(trueCategory)){
-				  System.out.println(classifierType+" TP: " + formatError(jCas, systemAnnotation));
+		  Object goldLabel=null;
+		  Object systemLabel=null;
+		  if (goldAnnotation == null) {
+			  logger.debug(key + " not found in gold annotations ");
+		  } else {
+			  Feature feature = goldAnnotation.getType().getFeatureByBaseName(classifierType);
+			  goldLabel = getFeatureValue(feature, categoryClass, goldAnnotation);
+			  //  Integer goldLabel = goldAnnotation.getIntValue(feature);
+		  }
+		  
+		  if (systemAnnotation == null) {
+			  logger.info(key + " not found in system annotations ");
+		  } else {
+			  Feature feature = systemAnnotation.getType().getFeatureByBaseName(classifierType);
+			  systemLabel = getFeatureValue(feature, categoryClass, systemAnnotation);
+			  //  Integer systemLabel = systemAnnotation.getIntValue(feature);
+		  }
+		  
+		  
+		  if (goldLabel==null) {
+			  // skip counting the attribute value since we have no gold label to compare to
+			  logger.debug("Skipping annotation with label " + systemLabel + " because gold label is null");
+		  } else  {
+			  if(!goldLabel.equals(systemLabel)){
+				  if(trueCategory == null){
+					  // used for multi-class case:
+					  System.out.println("Incorrectly labeled as " + systemLabel + " when the example was " + goldLabel + ": " + formatError(jCas, goldAnnotation));
+				  }else if(systemLabel.equals(trueCategory)){
+					  System.out.println(classifierType+" FP: " + formatError(jCas, systemAnnotation));
+				  }else{
+					  System.out.println(classifierType+" FN: " + formatError(jCas, goldAnnotation));
+				  }
 			  }else{
-				  System.out.println(classifierType+" TN: " + formatError(jCas, systemAnnotation));
+				  if(systemLabel.equals(trueCategory)){
+					  System.out.println(classifierType+" TP: " + formatError(jCas, systemAnnotation));
+				  }else{
+					  System.out.println(classifierType+" TN: " + formatError(jCas, systemAnnotation));
+				  }
 			  }
 		  }
 	  }