You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2013/01/29 19:56:22 UTC

svn commit: r1440057 - in /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor: ae/RelationExtractorAnnotator.java eval/RelationExtractorEvaluation.java

Author: stevenbethard
Date: Tue Jan 29 18:56:22 2013
New Revision: 1440057

URL: http://svn.apache.org/viewvc?rev=1440057&view=rev
Log:
Fixes view handling in RelationExtractorEvaluation so that RelationExtractorAnnotator no longer has to be view-aware

Modified:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java?rev=1440057&r1=1440056&r2=1440057&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java Tue Jan 29 18:56:22 2013
@@ -25,19 +25,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CASException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.cleartk.classifier.CleartkAnnotator;
-import org.cleartk.classifier.CleartkProcessingException;
-import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.Instance;
-import org.uimafit.descriptor.ConfigurationParameter;
-import org.uimafit.util.JCasUtil;
-
 import org.apache.ctakes.relationextractor.ae.features.DependencyPathFeaturesExtractor;
 import org.apache.ctakes.relationextractor.ae.features.DependencyTreeFeaturesExtractor;
 import org.apache.ctakes.relationextractor.ae.features.NamedEntityFeaturesExtractor;
@@ -47,8 +34,17 @@ import org.apache.ctakes.relationextract
 import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.relation.RelationArgument;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.CleartkProcessingException;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.util.JCasUtil;
 
 import com.google.common.collect.Lists;
 
@@ -56,14 +52,6 @@ public abstract class RelationExtractorA
 
   public static final String NO_RELATION_CATEGORY = "-NONE-";
 
-  public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";
-
-  @ConfigurationParameter(
-      name = PARAM_GOLD_VIEW_NAME,
-      mandatory = false,
-      description = "view containing the manual relation annotations; needed for training")
-  protected String goldViewName;
-
   public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE = "ProbabilityOfKeepingANegativeExample";
 
   @ConfigurationParameter(
@@ -92,14 +80,6 @@ public abstract class RelationExtractorA
         new DependencyPathFeaturesExtractor()
         );
   }
-
-  @Override
-  public void initialize(UimaContext context) throws ResourceInitializationException {
-    super.initialize(context);
-    if (this.isTraining() && this.goldViewName == null) {
-      throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
-    }
-  }
  
   /**
    * Selects the relevant mentions/annotations within a sentence for relation identification/extraction.
@@ -111,17 +91,6 @@ public abstract class RelationExtractorA
    */
   @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
-    // during training, pull entity and relation annotations from the manual annotation view
-  	JCas identifiedAnnotationView, relationView;
-    if (this.isTraining()) {
-      try {
-        identifiedAnnotationView = relationView = jCas.getView(this.goldViewName);
-      } catch (CASException e) {
-        throw new AnalysisEngineProcessException(e);
-      }
-    } else {
-      identifiedAnnotationView = relationView = jCas;
-    }
 
     // lookup from pair of annotations to binary text relation
     // note: assumes that there will be at most one relation per pair
@@ -129,7 +98,7 @@ public abstract class RelationExtractorA
     relationLookup = new HashMap<List<Annotation>, BinaryTextRelation>();
     if (this.isTraining()) {
       relationLookup = new HashMap<List<Annotation>, BinaryTextRelation>();
-      for (BinaryTextRelation relation : JCasUtil.select(relationView, BinaryTextRelation.class)) {
+      for (BinaryTextRelation relation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
         Annotation arg1 = relation.getArg1().getArgument();
         Annotation arg2 = relation.getArg2().getArgument();
         // The key is a list of args so we can do bi-directional lookup
@@ -141,7 +110,7 @@ public abstract class RelationExtractorA
     for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
 
     	// collect all relevant relation arguments from the sentence
-    	List<IdentifiedAnnotationPair> candidatePairs = this.getCandidateRelationArgumentPairs(identifiedAnnotationView, sentence);
+    	List<IdentifiedAnnotationPair> candidatePairs = this.getCandidateRelationArgumentPairs(jCas, sentence);
 
     	// walk through the pairs of annotations
     	for (IdentifiedAnnotationPair pair : candidatePairs) {
@@ -186,15 +155,15 @@ public abstract class RelationExtractorA
     				}
 
     				// add the relation to the CAS
-    				RelationArgument relArg1 = new RelationArgument(relationView);
+    				RelationArgument relArg1 = new RelationArgument(jCas);
     				relArg1.setArgument(arg1);
     				relArg1.setRole("Argument");
     				relArg1.addToIndexes();
-    				RelationArgument relArg2 = new RelationArgument(relationView);
+    				RelationArgument relArg2 = new RelationArgument(jCas);
     				relArg2.setArgument(arg2);
     				relArg2.setRole("Related_to");
     				relArg2.addToIndexes();
-    				BinaryTextRelation relation = new BinaryTextRelation(relationView);
+    				BinaryTextRelation relation = new BinaryTextRelation(jCas);
     				relation.setArg1(relArg1);
     				relation.setArg2(relArg2);
     				relation.setCategory(predictedCategory);

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1440057&r1=1440056&r2=1440057&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Tue Jan 29 18:56:22 2013
@@ -385,16 +385,14 @@ public class RelationExtractorEvaluation
         RemoveOtherRelations.PARAM_RELATION_CATEGORY,
         this.relationCategory),
         CAS.NAME_DEFAULT_SOFA, GOLD_VIEW_NAME);
-    // replace cTAKES entity mentions and modifiers in the system view with the gold annotations
-    builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesEntityMentionsAndModifiersWithGold.class));
+    // replace cTAKES mentions in the system view with gold mentions and copy in the gold relations
+    builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveCTakesMentionsAndCopyGoldRelations.class));
     // add the relation extractor, configured for training mode
     AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
         this.classifierAnnotatorClass,
         this.additionalParameters);
     ConfigurationParameterFactory.addConfigurationParameters(
         classifierAnnotator,
-        RelationExtractorAnnotator.PARAM_GOLD_VIEW_NAME,
-        RelationExtractorEvaluation.GOLD_VIEW_NAME,
         DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
         this.dataWriterClass,
         DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
@@ -430,7 +428,7 @@ public class RelationExtractorEvaluation
       builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveSmallerEntityMentions.class));
     } else {
       // replace cTAKES entity mentions and modifiers in the system view with the gold annotations
-      builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesEntityMentionsAndModifiersWithGold.class));
+      builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesMentionsWithGoldMentions.class));
     }
     // add the relation extractor, configured for classification mode
     AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
@@ -715,11 +713,10 @@ public class RelationExtractorEvaluation
   }
 
   /**
-   * Annotator that removes cTAKES EntityMentions and Modifiers from the system view, and copies
-   * over the manually annotated EntityMentions and Modifiers from the gold view.
-   * 
+   * Annotator that removes cTAKES mentions from the system view and copies gold mentions and
+   * relations from the gold view to the system view.
    */
-  public static class ReplaceCTakesEntityMentionsAndModifiersWithGold extends
+  public static class RemoveCTakesMentionsAndCopyGoldRelations extends
       JCasAnnotator_ImplBase {
 
     @Override
@@ -731,7 +728,7 @@ public class RelationExtractorEvaluation
       } catch (CASException e) {
         throw new AnalysisEngineProcessException(e);
       }
-
+      
       // remove cTAKES EntityMentions and Modifiers from system view
       List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
       cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
@@ -745,16 +742,31 @@ public class RelationExtractorEvaluation
       goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
       goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
       CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+      Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
       for (IdentifiedAnnotation goldMention : goldMentions) {
         Annotation copy = (Annotation) copier.copyFs(goldMention);
-        Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
         copy.setFeatureValue(sofaFeature, systemView.getSofa());
         copy.addToIndexes();
       }
+
+      // copy gold relations to the system view
+      for (BinaryTextRelation goldRelation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
+        BinaryTextRelation relation = (BinaryTextRelation) copier.copyFs(goldRelation);
+        relation.addToIndexes(systemView);
+        for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+          relArg.addToIndexes(systemView);
+          // relArg.getArgument() should have been added to indexes with mentions above
+        }
+      }
     }
   }
 
-  public static class ReplaceGoldEntityMentionsAndModifiersWithCTakes extends
+  /**
+   * Annotator that removes cTAKES EntityMentions and Modifiers from the system view, and copies
+   * over the manually annotated EntityMentions and Modifiers from the gold view.
+   * 
+   */
+  public static class ReplaceCTakesMentionsWithGoldMentions extends
       JCasAnnotator_ImplBase {
 
     @Override
@@ -767,73 +779,28 @@ public class RelationExtractorEvaluation
         throw new AnalysisEngineProcessException(e);
       }
 
-      // remove manual EntityMentions and Modifiers from gold view
-      List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
-      goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
-      goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
-      for (IdentifiedAnnotation goldMention : goldMentions) {
-        goldMention.removeFromIndexes();
-      }
-
-      // copy cTAKES EntityMentions and Modifiers to gold view
+      // remove cTAKES EntityMentions and Modifiers from system view
       List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
       cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
       cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
-      CasCopier copier = new CasCopier(systemView.getCas(), goldView.getCas());
       for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
-        Annotation copy = (Annotation) copier.copyFs(cTakesMention);
-        Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
-        copy.setFeatureValue(sofaFeature, goldView.getSofa());
-        copy.addToIndexes();
+        cTakesMention.removeFromIndexes();
       }
 
-      // replace gold EntityMentions and Modifiers in relations with cTAKES ones
-      List<BinaryTextRelation> relations = new ArrayList<BinaryTextRelation>();
-      relations.addAll(JCasUtil.select(goldView, BinaryTextRelation.class));
-      for (BinaryTextRelation relation : relations) {
-
-        // attempt to replace the gold RelationArguments with system ones
-        for (RelationArgument relArg : Arrays.asList(relation.getArg1(), relation.getArg2())) {
-          IdentifiedAnnotation goldArg = (IdentifiedAnnotation) relArg.getArgument();
-          Class<? extends IdentifiedAnnotation> argClass = goldArg.getClass();
-
-          // find all annotations covered by the gold argument and of the same class (these should
-          // be the ones copied over from the cTAKES output earlier)
-          List<? extends IdentifiedAnnotation> systemArgs = JCasUtil.selectCovered(
-              goldView,
-              argClass,
-              goldArg);
-
-          // find the largest covered annotation that has the same type
-          IdentifiedAnnotation bestFitArg = null;
-          int maxSize = 0;
-          for (IdentifiedAnnotation systemArg : systemArgs) {
-            int size = systemArg.getEnd() - systemArg.getBegin();
-            if (size >= maxSize && goldArg.getTypeID() == systemArg.getTypeID()) {
-              maxSize = size;
-              bestFitArg = systemArg;
-            }
-          }
-          if (bestFitArg != null) {
-            relArg.setArgument(bestFitArg);
-          }
-
-          // log a message if we didn't find a perfect match
-          if (maxSize != goldArg.getEnd() - goldArg.getBegin()) {
-            List<String> choices = new ArrayList<String>();
-            for (IdentifiedAnnotation systemArg : systemArgs) {
-              choices.add(format(systemArg));
-            }
-            String actionFormat = bestFitArg == null ? "dropping" : "using %s instead of";
-            String action = String.format(actionFormat, format(bestFitArg));
-            String message = String.format("%s %s; choices: %s", action, format(goldArg), choices);
-            this.getContext().getLogger().log(Level.WARNING, message);
-          }
-        }
+      // copy gold EntityMentions and Modifiers to the system view
+      List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
+      goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
+      goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
+      CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+      for (IdentifiedAnnotation goldMention : goldMentions) {
+        Annotation copy = (Annotation) copier.copyFs(goldMention);
+        Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
+        copy.setFeatureValue(sofaFeature, systemView.getSofa());
+        copy.addToIndexes();
       }
     }
   }
-  
+
   static String format(IdentifiedAnnotation a) {
     return a == null ? null : String.format("\"%s\"(type=%d)", a.getCoveredText(), a.getTypeID());
   }