You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2013/01/09 06:17:51 UTC

svn commit: r1430684 - in /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor: eval/RelationExtractorEvaluation.java pipelines/RelationExtractorTrain.java

Author: stevenbethard
Date: Wed Jan  9 05:17:50 2013
New Revision: 1430684

URL: http://svn.apache.org/viewvc?rev=1430684&view=rev
Log:
Adds options for evaluating relation extraction on cTAKES (system) mentions, with two ways of mapping them to gold mentions: --allow-smaller-system-arguments and --ignore-impossible-gold-relations

Modified:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1430684&r1=1430683&r2=1430684&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Wed Jan  9 05:17:50 2013
@@ -118,6 +118,16 @@ public class RelationExtractorEvaluation
     public boolean testOnCTakes = false;
 
     @Option(
+        name = "--allow-smaller-system-arguments",
+        usage = "for evaluation, allow system relation arguments to match gold relation arguments that enclose them")
+    public boolean allowSmallerSystemArguments = false;
+
+    @Option(
+        name = "--ignore-impossible-gold-relations",
+        usage = "for evaluation, ignore gold relations that would be impossible to find because there are no corresponding system mentions")
+    public boolean ignoreImpossibleGoldRelations = false;
+
+    @Option(
         name = "--print-errors",
         usage = "print relations that were incorrectly predicted")
     public boolean printErrors = false;
@@ -225,6 +235,8 @@ public class RelationExtractorEvaluation
             additionalParameters,
             trainingArguments,
             options.testOnCTakes,
+            options.allowSmallerSystemArguments,
+            options.ignoreImpossibleGoldRelations,
             options.printErrors);
 
         if (options.devDirectory != null) {
@@ -301,6 +313,11 @@ public class RelationExtractorEvaluation
    *          Arguments that should be passed to the classifier's train method
    * @param testOnCTakes
    *          During testing, use annotations from cTAKES, not from the gold standard
+   * @param allowSmallerSystemArguments
+   *          During testing, allow system annotations to match gold annotations that enclose them
+   * @param ignoreImpossibleGoldRelations
+   *          During testing, ignore gold relations that would be impossible to find because there
+   *          are no corresponding system mentions
    */
   public RelationExtractorEvaluation(
       File baseDirectory,
@@ -310,6 +327,8 @@ public class RelationExtractorEvaluation
       Object[] additionalParameters,
       String[] trainingArguments,
       boolean testOnCTakes,
+      boolean allowSmallerSystemArguments,
+      boolean ignoreImpossibleGoldRelations,
       boolean printErrors) {
     super(baseDirectory);
     this.relationCategory = relationCategory;
@@ -318,6 +337,8 @@ public class RelationExtractorEvaluation
     this.additionalParameters = additionalParameters;
     this.trainingArguments = trainingArguments;
     this.testOnCTakes = testOnCTakes;
+    this.allowSmallerSystemArguments = allowSmallerSystemArguments;
+    this.ignoreImpossibleGoldRelations = ignoreImpossibleGoldRelations;
     this.printErrors = printErrors;
   }
   
@@ -333,6 +354,10 @@ public class RelationExtractorEvaluation
   
   private boolean testOnCTakes;
   
+  private boolean allowSmallerSystemArguments;
+  
+  private boolean ignoreImpossibleGoldRelations;
+  
   private boolean printErrors;
 
   @Override
@@ -403,8 +428,6 @@ public class RelationExtractorEvaluation
       builder.add(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(source));
       // remove extraneous entity mentions
       builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveSmallerEntityMentions.class));
-      // replace entity mentions in the gold view with the cTAKES entity mentions 
-      builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceGoldEntityMentionsAndModifiersWithCTakes.class));
     } else {
       // replace cTAKES entity mentions and modifiers in the system view with the gold annotations
       builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesEntityMentionsAndModifiersWithGold.class));
@@ -448,6 +471,74 @@ public class RelationExtractorEvaluation
       Collection<BinaryTextRelation> systemBinaryTextRelations = JCasUtil.select(
           jCas,
           BinaryTextRelation.class);
+      
+      if (this.ignoreImpossibleGoldRelations) {
+        // collect only relations where both arguments have some possible system arguments
+        List<BinaryTextRelation> relations = Lists.newArrayList();
+        for (BinaryTextRelation relation : goldBinaryTextRelations) {
+          boolean hasSystemArgs = true;
+          for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+            IdentifiedAnnotation goldArg = (IdentifiedAnnotation) relArg.getArgument();
+            Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
+            boolean noSystemArg = JCasUtil.selectCovered(jCas, goldClass, goldArg).isEmpty();
+            hasSystemArgs = hasSystemArgs && !noSystemArg;
+          }
+          if (hasSystemArgs) {
+            relations.add(relation);
+          } else {
+            IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
+            IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
+            String messageFormat = "removing relation between %s and %s which is impossible to "
+                + "find with system mentions";
+            String message = String.format(messageFormat, format(arg1), format(arg2));
+            UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
+          }
+        }
+        goldBinaryTextRelations = relations;
+      }
+      
+      if (this.allowSmallerSystemArguments) {
+        // collect all the arguments of the manually annotated relations
+        Set<IdentifiedAnnotation> goldArgs = Sets.newHashSet();
+        for (BinaryTextRelation relation : goldBinaryTextRelations) {
+          for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+            goldArgs.add((IdentifiedAnnotation) relArg.getArgument());
+          }
+        }
+
+        // map each system argument to the gold argument that encloses it
+        Map<IdentifiedAnnotation, IdentifiedAnnotation> systemToGold = Maps.newHashMap();
+        for (IdentifiedAnnotation goldArg : goldArgs) {
+          Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
+          for (IdentifiedAnnotation systemArg : JCasUtil.selectCovered(jCas, goldClass, goldArg)) {
+            if (systemToGold.containsKey(systemArg)) {
+              throw new IllegalArgumentException(String.format(
+                  "%s contained in both %s and %s",
+                  format(systemArg),
+                  format(goldArg),
+                  format(systemToGold.get(systemArg))));
+            }
+            // only map system arguments to gold arguments if they're not the same span already
+            if (goldArg.getBegin() != systemArg.getBegin() || goldArg.getEnd() != systemArg.getEnd()) {
+              systemToGold.put(systemArg, goldArg);
+            }
+          }
+        }
+        
+        // replace system arguments with gold arguments where necessary/possible
+        for (BinaryTextRelation relation : systemBinaryTextRelations) {
+          for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+            IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
+            IdentifiedAnnotation matchingGoldArg = systemToGold.get(systemArg);
+            if (matchingGoldArg != null) {
+              String messageFormat = "replacing system argument %s with gold argument %s";
+              String message = String.format(messageFormat, format(systemArg), format(matchingGoldArg));
+              UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
+              relArg.setArgument(matchingGoldArg);
+            }
+          }
+        }
+      }
 
       // update the statistics based on the argument spans of the relation
       stats.add(
@@ -665,7 +756,6 @@ public class RelationExtractorEvaluation
       for (BinaryTextRelation relation : relations) {
 
         // attempt to replace the gold RelationArguments with system ones
-        int replacedArgumentCount = 0;
         for (RelationArgument relArg : Arrays.asList(relation.getArg1(), relation.getArg2())) {
           IdentifiedAnnotation goldArg = (IdentifiedAnnotation) relArg.getArgument();
           Class<? extends IdentifiedAnnotation> argClass = goldArg.getClass();
@@ -689,7 +779,6 @@ public class RelationExtractorEvaluation
           }
           if (bestFitArg != null) {
             relArg.setArgument(bestFitArg);
-            replacedArgumentCount += 1;
           }
 
           // log a message if we didn't find a perfect match
@@ -704,11 +793,6 @@ public class RelationExtractorEvaluation
             this.getContext().getLogger().log(Level.WARNING, message);
           }
         }
-
-        // if replacements were not found for both arguments, remove the relation
-        if (replacedArgumentCount < 2) {
-          relation.removeFromIndexes();
-        }
       }
     }
   }

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java?rev=1430684&r1=1430683&r2=1430684&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java Wed Jan  9 05:17:50 2013
@@ -124,6 +124,8 @@ public class RelationExtractorTrain {
 	    		additionalParameters,
 	    		trainingArguments,
 	    		false,
+	    		false,
+	    		false,
 	    		false);
 	    
 	    CollectionReader collectionReader = evaluation.getCollectionReader(trainFiles);