You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2015/11/09 15:28:47 UTC

svn commit: r1713441 - /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java

Author: tmill
Date: Mon Nov  9 14:28:46 2015
New Revision: 1713441

URL: http://svn.apache.org/viewvc?rev=1713441&view=rev
Log:
Name files after config, allow cluster ranking.

Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java?rev=1713441&r1=1713440&r2=1713441&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java Mon Nov  9 14:28:46 2015
@@ -25,6 +25,7 @@ import org.apache.ctakes.coreference.ae.
 import org.apache.ctakes.coreference.ae.EventCoreferenceAnnotator;
 import org.apache.ctakes.coreference.ae.MarkableSalienceAnnotator;
 import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator;
+import org.apache.ctakes.coreference.ae.MentionClusterRankingCoreferenceAnnotator;
 import org.apache.ctakes.coreference.ae.PersonChainAnnotator;
 import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
@@ -88,7 +89,7 @@ import org.apache.uima.util.FileUtils;
 import org.cleartk.eval.AnnotationStatistics;
 import org.cleartk.ml.jar.JarClassifierBuilder;
 import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
-import org.cleartk.ml.svmlight.SvmLightStringOutcomeDataWriter;
+import org.cleartk.ml.svmlight.rank.SvmLightRankDataWriter;
 import org.cleartk.ml.tksvmlight.model.CompositeKernel.ComboOperator;
 import org.cleartk.util.ViewUriUtil;
 
@@ -117,13 +118,23 @@ public class EvaluationOfEventCoreferenc
     
     @Option(shortName="t", defaultValue={"MENTION_PAIR"})
     public EVAL_SYSTEM getEvalSystem();
+    
+    @Option(shortName="c", defaultValue="default")
+    public String getConfig();
   }
   
   private static Logger logger = Logger.getLogger(EvaluationOfEventCoreference.class);
-  public static float COREF_DOWNSAMPLE = 0.5f;
-  protected static ParameterSettings allParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, COREF_DOWNSAMPLE, "tk",
+  public static float COREF_PAIRS_DOWNSAMPLE = 0.5f;
+  public static float COREF_CLUSTER_DOWNSAMPLE=0.5f;
+  
+  protected static ParameterSettings pairwiseParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, COREF_PAIRS_DOWNSAMPLE, "tk",
       1.0, 1.0, "linear", ComboOperator.SUM, 0.1, 0.5);
-
+  protected static ParameterSettings clusterParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, COREF_CLUSTER_DOWNSAMPLE, "tk",
+      1.0, 1.0, "linear", ComboOperator.SUM, 0.1, 0.5);
+  
+  private static String goldOut = "";
+  private static String systemOut = "";
+  
   public static void main(String[] args) throws Exception {
     CoreferenceOptions options = CliFactory.parseArguments(CoreferenceOptions.class, args);
 
@@ -131,8 +142,9 @@ public class EvaluationOfEventCoreferenc
     List<Integer> trainItems = getTrainItems(options);
     List<Integer> testItems = options.getTestOnTrain() ? getTrainItems(options) : getTestItems(options);
 
-    ParameterSettings params = allParams;
-    File workingDir = new File("target/eval/temporal-relations/coreference/" + options.getEvalSystem());
+    ParameterSettings params = options.getEvalSystem() == EVAL_SYSTEM.MENTION_PAIR ? pairwiseParams : clusterParams;
+    
+    File workingDir = new File("target/eval/temporal-relations/coreference/" + options.getEvalSystem() + File.separator +  options.getConfig());
     if(!workingDir.exists()) workingDir.mkdirs();
     if(options.getUseTmp()){
       File tempModelDir = File.createTempFile("temporal", null, workingDir);
@@ -161,11 +173,13 @@ public class EvaluationOfEventCoreferenc
       eval.skipWrite = true;
     }
     eval.evalType = options.getEvalSystem();
+    eval.config = options.getConfig();
+    goldOut = "gold." + eval.config + ".conll";
+    systemOut = "system." + eval.config + ".conll";
+    
     eval.prepareXMIsFor(patientSets);
-
+    
     params.stats = eval.trainAndTest(trainItems, testItems);//training);//
-    //      System.err.println(options.getKernelParams() == null ? params : options.getKernelParams());
-//    System.err.println(params.stats);
 
     if(options.getUseTmp()){
       FileUtils.deleteRecursive(workingDir);
@@ -176,10 +190,10 @@ public class EvaluationOfEventCoreferenc
       Runtime runtime = Runtime.getRuntime();
       Process p = runtime.exec(new String[]{
           "perl",
-          "/home/tmill/soft/reference-coreference-scorers-read-only/scorer.pl",
+          "/home/tmill/soft/reference-coreference-scorers/scorer.pl",
           "all",
-          options.getOutputDirectory() + "gold.chains",
-          options.getOutputDirectory() + "system.chains",
+          options.getOutputDirectory() + goldOut,
+          options.getOutputDirectory() + systemOut,
           "none"});
       BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()));
       String line, metric=null;
@@ -208,8 +222,9 @@ public class EvaluationOfEventCoreferenc
   
   boolean skipTrain=false; 
   boolean skipWrite=false;
-  public enum EVAL_SYSTEM { BASELINE, MENTION_PAIR, MENTION_CLUSTER };
+  public enum EVAL_SYSTEM { BASELINE, MENTION_PAIR, MENTION_CLUSTER, CLUSTER_RANK };
   EVAL_SYSTEM evalType;
+  String config=null;
   
   private String outputDirectory;
   
@@ -248,11 +263,13 @@ public class EvaluationOfEventCoreferenc
         aggregateBuilder.add(EventCoreferenceAnnotator.createDataWriterDescription(
             //        TKSVMlightStringOutcomeDataWriter.class,
                     LibLinearStringOutcomeDataWriter.class,
+//            LibSvmStringOutcomeDataWriter.class,
 //            TkLibSvmStringOutcomeDataWriter.class,
             directory,
             params.probabilityOfKeepingANegativeExample
             ));
       }else if(this.evalType == EVAL_SYSTEM.MENTION_CLUSTER){
+//        aggregateBuilder.add(EventCoreferenceAnnotator.createScoringAnnotatorDescription("/org/apache/ctakes/coreference/mention-pair" + File.separator + "model.jar"));
         aggregateBuilder.add(MentionClusterCoreferenceAnnotator.createDataWriterDescription(
 //            LibSvmStringOutcomeDataWriter.class,
             LibLinearStringOutcomeDataWriter.class,
@@ -262,6 +279,12 @@ public class EvaluationOfEventCoreferenc
             directory,
             params.probabilityOfKeepingANegativeExample
             ));
+      }else if(this.evalType == EVAL_SYSTEM.CLUSTER_RANK){
+        // TODO
+        aggregateBuilder.add(MentionClusterRankingCoreferenceAnnotator.createDataWriterDescription(
+            SvmLightRankDataWriter.class, 
+            directory, 
+            params.probabilityOfKeepingANegativeExample));
       }
       Logger.getLogger(EventCoreferenceAnnotator.class).setLevel(Level.WARN);
       // create gold chains for writing out which we can then use for our scoring tool
@@ -311,7 +334,7 @@ public class EvaluationOfEventCoreferenc
     aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/doctimerel/model.jar"));
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CoreferenceChainScoringOutput.class,
         CoreferenceChainScoringOutput.PARAM_OUTPUT_FILENAME,
-        this.outputDirectory + "gold.chains",
+        this.outputDirectory + goldOut,
         CoreferenceChainScoringOutput.PARAM_GOLD_VIEW_NAME,
         GOLD_VIEW_NAME));
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemovePersonMarkables.class));
@@ -319,13 +342,16 @@ public class EvaluationOfEventCoreferenc
     if(this.evalType == EVAL_SYSTEM.MENTION_PAIR){
       aggregateBuilder.add(EventCoreferenceAnnotator.createAnnotatorDescription(directory.getAbsolutePath() + File.separator + "model.jar"));
     }else if(this.evalType == EVAL_SYSTEM.MENTION_CLUSTER){
+//      aggregateBuilder.add(EventCoreferenceAnnotator.createScoringAnnotatorDescription("/org/apache/ctakes/coreference/mention-pair" + File.separator + "model.jar"));
       aggregateBuilder.add(MentionClusterCoreferenceAnnotator.createAnnotatorDescription(directory.getAbsolutePath() + File.separator + "model.jar"));
+    }else if(this.evalType == EVAL_SYSTEM.CLUSTER_RANK){
+      aggregateBuilder.add(MentionClusterRankingCoreferenceAnnotator.createAnnotatorDescription(directory.getAbsolutePath() + File.separator + "model.jar"));
     }
 //    aggregateBuilder.add(CoreferenceChainAnnotator.createAnnotatorDescription());
     aggregateBuilder.add(PersonChainAnnotator.createAnnotatorDescription());
     aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CoreferenceChainScoringOutput.class,
         CoreferenceChainScoringOutput.PARAM_OUTPUT_FILENAME,
-        this.outputDirectory + "system.chains"));
+        this.outputDirectory + systemOut));
 
     FlowControllerDescription corefFlowControl = FlowControllerFactory.createFlowControllerDescription(CorefEvalFlowController.class);
     aggregateBuilder.setFlowControllerDescription(corefFlowControl);