You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2013/06/27 22:14:43 UTC

svn commit: r1497555 [2/2] - in /ctakes/trunk/ctakes-relation-extractor: ./ desc/analysis_engine/ src/main/java/org/apache/ctakes/relationextractor/ae/ src/main/java/org/apache/ctakes/relationextractor/cr/ src/main/java/org/apache/ctakes/relationextrac...

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Thu Jun 27 20:14:42 2013
@@ -20,14 +20,25 @@ package org.apache.ctakes.relationextrac
 
 import java.io.File;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import javax.annotation.Nullable;
+
+import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.LocationOfRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.DegreeOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -41,360 +52,233 @@ import org.apache.uima.jcas.tcas.Annotat
 import org.apache.uima.util.CasCopier;
 import org.apache.uima.util.Level;
 import org.apache.uima.util.XMLInputSource;
-import org.cleartk.classifier.DataWriter;
 import org.cleartk.classifier.jar.DefaultDataWriterFactory;
 import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
 import org.cleartk.classifier.jar.GenericJarClassifierFactory;
 import org.cleartk.classifier.jar.JarClassifierBuilder;
-import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.classifier.liblinear.LIBLINEARStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
-import org.cleartk.eval.Evaluation_ImplBase;
-import org.cleartk.util.Options_ImplBase;
-import org.kohsuke.args4j.Option;
 import org.uimafit.component.JCasAnnotator_ImplBase;
-import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
-import org.uimafit.factory.CollectionReaderFactory;
 import org.uimafit.factory.ConfigurationParameterFactory;
-import org.uimafit.factory.TypeSystemDescriptionFactory;
 import org.uimafit.pipeline.JCasIterable;
 import org.uimafit.pipeline.SimplePipeline;
-import org.uimafit.testing.util.HideOutput;
 import org.uimafit.util.JCasUtil;
 
 import com.google.common.base.Function;
-import com.google.common.base.Functions;
 import com.google.common.base.Objects;
-import com.google.common.base.Objects.ToStringHelper;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import com.google.common.collect.Ordering;
 import com.google.common.collect.Sets;
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
 
-import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
-import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
-import org.apache.ctakes.typesystem.type.relation.RelationArgument;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.Modifier;
-
-public class RelationExtractorEvaluation extends Evaluation_ImplBase<File, AnnotationStatistics<String>> {
+public class RelationExtractorEvaluation extends SHARPXMI.Evaluation_ImplBase {
 
-  public static class Options extends Options_ImplBase {
+  public static interface Options extends SHARPXMI.EvaluationOptions {
 
     @Option(
-        name = "--train-dir",
-        usage = "specify the directory contraining the XMI training files (for example, /NLP/Corpus/Relations/mipacq/xmi/train)",
-        required = true)
-    public File trainDirectory;
+        longName = "relations",
+        description = "determines which relations to evaluate on (separately)",
+        defaultValue = { "degree_of", "location_of" })
+    public List<String> getRelations();
 
     @Option(
-        name = "--dev-dir",
-        usage = "specify the directory contraining the XMI development files (for example, /NLP/Corpus/Relations/mipacq/xmi/dev)",
-        required = false)
-    public File devDirectory;
-    
-    @Option(
-        name = "--test-dir",
-        usage = "specify the directory contraining the XMI testing files (for example, /NLP/Corpus/Relations/mipacq/xmi/test)",
-        required = false)
-    public File testDirectory;
-    
-    @Option(name = "--grid-search", usage = "run a grid search to select the best parameters")
-    public boolean gridSearch = false;
+        longName = "test-on-ctakes",
+        description = "evaluate test performance on ctakes entities, instead of gold standard "
+            + "entities")
+    public boolean getTestOnCTakes();
 
     @Option(
-        name = "--relations",
-        usage = "determines which relations to evaluate on (separately)",
-        required = false)
-    public List<String> relations = null;
+        longName = "allow-smaller-system-arguments",
+        description = "for evaluation, allow system relation arguments to match gold relation "
+            + "arguments that enclose them")
+    public boolean getAllowSmallerSystemArguments();
 
     @Option(
-        name = "--test-on-ctakes",
-        usage = "evaluate test performance on ctakes entities, instead of gold standard entities")
-    public boolean testOnCTakes = false;
+        longName = "ignore-impossible-gold-relations",
+        description = "for evaluation, ignore gold relations that would be impossible to find "
+            + "because there are no corresponding system mentions")
+    public boolean getIgnoreImpossibleGoldRelations();
 
     @Option(
-        name = "--allow-smaller-system-arguments",
-        usage = "for evaluation, allow system relation arguments to match gold relation arguments that enclose them")
-    public boolean allowSmallerSystemArguments = false;
+        longName = "print-errors",
+        description = "print relations that were incorrectly predicted")
+    public boolean getPrintErrors();
 
-    @Option(
-        name = "--ignore-impossible-gold-relations",
-        usage = "for evaluation, ignore gold relations that would be impossible to find because there are no corresponding system mentions")
-    public boolean ignoreImpossibleGoldRelations = false;
-
-    @Option(
-        name = "--print-errors",
-        usage = "print relations that were incorrectly predicted")
-    public boolean printErrors = false;
+  }
 
+  public static final Map<String, Class<? extends BinaryTextRelation>> RELATION_CLASSES =
+      Maps.newHashMap();
+  public static final Map<Class<? extends BinaryTextRelation>, Class<? extends RelationExtractorAnnotator>> ANNOTATOR_CLASSES =
+      Maps.newHashMap();
+  public static final Map<Class<? extends BinaryTextRelation>, ParameterSettings> BEST_PARAMETERS =
+      Maps.newHashMap();
+
+  static {
+    RELATION_CLASSES.put("degree_of", DegreeOfTextRelation.class);
+    ANNOTATOR_CLASSES.put(DegreeOfTextRelation.class, DegreeOfRelationExtractorAnnotator.class);
+    BEST_PARAMETERS.put(DegreeOfTextRelation.class, new ParameterSettings(
+        LIBLINEARStringOutcomeDataWriter.class,
+        new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+            1.0f },
+        new String[] { "-s", "1", "-c", "10.0" }));
+
+    RELATION_CLASSES.put("location_of", LocationOfTextRelation.class);
+    ANNOTATOR_CLASSES.put(LocationOfTextRelation.class, LocationOfRelationExtractorAnnotator.class);
+    BEST_PARAMETERS.put(LocationOfTextRelation.class, new ParameterSettings(
+        LIBLINEARStringOutcomeDataWriter.class,
+        new Object[] { RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+            1.0f },
+        new String[] { "-s", "1", "-c", "0.05" }));
   }
 
-  public static final String GOLD_VIEW_NAME = "GoldView";
-  
-  // parameter settings currently optimized for SHARP data
-  public static final ParameterSettings BEST_DEGREE_OF_PARAMETERS = new ParameterSettings(false, 1.0f, "linear", 0.05, 1.0);
-  public static final ParameterSettings BEST_NON_DEGREE_OF_PARAMETERS = new ParameterSettings(false, 0.5f, "radial basis function", 100.0, 0.01);
-  
   public static void main(String[] args) throws Exception {
-    Options options = new Options();
-    options.parseOptions(args);
-    if (options.relations == null) {
-      options.relations = Arrays.asList("degree_of", "location_of");
-    }
-
-    // error on invalid option combinations
-    if (options.testDirectory != null && options.gridSearch) {
-      throw new IllegalArgumentException("grid search can only be run on the train or dev sets");
-    }
-
-    List<File> trainFiles = Arrays.asList(options.trainDirectory.listFiles());
-
-    for (String relationCategory : options.relations) {
-
-      // define the output directory for models
-      File modelsDir = new File("target/models/" + relationCategory);
-
-      // determine class for the classifier annotator
-      boolean isDegreeOf = relationCategory.equals("degree_of");
-      Class<? extends RelationExtractorAnnotator> annotatorClass = isDegreeOf
-          ? DegreeOfRelationExtractorAnnotator.class
-          : EntityMentionPairRelationExtractorAnnotator.class;
-
-      // determine the type of classifier to be trained
-      Class<? extends DataWriter<String>> dataWriterClass = LIBSVMStringOutcomeDataWriter.class;
-
-      // define the set of possible training parameters
-      List<ParameterSettings> possibleParams = Lists.newArrayList();
-      if (options.gridSearch) {
-        boolean[] classifyBothDirectionsOptions = isDegreeOf
-            ? new boolean[] { false }
-            : new boolean[] { false, true };
-        for (boolean classifyBothDirections : classifyBothDirectionsOptions) {
-          for (float probabilityOfKeepingANegativeExample : new float[] { 0.25f, 0.5f, 1.0f }) {
-            // linear kernels
-            for (double svmCost : new double[] { 0.05, 0.1, 0.5, 1 }) {
-              possibleParams.add(new ParameterSettings(
-                  classifyBothDirections,
-                  probabilityOfKeepingANegativeExample,
-                  "linear",
-                  svmCost,
-                  1.0));
-            }
-            // RBF kernels
-            for (double svmCost : new double[] { 1, 10, 100 }) {
-              for (double gamma : new double[] { 0.001, 0.01, 0.1 }) {
-                possibleParams.add(new ParameterSettings(
-                  classifyBothDirections,
-                  probabilityOfKeepingANegativeExample,
-                  "radial basis function",
-                  svmCost,
-                  gamma));
-              }
-            }
-          }
-        }
-      } else if (isDegreeOf) {
-        possibleParams.add(BEST_DEGREE_OF_PARAMETERS);
-      } else {
-        possibleParams.add(BEST_NON_DEGREE_OF_PARAMETERS);
-      }
-
-      // run an evaluation for each set of parameters
-      Map<ParameterSettings, Double> scoredParams = new HashMap<ParameterSettings, Double>();
-      for (ParameterSettings params : possibleParams) {
-        System.err.println(relationCategory + ": " + params);
-        System.err.println();
-
-        // define additional configuration parameters for the annotator
-        Object[] additionalParameters = new Object[] {
-            RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
-            params.probabilityOfKeepingANegativeExample,
-            EntityMentionPairRelationExtractorAnnotator.PARAM_CLASSIFY_BOTH_DIRECTIONS,
-            params.classifyBothDirections };
-
-        // define arguments to be passed to the classifier
-        String[] trainingArguments = new String[] {
-            "-t",
-            String.valueOf(params.svmKernelIndex),
-            "-c",
-            String.valueOf(params.svmCost),
-            "-g",
-            String.valueOf(params.svmGamma) };
-
-        // create the evaluation
-        RelationExtractorEvaluation evaluation = new RelationExtractorEvaluation(
-            modelsDir,
-            relationCategory,
-            annotatorClass,
-            dataWriterClass,
-            additionalParameters,
-            trainingArguments,
-            options.testOnCTakes,
-            options.allowSmallerSystemArguments,
-            options.ignoreImpossibleGoldRelations,
-            options.printErrors);
-
-        if (options.devDirectory != null) {
-          if (options.testDirectory != null) {
-            // train on the training set + dev set and evaluate on the test set
-            List<File> allTrainFiles = new ArrayList<File>();
-            allTrainFiles.addAll(trainFiles);
-            allTrainFiles.addAll(Arrays.asList(options.devDirectory.listFiles()));
-            List<File> testFiles = Arrays.asList(options.testDirectory.listFiles());
-            params.stats = evaluation.trainAndTest(allTrainFiles, testFiles);
-          } else {
-            // train on the training set and evaluate on the dev set
-            List<File> devFiles = Arrays.asList(options.devDirectory.listFiles());
-            params.stats = evaluation.trainAndTest(trainFiles, devFiles);
-          }
-        } else {
-          if (options.testDirectory != null) {
-            // train on the training set and evaluate on the test set
-            List<File> testFiles = Arrays.asList(options.testDirectory.listFiles());
-            params.stats = evaluation.trainAndTest(trainFiles, testFiles);
-          } else {
-            // run n-fold cross-validation on the training set
-            List<AnnotationStatistics<String>> foldStats = evaluation.crossValidation(trainFiles, 2);
-            params.stats = AnnotationStatistics.addAll(foldStats);
-          }
+    // parse the options, validate them, and generate XMI if necessary
+    final Options options = CliFactory.parseArguments(Options.class, args);
+    SHARPXMI.validate(options);
+    SHARPXMI.generateXMI(options);
+
+    // determine the grid of parameters to search through
+    // for the full set of LIBLINEAR parameters, see:
+    // https://github.com/bwaldvogel/liblinear-java/blob/master/src/main/java/de/bwaldvogel/liblinear/Train.java
+    List<ParameterSettings> gridOfSettings = Lists.newArrayList();
+    for (float probabilityOfKeepingANegativeExample : new float[] { 0.5f, 1.0f }) {
+      for (int solver : new int[] { 0 /* logistic regression */, 1 /* SVM */}) {
+        for (double svmCost : new double[] { 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100 }) {
+          gridOfSettings.add(new ParameterSettings(
+              LIBLINEARStringOutcomeDataWriter.class,
+              new Object[] {
+                  RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+                  probabilityOfKeepingANegativeExample },
+              new String[] { "-s", String.valueOf(solver), "-c", String.valueOf(svmCost) }));
         }
-        scoredParams.put(params, params.stats.f1());
       }
+    }
 
-      // print parameters sorted by F1
-      List<ParameterSettings> list = new ArrayList<ParameterSettings>(scoredParams.keySet());
-      Function<ParameterSettings, Double> getCount = Functions.forMap(scoredParams);
-      Collections.sort(list, Ordering.natural().onResultOf(getCount));
-
-      // print performance of each set of parameters
-      if (list.size() > 1) {
-        System.err.println(relationCategory + ": summary:");
-        for (ParameterSettings params : list) {
-          System.err.printf(
-              "F1=%.3f P=%.3f R=%.3f %s\n",
-              params.stats.f1(),
-              params.stats.precision(),
-              params.stats.recall(),
-              params);
-        }
-        System.err.println();
-      }
-
-      // print overall best model
-      if (!list.isEmpty()) {
-        ParameterSettings lastParams = list.get(list.size() - 1);
-        System.err.println(relationCategory + ": best model:");
-        System.err.print(lastParams.stats);
-        System.err.println(lastParams);
-        System.err.println(lastParams.stats.confusions());
-        System.err.println();
-        System.err.println(lastParams.stats.confusions().toHTML());
-      }
+    // run an evaluation for each selected relation
+    for (final String relationCategory : options.getRelations()) {
+
+      // get the best parameters for the relation
+      final Class<? extends BinaryTextRelation> relationClass =
+          RELATION_CLASSES.get(relationCategory);
+      ParameterSettings bestSettings = BEST_PARAMETERS.get(relationClass);
+
+      // run the evaluation
+      SHARPXMI.evaluate(
+          options,
+          bestSettings,
+          gridOfSettings,
+          new Function<ParameterSettings, RelationExtractorEvaluation>() {
+            @Override
+            public RelationExtractorEvaluation apply(@Nullable ParameterSettings params) {
+              return new RelationExtractorEvaluation(
+                  new File("target/models/" + relationCategory),
+                  relationClass,
+                  ANNOTATOR_CLASSES.get(relationClass),
+                  params,
+                  options.getTestOnCTakes(),
+                  options.getAllowSmallerSystemArguments(),
+                  options.getIgnoreImpossibleGoldRelations(),
+                  options.getPrintErrors());
+            }
+          });
     }
   }
 
+  private Class<? extends BinaryTextRelation> relationClass;
+
+  private Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass;
+
+  private ParameterSettings parameterSettings;
+
+  private boolean testOnCTakes;
+
+  private boolean allowSmallerSystemArguments;
+
+  private boolean ignoreImpossibleGoldRelations;
+
+  private boolean printErrors;
+
   /**
    * An evaluation of a relation extractor.
    * 
    * @param baseDirectory
    *          The directory where models, etc. should be written
+   * @param relationClass
+   *          The class of the relation to be predicted
    * @param classifierAnnotatorClass
    *          The CleartkAnnotator class that learns a relation extractor model
-   * @param dataWriterClass
-   *          The DataWriter defining what type of classifier to train
-   * @param additionalParameters
-   *          Additional parameters that should be supplied when creating the CleartkAnnotator
-   * @param trainingArguments
-   *          Arguments that should be passed to the classifier's train method
+   * @param parameterSettings
+   *          The parameters defining how to train a classifier
    * @param testOnCTakes
-   *          During testing, use annotations from cTAKES, not from the gold standard
+   *          During testing, use annotations from cTAKES, not from the gold
+   *          standard
    * @param allowSmallerSystemArguments
-   *          During testing, allow system annotations to match gold annotations that enclose them
+   *          During testing, allow system annotations to match gold annotations
+   *          that enclose them
    * @param ignoreImpossibleGoldRelations
-   *          During testing, ignore gold relations that would be impossible to find because there
-   *          are no corresponding system mentions
+   *          During testing, ignore gold relations that would be impossible to
+   *          find because there are no corresponding system mentions
    */
   public RelationExtractorEvaluation(
       File baseDirectory,
-      String relationCategory,
+      Class<? extends BinaryTextRelation> relationClass,
       Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass,
-      Class<? extends DataWriter<String>> dataWriterClass,
-      Object[] additionalParameters,
-      String[] trainingArguments,
+      ParameterSettings parameterSettings,
       boolean testOnCTakes,
       boolean allowSmallerSystemArguments,
       boolean ignoreImpossibleGoldRelations,
       boolean printErrors) {
     super(baseDirectory);
-    this.relationCategory = relationCategory;
+    this.relationClass = relationClass;
     this.classifierAnnotatorClass = classifierAnnotatorClass;
-    this.dataWriterClass = dataWriterClass;
-    this.additionalParameters = additionalParameters;
-    this.trainingArguments = trainingArguments;
+    this.parameterSettings = parameterSettings;
     this.testOnCTakes = testOnCTakes;
     this.allowSmallerSystemArguments = allowSmallerSystemArguments;
     this.ignoreImpossibleGoldRelations = ignoreImpossibleGoldRelations;
     this.printErrors = printErrors;
   }
-  
-  private String relationCategory;
-
-  private Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass;
-
-  private Class<? extends DataWriter<String>> dataWriterClass;
-
-  private Object[] additionalParameters;
 
-  private String[] trainingArguments;
-  
-  private boolean testOnCTakes;
-  
-  private boolean allowSmallerSystemArguments;
-  
-  private boolean ignoreImpossibleGoldRelations;
-  
-  private boolean printErrors;
-
-  @Override
-  public CollectionReader getCollectionReader(List<File> items) throws Exception {
-    // convert the List<File> to a String[]
-    String[] paths = new String[items.size()];
-    for (int i = 0; i < paths.length; ++i) {
-      paths[i] = items.get(i).getPath();
-    }
-
-    // return a reader that will load each of the XMI files
-    return CollectionReaderFactory.createCollectionReader(
-        XMIReader.class,
-        TypeSystemDescriptionFactory.createTypeSystemDescription(),
-        XMIReader.PARAM_FILES,
-        paths);
+  public RelationExtractorEvaluation(
+      File baseDirectory,
+      Class<? extends BinaryTextRelation> relationClass,
+      Class<? extends RelationExtractorAnnotator> classifierAnnotatorClass,
+      ParameterSettings parameterSettings) {
+    this(
+        baseDirectory,
+        relationClass,
+        classifierAnnotatorClass,
+        parameterSettings,
+        false,
+        false,
+        false,
+        false);
   }
 
   @Override
   public void train(CollectionReader collectionReader, File directory) throws Exception {
+    System.err.printf(
+        "%s: %s: %s:\n",
+        this.getClass().getSimpleName(),
+        this.relationClass.getSimpleName(),
+        directory.getName());
+    System.err.println(this.parameterSettings);
+
     AggregateBuilder builder = new AggregateBuilder();
-    // remove all but the relation of interest from the gold annotations
-    builder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        RemoveOtherRelations.class,
-        RemoveOtherRelations.PARAM_RELATION_CATEGORY,
-        this.relationCategory),
-        CAS.NAME_DEFAULT_SOFA, GOLD_VIEW_NAME);
-    // remove cTAKES entity mentions and modifiers in the system view and copy in the gold relations
+    // remove cTAKES entity mentions and modifiers in the system view and copy
+    // in the gold relations
     builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveCTakesMentionsAndCopyGoldRelations.class));
     // add the relation extractor, configured for training mode
-    AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
-        this.classifierAnnotatorClass,
-        this.additionalParameters);
+    AnalysisEngineDescription classifierAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(
+            this.classifierAnnotatorClass,
+            this.parameterSettings.configurationParameters);
     ConfigurationParameterFactory.addConfigurationParameters(
         classifierAnnotator,
         DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
-        this.dataWriterClass,
+        this.parameterSettings.dataWriterClass,
         DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
         directory.getPath());
     builder.add(classifierAnnotator);
@@ -403,52 +287,48 @@ public class RelationExtractorEvaluation
     SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());
 
     // train the classifier and package it into a .jar file
-    HideOutput hider = new HideOutput();
-    JarClassifierBuilder.trainAndPackage(directory, this.trainingArguments);
-    hider.restoreOutput();
-    hider.close(); // workaround for https://code.google.com/p/uimafit/issues/detail?id=129
+    JarClassifierBuilder.trainAndPackage(directory, this.parameterSettings.trainingArguments);
   }
 
   @Override
   protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
       throws Exception {
     AggregateBuilder builder = new AggregateBuilder();
-    // remove all but the relation of interest from the gold annotations
-    builder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        RemoveOtherRelations.class,
-        RemoveOtherRelations.PARAM_RELATION_CATEGORY,
-        this.relationCategory),
-        CAS.NAME_DEFAULT_SOFA, GOLD_VIEW_NAME);
     if (this.testOnCTakes) {
       // add the modifier extractor
       File file = new File("desc/analysis_engine/ModifierExtractorAnnotator.xml");
       XMLInputSource source = new XMLInputSource(file);
       builder.add(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(source));
       // remove extraneous entity mentions
-      builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveSmallerEntityMentions.class));
+      builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveSmallerEventMentions.class));
     } else {
-      // replace cTAKES entity mentions and modifiers in the system view with the gold annotations
+      // replace cTAKES entity mentions and modifiers in the system view with
+      // the gold annotations
       builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesMentionsWithGoldMentions.class));
     }
     // add the relation extractor, configured for classification mode
-    AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
-        this.classifierAnnotatorClass,
-        this.additionalParameters);
+    AnalysisEngineDescription classifierAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(
+            this.classifierAnnotatorClass,
+            this.parameterSettings.configurationParameters);
     ConfigurationParameterFactory.addConfigurationParameters(
         classifierAnnotator,
         GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
-        new File(directory, "model.jar").getPath());
+        JarClassifierBuilder.getModelJarFile(directory));
     builder.add(classifierAnnotator);
 
-    // statistics will be based on the "category" feature of the BinaryTextRelations
+    // statistics will be based on the "category" feature of the
+    // BinaryTextRelations
     AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
-    Function<BinaryTextRelation, HashableArguments> getSpan = new Function<BinaryTextRelation, HashableArguments>() {
-      @Override
-      public HashableArguments apply(BinaryTextRelation relation) {
-        return new HashableArguments(relation);
-      }
-    };
-    Function<BinaryTextRelation, String> getOutcome = AnnotationStatistics.annotationToFeatureValue("category");
+    Function<BinaryTextRelation, HashableArguments> getSpan =
+        new Function<BinaryTextRelation, HashableArguments>() {
+          @Override
+          public HashableArguments apply(BinaryTextRelation relation) {
+            return new HashableArguments(relation);
+          }
+        };
+    Function<BinaryTextRelation, String> getOutcome =
+        AnnotationStatistics.annotationToFeatureValue("category");
 
     // calculate statistics, iterating over the results of the classifier
     AnalysisEngine engine = builder.createAggregate();
@@ -457,21 +337,20 @@ public class RelationExtractorEvaluation
       // get the gold view
       JCas goldView;
       try {
-        goldView = jCas.getView(GOLD_VIEW_NAME);
+        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
       } catch (CASException e) {
         throw new AnalysisEngineProcessException(e);
       }
 
       // get the gold and system annotations
-      Collection<BinaryTextRelation> goldBinaryTextRelations = JCasUtil.select(
-          goldView,
-          BinaryTextRelation.class);
-      Collection<BinaryTextRelation> systemBinaryTextRelations = JCasUtil.select(
-          jCas,
-          BinaryTextRelation.class);
-      
+      Collection<? extends BinaryTextRelation> goldBinaryTextRelations =
+          JCasUtil.select(goldView, this.relationClass);
+      Collection<? extends BinaryTextRelation> systemBinaryTextRelations =
+          JCasUtil.select(jCas, this.relationClass);
+
       if (this.ignoreImpossibleGoldRelations) {
-        // collect only relations where both arguments have some possible system arguments
+        // collect only relations where both arguments have some possible system
+        // arguments
         List<BinaryTextRelation> relations = Lists.newArrayList();
         for (BinaryTextRelation relation : goldBinaryTextRelations) {
           boolean hasSystemArgs = true;
@@ -486,15 +365,16 @@ public class RelationExtractorEvaluation
           } else {
             IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
             IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
-            String messageFormat = "removing relation between %s and %s which is impossible to "
-                + "find with system mentions";
+            String messageFormat =
+                "removing relation between %s and %s which is impossible to "
+                    + "find with system mentions";
             String message = String.format(messageFormat, format(arg1), format(arg2));
             UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
           }
         }
         goldBinaryTextRelations = relations;
       }
-      
+
       if (this.allowSmallerSystemArguments) {
 
         // collect all the arguments of the manually annotated relations
@@ -505,15 +385,20 @@ public class RelationExtractorEvaluation
           }
         }
 
-        // collect all the arguments of system-predicted relations that don't match some gold argument
+        // collect all the arguments of system-predicted relations that don't
+        // match some gold argument
         Set<IdentifiedAnnotation> unmatchedSystemArgs = Sets.newHashSet();
         for (BinaryTextRelation relation : systemBinaryTextRelations) {
           for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
             IdentifiedAnnotation systemArg = (IdentifiedAnnotation) relArg.getArgument();
             Class<? extends IdentifiedAnnotation> systemClass = systemArg.getClass();
             boolean matchesSomeGold = false;
-            for (IdentifiedAnnotation goldArg : JCasUtil.selectCovered(goldView, systemClass, systemArg)) {
-              if (goldArg.getBegin() == systemArg.getBegin() && goldArg.getEnd() == systemArg.getEnd()) {
+            for (IdentifiedAnnotation goldArg : JCasUtil.selectCovered(
+                goldView,
+                systemClass,
+                systemArg)) {
+              if (goldArg.getBegin() == systemArg.getBegin()
+                  && goldArg.getEnd() == systemArg.getEnd()) {
                 matchesSomeGold = true;
                 break;
               }
@@ -524,20 +409,23 @@ public class RelationExtractorEvaluation
           }
         }
 
-        // map each unmatched system argument to the gold argument that encloses it
+        // map each unmatched system argument to the gold argument that encloses
+        // it
         Map<IdentifiedAnnotation, IdentifiedAnnotation> systemToGold = Maps.newHashMap();
         for (IdentifiedAnnotation goldArg : goldArgs) {
           Class<? extends IdentifiedAnnotation> goldClass = goldArg.getClass();
           for (IdentifiedAnnotation systemArg : JCasUtil.selectCovered(jCas, goldClass, goldArg)) {
             if (unmatchedSystemArgs.contains(systemArg)) {
-              
-              // if there's no mapping yet for this system arg, map it to the enclosing gold arg
+
+              // if there's no mapping yet for this system arg, map it to the
+              // enclosing gold arg
               IdentifiedAnnotation oldGoldArg = systemToGold.get(systemArg);
               if (oldGoldArg == null) {
                 systemToGold.put(systemArg, goldArg);
               }
-              
-              // if there's already a mapping for this system arg, only re-map it to match the type 
+
+              // if there's already a mapping for this system arg, only re-map
+              // it to match the type
               else {
                 IdentifiedAnnotation current, other;
                 if (systemArg.getTypeID() == goldArg.getTypeID()) {
@@ -550,16 +438,15 @@ public class RelationExtractorEvaluation
                 }
 
                 // issue a warning since this re-mapping procedure is imperfect
-                UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, String.format(
-                    "system argument %s mapped to gold argument %s, but could also be mapped to %s",
-                    format(systemArg),
-                    format(current),
-                    format(other)));
+                String message =
+                    "system argument %s mapped to gold argument %s, but could also be mapped to %s";
+                message = String.format(message, format(systemArg), format(current), format(other));
+                UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
               }
             }
           }
         }
-        
+
         // replace system arguments with gold arguments where necessary/possible
         for (BinaryTextRelation relation : systemBinaryTextRelations) {
           for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
@@ -567,7 +454,8 @@ public class RelationExtractorEvaluation
             IdentifiedAnnotation matchingGoldArg = systemToGold.get(systemArg);
             if (matchingGoldArg != null) {
               String messageFormat = "replacing system argument %s with gold argument %s";
-              String message = String.format(messageFormat, format(systemArg), format(matchingGoldArg));
+              String message =
+                  String.format(messageFormat, format(systemArg), format(matchingGoldArg));
               UIMAFramework.getLogger(this.getClass()).log(Level.WARNING, message);
               relArg.setArgument(matchingGoldArg);
             }
@@ -576,12 +464,8 @@ public class RelationExtractorEvaluation
       }
 
       // update the statistics based on the argument spans of the relation
-      stats.add(
-          goldBinaryTextRelations,
-          systemBinaryTextRelations,
-          getSpan,
-          getOutcome);
-      
+      stats.add(goldBinaryTextRelations, systemBinaryTextRelations, getSpan, getOutcome);
+
       // print errors if requested
       if (this.printErrors) {
         Map<HashableArguments, BinaryTextRelation> goldMap = Maps.newHashMap();
@@ -610,16 +494,14 @@ public class RelationExtractorEvaluation
       }
     }
 
-    System.err.printf("%s: %s:\n", this.relationCategory, directory.getName());
     System.err.print(stats);
-    System.err.println(stats.confusions());
     System.err.println();
     return stats;
   }
-  
+
   private static String formatRelation(BinaryTextRelation relation) {
-    IdentifiedAnnotation arg1 = (IdentifiedAnnotation)relation.getArg1().getArgument();
-    IdentifiedAnnotation arg2 = (IdentifiedAnnotation)relation.getArg2().getArgument();
+    IdentifiedAnnotation arg1 = (IdentifiedAnnotation) relation.getArg1().getArgument();
+    IdentifiedAnnotation arg2 = (IdentifiedAnnotation) relation.getArg2().getArgument();
     String text = arg1.getCAS().getDocumentText();
     int begin = Math.min(arg1.getBegin(), arg2.getBegin());
     int end = Math.max(arg1.getBegin(), arg2.getBegin());
@@ -636,109 +518,33 @@ public class RelationExtractorEvaluation
   }
 
   /**
-   * Holds a set of parameters for a relation extraction model
-   */
-  public static class ParameterSettings {
-    public boolean classifyBothDirections;
-
-    public float probabilityOfKeepingANegativeExample;
-
-    public String svmKernel;
-
-    public int svmKernelIndex;
-
-    public double svmCost;
-
-    public double svmGamma;
-
-    public AnnotationStatistics<String> stats;
-
-    private static List<String> SVM_KERNELS = Arrays.asList(
-        "linear",
-        "polynomial",
-        "radial basis function",
-        "sigmoid");
-
-    public ParameterSettings(
-        boolean classifyBothDirections,
-        float probabilityOfKeepingANegativeExample,
-        String svmKernel,
-        double svmCost,
-        double svmGamma) {
-      super();
-      this.classifyBothDirections = classifyBothDirections;
-      this.probabilityOfKeepingANegativeExample = probabilityOfKeepingANegativeExample;
-      this.svmKernel = svmKernel;
-      this.svmKernelIndex = SVM_KERNELS.indexOf(this.svmKernel);
-      if (this.svmKernelIndex == -1) {
-        throw new IllegalArgumentException("Unrecognized kernel: " + this.svmKernel);
-      }
-      this.svmCost = svmCost;
-      this.svmGamma = svmGamma;
-    }
-
-    @Override
-    public String toString() {
-      ToStringHelper helper = Objects.toStringHelper(this);
-      helper.add("classifyBothDirections", this.classifyBothDirections);
-      helper.add("probabilityOfKeepingANegativeExample", this.probabilityOfKeepingANegativeExample);
-      helper.add("svmKernel", this.svmKernel);
-      helper.add("svmCost", this.svmCost);
-      helper.add("svmGamma", this.svmGamma);
-      return helper.toString();
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hashCode(
-          this.classifyBothDirections,
-          this.probabilityOfKeepingANegativeExample,
-          this.svmKernel,
-          this.svmCost,
-          this.svmGamma);
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (!(obj instanceof ParameterSettings)) {
-        return false;
-      }
-      ParameterSettings that = (ParameterSettings) obj;
-      return this.classifyBothDirections == that.classifyBothDirections
-          && this.probabilityOfKeepingANegativeExample == that.probabilityOfKeepingANegativeExample
-          && this.svmKernel == that.svmKernel && this.svmCost == that.svmCost
-          && this.svmGamma == that.svmGamma;
-    }
-
-  }
-
-  /**
-   * Annotator that removes cTAKES mentions in the system view and copies relations from the gold
-   * view to the system view
+   * Annotator that removes cTAKES mentions in the system view and copies
+   * relations from the gold view to the system view
    */
-  public static class RemoveCTakesMentionsAndCopyGoldRelations extends
-      JCasAnnotator_ImplBase {
+  public static class RemoveCTakesMentionsAndCopyGoldRelations extends JCasAnnotator_ImplBase {
 
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
       JCas goldView, systemView;
       try {
-        goldView = jCas.getView(GOLD_VIEW_NAME);
+        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
         systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
       } catch (CASException e) {
         throw new AnalysisEngineProcessException(e);
       }
-      
-      // remove cTAKES EntityMentions and Modifiers from system view
+
+      // remove cTAKES Mentions and Modifiers from system view
       List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
+      cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class));
       cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
       cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
       for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
         cTakesMention.removeFromIndexes();
       }
 
-      // copy gold EntityMentions and Modifiers to the system view
+      // copy gold Mentions and Modifiers to the system view
       List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
+      goldMentions.addAll(JCasUtil.select(goldView, EventMention.class));
       goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
       goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
       CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
@@ -755,40 +561,42 @@ public class RelationExtractorEvaluation
         relation.addToIndexes(systemView);
         for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
           relArg.addToIndexes(systemView);
-          // relArg.getArgument() should have been added to indexes with mentions above
+          // relArg.getArgument() should have been added to indexes with
+          // mentions above
         }
       }
     }
   }
 
   /**
-   * Annotator that removes cTAKES EntityMentions and Modifiers from the system view, and copies
-   * over the manually annotated EntityMentions and Modifiers from the gold view.
-   * 
+   * Annotator that removes cTAKES Mentions and Modifiers from the system view,
+   * and copies over the manually annotated Mentions and Modifiers from the gold
+   * view.
    */
-  public static class ReplaceCTakesMentionsWithGoldMentions extends
-      JCasAnnotator_ImplBase {
+  public static class ReplaceCTakesMentionsWithGoldMentions extends JCasAnnotator_ImplBase {
 
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
       JCas goldView, systemView;
       try {
-        goldView = jCas.getView(GOLD_VIEW_NAME);
+        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
         systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
       } catch (CASException e) {
         throw new AnalysisEngineProcessException(e);
       }
 
-      // remove cTAKES EntityMentions and Modifiers from system view
+      // remove cTAKES Mentions and Modifiers from system view
       List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
+      cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class));
       cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
       cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
       for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
         cTakesMention.removeFromIndexes();
       }
 
-      // copy gold EntityMentions and Modifiers to the system view
+      // copy gold Mentions and Modifiers to the system view
       List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
+      goldMentions.addAll(JCasUtil.select(goldView, EventMention.class));
       goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
       goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
       CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
@@ -804,43 +612,22 @@ public class RelationExtractorEvaluation
   /**
    * Renders an annotation as its covered text in double quotes followed by
    * its type ID, e.g. {@code "text"(type=2)}; used to build log messages.
    *
    * @param a
    *          the annotation to render, may be {@code null}
    * @return the rendered annotation, or {@code null} when {@code a} is
    *         {@code null}
    */
   static String format(IdentifiedAnnotation a) {
     if (a == null) {
       return null;
     }
     String coveredText = a.getCoveredText();
     return String.format("\"%s\"(type=%d)", coveredText, a.getTypeID());
   }
-  
-  public static class RemoveOtherRelations extends JCasAnnotator_ImplBase {
-    
-    public static final String PARAM_RELATION_CATEGORY = "RelationCategory";
-    @ConfigurationParameter(name = PARAM_RELATION_CATEGORY)
-    private String relationCategory;
-    
 
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      List<BinaryTextRelation> relations = new ArrayList<BinaryTextRelation>();
-      relations.addAll(JCasUtil.select(jCas, BinaryTextRelation.class));
-      for (BinaryTextRelation relation : relations) {
-        if (!relation.getCategory().equals(this.relationCategory)) {
-          relation.removeFromIndexes();
-        }
-      }
-    }
-  }
-  
-  public static class RemoveSmallerEntityMentions extends JCasAnnotator_ImplBase {
+  public static class RemoveSmallerEventMentions extends JCasAnnotator_ImplBase {
 
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
-      Collection<EntityMention> mentions = JCasUtil.select(jCas, EntityMention.class);
-      for (EntityMention mention : Lists.newArrayList(mentions)) {
+      Collection<EventMention> mentions = JCasUtil.select(jCas, EventMention.class);
+      for (EventMention mention : Lists.newArrayList(mentions)) {
         int begin = mention.getBegin();
         int end = mention.getEnd();
         int typeID = mention.getTypeID();
-        List<EntityMention> subMentions = JCasUtil.selectCovered(jCas, EntityMention.class, mention);
-        for (EntityMention subMention : subMentions) {
+        List<EventMention> subMentions = JCasUtil.selectCovered(jCas, EventMention.class, mention);
+        for (EventMention subMention : subMentions) {
           if (subMention.getBegin() > begin || subMention.getEnd() < end) {
             if (subMention.getTypeID() == typeID) {
-              String message = String.format(
-                  "removed %s inside %s",
-                  format(subMention),
-                  format(mention));
+              String message =
+                  String.format("removed %s inside %s", format(subMention), format(mention));
               this.getContext().getLogger().log(Level.WARNING, message);
               subMention.removeFromIndexes();
             }
@@ -849,9 +636,10 @@ public class RelationExtractorEvaluation
       }
     }
   }
-  
+
   /**
-   * This class is useful for mapping the spans of relation arguments to the relation's category.
+   * This class is useful for mapping the spans of relation arguments to the
+   * relation's category.
    */
   public static class HashableArguments implements Comparable<HashableArguments> {
 
@@ -883,8 +671,11 @@ public class RelationExtractorEvaluation
       boolean result = false;
       if (otherObject instanceof HashableArguments) {
         HashableArguments other = (HashableArguments) otherObject;
-        result = (this.getClass() == other.getClass() && this.arg1begin == other.arg1begin
-            && this.arg1end == other.arg1end && this.arg2begin == other.arg2begin && this.arg2end == other.arg2end);
+        result =
+            (this.getClass() == other.getClass()
+                && this.arg1begin == other.arg1begin
+                && this.arg1end == other.arg1end
+                && this.arg2begin == other.arg2begin && this.arg2end == other.arg2end);
       }
       return result;
     }
@@ -919,5 +710,6 @@ public class RelationExtractorEvaluation
         return +1; // arbitrary choice for overlapping
       }
     }
+
   }
 }

Added: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java?rev=1497555&view=auto
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java (added)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java Thu Jun 27 20:14:42 2013
@@ -0,0 +1,327 @@
+package org.apache.ctakes.relationextractor.eval;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.core.ae.SHARPKnowtatorXMLReader;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLParser;
+import org.apache.uima.util.XMLSerializer;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.util.ViewURIUtil;
+import org.cleartk.util.ae.UriToDocumentTextAnnotator;
+import org.cleartk.util.cr.UriCollectionReader;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.component.ViewCreatorAnnotator;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.xml.sax.ContentHandler;
+
+import com.google.common.base.Function;
+import com.google.common.base.Functions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Ordering;
+import com.lexicalscope.jewel.cli.Option;
+
+public class SHARPXMI {
+
+  public static List<File> getTrainTextFiles(File batchesDirectory) {
+    // seed_set1: batches 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 18, 19
+    // seed_set2: batches 1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 18, 19
+    // seed_set3: batches 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 15, 16, 18, 19
+    // seed_set4: batches 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 15, 16, 18, 19
+    return getTextFilesFor(
+        batchesDirectory,
+        Pattern.compile("^(ss[1234]_batch0[2-9]|ss[1234]_batch1[56]"
+            + "|ss[1234]_batch1[89]|ss[123]_batch01"
+            + "|ss[12]_batch1[34]|ss[34]_batch1[12])$"));
+  }
+
+  public static List<File> getDevTextFiles(File batchesDirectory) {
+    // seed_set1: batches 10, 17
+    // seed_set2: batches 10, 17
+    // seed_set3: batches 10, 17
+    // seed_set4: batches 10, 17
+    return getTextFilesFor(batchesDirectory, Pattern.compile("^(ss[1234]_batch1[07])$"));
+  }
+
+  public static List<File> getTestTextFiles(File batchesDirectory) {
+    // seed_set1: batches 11, 12
+    // seed_set2: batches 11, 12
+    // seed_set3: batches 13, 14
+    // seed_set4: batches 13, 14
+    return getTextFilesFor(
+        batchesDirectory,
+        Pattern.compile("^(ss[12]_batch1[12]|ss[34]_batch1[34])$"));
+  }
+
+  public static List<File> getAllTextFiles(File batchesDirectory) {
+    return getTextFilesFor(batchesDirectory, Pattern.compile(""));
+  }
+
+  /**
+   * Collects the regular, non-hidden files from the {@code Knowtator/text}
+   * sub-directory of every non-hidden batch directory whose name the pattern
+   * is found in. Files are returned in the order reported by
+   * {@link File#listFiles()}, which the platform does not specify.
+   *
+   * @param batchesDirectory
+   *          directory containing the batch directories
+   * @param pattern
+   *          applied to each batch directory name with {@code find()}
+   * @return the text files of all matching batches
+   * @throws IllegalArgumentException
+   *           if {@code batchesDirectory}, or the {@code Knowtator/text}
+   *           directory of a matching batch, cannot be listed
+   */
+  private static List<File> getTextFilesFor(File batchesDirectory, Pattern pattern) {
+    List<File> files = Lists.newArrayList();
+    for (File batchDir : listFilesOrFail(batchesDirectory)) {
+      if (!batchDir.isDirectory() || batchDir.isHidden()) {
+        continue;
+      }
+      if (!pattern.matcher(batchDir.getName()).find()) {
+        continue;
+      }
+      File textDirectory = new File(batchDir, "Knowtator/text");
+      for (File textFile : listFilesOrFail(textDirectory)) {
+        if (textFile.isFile() && !textFile.isHidden()) {
+          files.add(textFile);
+        }
+      }
+    }
+    return files;
+  }
+
+  /**
+   * Lists a directory, replacing the {@code null} that
+   * {@link File#listFiles()} returns for a missing or unreadable directory
+   * with an exception that names the offending path.
+   */
+  private static File[] listFilesOrFail(File directory) {
+    File[] children = directory.listFiles();
+    if (children == null) {
+      throw new IllegalArgumentException("Not a readable directory: " + directory);
+    }
+    return children;
+  }
+
+  public static List<File> toXMIFiles(Options options, List<File> textFiles) {
+    List<File> xmiFiles = Lists.newArrayList();
+    for (File textFile : textFiles) {
+      xmiFiles.add(toXMIFile(options, textFile));
+    }
+    return xmiFiles;
+  }
+
+  /**
+   * Derives the XMI file for a text file: the text file's simple name with
+   * {@code .xmi} appended, placed in the configured XMI directory.
+   */
+  private static File toXMIFile(Options options, File textFile) {
+    String xmiName = textFile.getName() + ".xmi";
+    File xmiDirectory = options.getXMIDirectory();
+    return new File(xmiDirectory, xmiName);
+  }
+
+  /**
+   * Command line options shared by the programs that read the SHARP batches
+   * and their XMI serializations.
+   */
+  public static interface Options {
+
+    /** @return the directory holding the ssN_batchNN directories */
+    @Option(longName = "batches-dir",
+        description = "directory containing ssN_batchNN directories, each of which should contain "
+            + "a Knowtator directory and a Knowtator_XML directory")
+    File getBatchesDirectory();
+
+    /** @return the directory XMI files are written to and read from */
+    @Option(longName = "xmi-dir", defaultValue = "target/xmi",
+        description = "directory to store and load XMI serialization of annotations")
+    File getXMIDirectory();
+
+    /** @return whether the XMI files should be (re)generated */
+    @Option(longName = "generate-xmi",
+        description = "read in the gold annotations and serialize them as XMI")
+    boolean getGenerateXMI();
+  }
+
+  /**
+   * Name of the CAS view that {@link #generateXMI(Options)} creates and
+   * fills with the manually created (gold) annotations.
+   */
+  public static final String GOLD_VIEW_NAME = "GoldView";
+
+  /**
+   * Writes one XMI file per train, dev and test text file when the
+   * {@code generate-xmi} option is set; does nothing otherwise.
+   * <p>
+   * Each document is loaded from its URI, run through the preprocessing
+   * pipeline described by
+   * {@code desc/analysis_engine/RelationExtractorPreprocessor.xml} (resolved
+   * against the working directory), and then the Knowtator XML reader is run
+   * with its default view mapped to {@link #GOLD_VIEW_NAME}.
+   *
+   * @param options
+   *          supplies the batches directory, the XMI directory and the
+   *          generate-xmi flag
+   * @throws IllegalStateException
+   *           if the XMI directory does not exist and cannot be created
+   * @throws IllegalArgumentException
+   *           if a processed CAS carries no document ID
+   * @throws Exception
+   *           if the pipeline cannot be built or run, or an XMI file cannot
+   *           be written
+   */
+  public static void generateXMI(Options options) throws Exception {
+    // nothing to do unless XMI generation was requested
+    if (!options.getGenerateXMI()) {
+      return;
+    }
+
+    // fail early, with the path in the message, if the output directory
+    // cannot be created, instead of failing on the first file written
+    File xmiDirectory = options.getXMIDirectory();
+    if (!xmiDirectory.exists() && !xmiDirectory.mkdirs() && !xmiDirectory.isDirectory()) {
+      throw new IllegalStateException("Could not create XMI directory: " + xmiDirectory);
+    }
+
+    // create a collection reader that loads URIs for all Knowtator text files
+    List<File> files = Lists.newArrayList();
+    files.addAll(getTrainTextFiles(options.getBatchesDirectory()));
+    files.addAll(getDevTextFiles(options.getBatchesDirectory()));
+    files.addAll(getTestTextFiles(options.getBatchesDirectory()));
+    CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files);
+
+    // load the text from the URI, run the preprocessor, then run the
+    // Knowtator XML reader
+    AggregateBuilder builder = new AggregateBuilder();
+    builder.add(UriToDocumentTextAnnotator.getDescription());
+    File preprocessDescFile = new File("desc/analysis_engine/RelationExtractorPreprocessor.xml");
+    XMLParser parser = UIMAFramework.getXMLParser();
+    XMLInputSource source = new XMLInputSource(preprocessDescFile);
+    builder.add(parser.parseAnalysisEngineDescription(source));
+    builder.add(AnalysisEngineFactory.createPrimitiveDescription(
+        ViewCreatorAnnotator.class,
+        ViewCreatorAnnotator.PARAM_VIEW_NAME,
+        GOLD_VIEW_NAME));
+    builder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyDocumentTextToGoldView.class));
+    builder.add(
+        AnalysisEngineFactory.createPrimitiveDescription(DocumentIDAnnotator.class),
+        CAS.NAME_DEFAULT_SOFA,
+        GOLD_VIEW_NAME);
+    builder.add(
+        AnalysisEngineFactory.createPrimitiveDescription(SHARPKnowtatorXMLReader.class),
+        CAS.NAME_DEFAULT_SOFA,
+        GOLD_VIEW_NAME);
+
+    // write out an XMI for each file
+    for (JCas jCas : new JCasIterable(reader, builder.createAggregate())) {
+      JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+      String documentID = DocumentIDAnnotationUtil.getDocumentID(goldView);
+      if (documentID == null) {
+        throw new IllegalArgumentException("No documentID for CAS:\n" + jCas);
+      }
+      File outFile = toXMIFile(options, new File(documentID));
+      FileOutputStream stream = new FileOutputStream(outFile);
+      try {
+        ContentHandler handler = new XMLSerializer(stream).getContentHandler();
+        new XmiCasSerializer(jCas.getTypeSystem()).serialize(jCas.getCas(), handler);
+      } finally {
+        // release the file handle even when serialization fails
+        stream.close();
+      }
+    }
+  }
+
+  /** The data split that an evaluation reports its results on. */
+  public enum EvaluateOn {
+    /** cross-validation over the training files */
+    TRAIN,
+    /** train on the training files, evaluate on the development files */
+    DEV,
+    /** train on the training and development files, evaluate on the test files */
+    TEST
+  }
+
+  /**
+   * Command line options for running an evaluation, in addition to the
+   * directory options inherited from {@link Options}.
+   * <p>
+   * NOTE(review): the option name {@code evalute-on} and its getter are
+   * misspelt ("evalute" for "evaluate"). They are kept as-is because both
+   * are part of the external interface; renaming needs a coordinated change
+   * of all callers and command lines.
+   */
+  public static interface EvaluationOptions extends Options {
+
+    /** @return the split to evaluate on; DEV unless specified */
+    @Option(longName = "evalute-on", defaultValue = "DEV",
+        description = "perform evaluation using the training (TRAIN), development (DEV) or test "
+            + "(TEST) data.")
+    EvaluateOn getEvaluteOn();
+
+    /** @return whether every setting of the parameter grid should be evaluated */
+    @Option(longName = "grid-search",
+        description = "run a grid search to select the best parameters")
+    boolean getGridSearch();
+  }
+  
+  public static abstract class Evaluation_ImplBase extends org.cleartk.eval.Evaluation_ImplBase<File, AnnotationStatistics<String>> {
+
+    public Evaluation_ImplBase(File baseDirectory) {
+      super(baseDirectory);
+    }
+
+    @Override
+    public CollectionReader getCollectionReader(List<File> items) throws Exception {
+      return CollectionReaderFactory.createCollectionReader(
+          XMIReader.class,
+          TypeSystemDescriptionFactory.createTypeSystemDescription(),
+          XMIReader.PARAM_FILES,
+          items);
+    }
+
+  }
+
+  /**
+   * Rejects option combinations that make no sense; currently only a grid
+   * search combined with evaluation on the test set.
+   *
+   * @param options
+   *          the parsed evaluation options
+   * @throws IllegalArgumentException
+   *           if a grid search on the test set was requested
+   */
+  public static void validate(EvaluationOptions options) throws Exception {
+    // tuning parameters against the test data is not allowed
+    if (options.getEvaluteOn().equals(EvaluateOn.TEST)) {
+      if (options.getGridSearch()) {
+        throw new IllegalArgumentException("grid search can only be run on the train or dev sets");
+      }
+    }
+  }
+
+  /**
+   * Runs one evaluation per candidate parameter setting, stores the
+   * resulting statistics in each setting's {@code stats} field, and reports
+   * on standard error: a summary line per setting (only when there is more
+   * than one), followed by the statistics, the settings and the confusions
+   * of the setting with the highest F1.
+   * <p>
+   * The candidates are {@code gridOfSettings} when the grid-search option is
+   * set, and just {@code bestSettings} otherwise. Depending on the
+   * evaluate-on option, a candidate is scored by 2-fold cross-validation on
+   * the training files (TRAIN), by training on the training files and
+   * testing on the development files (DEV), or by training on the training
+   * plus development files and testing on the test files (TEST).
+   *
+   * @param options
+   *          selects the split, grid search, and the data directories
+   * @param bestSettings
+   *          the setting to evaluate when no grid search is requested
+   * @param gridOfSettings
+   *          the settings to evaluate when a grid search is requested
+   * @param getEvaluation
+   *          creates the evaluation for a given setting
+   * @throws IllegalArgumentException
+   *           if the evaluate-on option has an unsupported value
+   */
+  public static <T extends Evaluation_ImplBase> void evaluate(
+      EvaluationOptions options,
+      ParameterSettings bestSettings,
+      List<ParameterSettings> gridOfSettings,
+      Function<ParameterSettings, T> getEvaluation) throws Exception {
+    // choose the candidate settings
+    List<ParameterSettings> candidates =
+        options.getGridSearch() ? gridOfSettings : Lists.newArrayList(bestSettings);
+
+    // score every candidate by its F1
+    Map<ParameterSettings, Double> f1BySettings = new HashMap<ParameterSettings, Double>();
+    for (ParameterSettings candidate : candidates) {
+      Evaluation_ImplBase evaluation = getEvaluation.apply(candidate);
+
+      switch (options.getEvaluteOn()) {
+      case TRAIN: {
+        // cross-validation (2 folds) on the training set
+        List<File> trainXmis =
+            toXMIFiles(options, getTrainTextFiles(options.getBatchesDirectory()));
+        List<AnnotationStatistics<String>> perFold = evaluation.crossValidation(trainXmis, 2);
+        candidate.stats = AnnotationStatistics.addAll(perFold);
+        break;
+      }
+      case DEV: {
+        // train on the training set, evaluate on the dev set
+        List<File> trainXmis =
+            toXMIFiles(options, getTrainTextFiles(options.getBatchesDirectory()));
+        List<File> devXmis = toXMIFiles(options, getDevTextFiles(options.getBatchesDirectory()));
+        candidate.stats = evaluation.trainAndTest(trainXmis, devXmis);
+        break;
+      }
+      case TEST: {
+        // train on the training set + dev set, evaluate on the test set
+        List<File> trainAndDevTexts = new ArrayList<File>();
+        trainAndDevTexts.addAll(getTrainTextFiles(options.getBatchesDirectory()));
+        trainAndDevTexts.addAll(getDevTextFiles(options.getBatchesDirectory()));
+        List<File> trainAndDevXmis = toXMIFiles(options, trainAndDevTexts);
+        List<File> testXmis = toXMIFiles(options, getTestTextFiles(options.getBatchesDirectory()));
+        candidate.stats = evaluation.trainAndTest(trainAndDevXmis, testXmis);
+        break;
+      }
+      default:
+        throw new IllegalArgumentException("Invalid EvaluateOn: " + options.getEvaluteOn());
+      }
+      f1BySettings.put(candidate, candidate.stats.f1());
+    }
+
+    // order the settings by ascending F1, so that the best one comes last
+    List<ParameterSettings> ranked = new ArrayList<ParameterSettings>(f1BySettings.keySet());
+    Function<ParameterSettings, Double> f1Of = Functions.forMap(f1BySettings);
+    Collections.sort(ranked, Ordering.natural().onResultOf(f1Of));
+
+    // print the performance of each setting when several were compared
+    if (ranked.size() > 1) {
+      System.err.println("Summary");
+      for (ParameterSettings scored : ranked) {
+        AnnotationStatistics<String> scoredStats = scored.stats;
+        System.err.printf(
+            "F1=%.3f P=%.3f R=%.3f %s\n",
+            scoredStats.f1(),
+            scoredStats.precision(),
+            scoredStats.recall(),
+            scored);
+      }
+      System.err.println();
+    }
+
+    // print the overall best model, if anything was evaluated at all
+    if (ranked.isEmpty()) {
+      return;
+    }
+    ParameterSettings best = ranked.get(ranked.size() - 1);
+    System.err.println("Best model:");
+    System.err.print(best.stats);
+    System.err.println(best);
+    System.err.println(best.stats.confusions());
+    System.err.println();
+  }
+
+  /**
+   * Annotator that adds a {@code DocumentID} annotation to the view it is
+   * run on, using the file path of the view's URI as the document ID.
+   */
+  public static class DocumentIDAnnotator extends JCasAnnotator_ImplBase {
+
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      // the document ID is the path of the file that the URI points to
+      File sourceFile = new File(ViewURIUtil.getURI(jCas));
+      DocumentID idAnnotation = new DocumentID(jCas);
+      idAnnotation.setDocumentID(sourceFile.getPath());
+      idAnnotation.addToIndexes();
+    }
+  }
+
+  /**
+   * Annotator that sets the document text of the {@link #GOLD_VIEW_NAME}
+   * view to the document text of the view it is run on.
+   */
+  public static class CopyDocumentTextToGoldView extends JCasAnnotator_ImplBase {
+
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      JCas goldView;
+      try {
+        goldView = jCas.getView(GOLD_VIEW_NAME);
+      } catch (CASException e) {
+        // report a failed view lookup as a processing failure
+        throw new AnalysisEngineProcessException(e);
+      }
+      String documentText = jCas.getDocumentText();
+      goldView.setDocumentText(documentText);
+    }
+  }
+}

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java Thu Jun 27 20:14:42 2013
@@ -21,66 +21,72 @@ package org.apache.ctakes.relationextrac
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.List;
 
-import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
 import org.apache.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
 import org.apache.ctakes.relationextractor.eval.ModifierExtractorEvaluation;
+import org.apache.ctakes.relationextractor.eval.ParameterSettings;
 import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation;
-import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.ParameterSettings;
+import org.apache.ctakes.relationextractor.eval.SHARPXMI;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.DegreeOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.util.XMLInputSource;
 import org.apache.uima.util.XMLParser;
-import org.cleartk.classifier.DataWriter;
 import org.cleartk.classifier.jar.GenericJarClassifierFactory;
-import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
-import org.cleartk.util.Options_ImplBase;
-import org.kohsuke.args4j.Option;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
-import org.uimafit.factory.ConfigurationParameterFactory;
 import org.uimafit.factory.TypeSystemDescriptionFactory;
-import org.uimafit.testing.util.HideOutput;
 import org.xml.sax.SAXException;
 
+import com.google.common.collect.ObjectArrays;
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
+
 /**
- * This class produces production models for the RelationExtractor module. Specifically it produces
- * model and descriptor files for the ModifierExtractor, DegreeOfRelationExtractor, and
- * EntityMentionPairRelationExtractor. Additionally it produces an aggregrate descriptor for the
- * entire pipeline from pre-processing to relation extraction.
+ * This class produces production models for the RelationExtractor module.
+ * Specifically it produces model and descriptor files for the
+ * ModifierExtractor, DegreeOfRelationExtractor, and
+ * LocationOfRelationExtractor. Additionally it produces an aggregate
+ * descriptor for the entire pipeline from pre-processing to relation
+ * extraction.
  * 
  * @author dmitriy dligach
  * 
  */
 public class RelationExtractorTrain {
 
-  public static class Options extends Options_ImplBase {
+  static interface Options extends RelationExtractorEvaluation.Options {
 
     @Option(
-        name = "--output-dir",
-        usage = "the output directory; typically the ctakes-relation-extractor directory")
-    public File outputDirectory = new File(".");
+        longName = "resources-dir",
+        defaultValue = "../ctakes-relation-extractor-res/src/main/resources",
+        description = "the directory where resources (e.g. models) should be written")
+    public File getResourcesDirectory();
 
     @Option(
-        name = "--train-dir",
-        usage = "specify the directory contraining the XMI training files (for example, /NLP/Corpus/Relations/sharp/xmi/all)",
-        required = true)
-    public File trainDirectory;
+        longName = "descriptors-dir",
+        defaultValue = "desc/analysis_engine",
+        description = "the directory where descriptors should be written")
+    public File getDescriptorsDirectory();
   }
 
   public static void main(String[] args) throws Exception {
-    Options options = new Options();
-    options.parseOptions(args);
-    if (!options.outputDirectory.exists()) {
+    final Options options = CliFactory.parseArguments(Options.class, args);
+    if (!options.getResourcesDirectory().exists()) {
+      throw new IllegalArgumentException("directory not found: "
+          + options.getResourcesDirectory().getCanonicalPath());
+    }
+    if (!options.getDescriptorsDirectory().exists()) {
       throw new IllegalArgumentException("directory not found: "
-          + options.outputDirectory.getCanonicalPath());
+          + options.getDescriptorsDirectory().getCanonicalPath());
     }
-    File resourcesDirectory = new File(options.outputDirectory, "src/main/resources");
-    File descriptorsDirectory = new File(options.outputDirectory, "desc/analysis_engine");
+    File resourcesDirectory = options.getResourcesDirectory();
+    File descriptorsDirectory = options.getDescriptorsDirectory();
 
     File preprocessDescFile = new File(descriptorsDirectory, "RelationExtractorPreprocessor.xml");
     if (!preprocessDescFile.exists()) {
@@ -88,60 +94,52 @@ public class RelationExtractorTrain {
           + preprocessDescFile.getCanonicalPath());
     }
 
-    List<File> trainFiles = Arrays.asList(options.trainDirectory.listFiles());
+    List<File> trainFiles = SHARPXMI.getAllTextFiles(options.getBatchesDirectory());
+    trainFiles = SHARPXMI.toXMIFiles(options, trainFiles);
 
     // Initialize model directories
     String modelPathPrefix = "org/apache/ctakes/relationextractor/models/";
     String modifierModelPath = modelPathPrefix + "modifier_extractor";
     String degreeOfModelPath = modelPathPrefix + "degree_of";
-    String locationOfModelPath = modelPathPrefix + "em_pair";
+    String locationOfModelPath = modelPathPrefix + "location_of";
 
     // create the modifier extractor
-    System.out.println("training modifier extractor");
+    System.err.println("Training modifier extractor");
     File modifierTrainDirectory = new File(resourcesDirectory, modifierModelPath);
-    ModifierExtractorEvaluation evaluation = new ModifierExtractorEvaluation(
-        modifierTrainDirectory,
-        "-t",
-        "0", // svm kernel index
-        "-c",
-        "1000" // svm cost
-    );
-    HideOutput hider = new HideOutput();
+    ModifierExtractorEvaluation evaluation =
+        new ModifierExtractorEvaluation(
+            modifierTrainDirectory,
+            ModifierExtractorEvaluation.BEST_PARAMETERS);
     evaluation.train(evaluation.getCollectionReader(trainFiles), modifierTrainDirectory);
-    hider.restoreOutput();
-    hider.close(); // workaround for https://code.google.com/p/uimafit/issues/detail?id=129
-    AnalysisEngineDescription modifierExtractorDesc = AnalysisEngineFactory.createPrimitiveDescription(
-        ModifierExtractorAnnotator.class,
-        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
-        "/" + modifierModelPath + "/model.jar");
+    AnalysisEngineDescription modifierExtractorDesc =
+        AnalysisEngineFactory.createPrimitiveDescription(
+            ModifierExtractorAnnotator.class,
+            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+            "/" + modifierModelPath + "/model.jar");
     writeDesc(descriptorsDirectory, ModifierExtractorAnnotator.class, modifierExtractorDesc);
 
     // create the degree_of extractor
-    System.out.println("training degree_of extractor");
-    AnalysisEngineDescription degreeOfRelationExtractorDesc = trainRelationExtractor(
-        resourcesDirectory,
-        degreeOfModelPath,
-        trainFiles,
-        "degree_of",
-        DegreeOfRelationExtractorAnnotator.class,
-        LIBSVMStringOutcomeDataWriter.class,
-        RelationExtractorEvaluation.BEST_DEGREE_OF_PARAMETERS,
-        descriptorsDirectory);
+    System.err.println("Training degree_of extractor");
+    AnalysisEngineDescription degreeOfRelationExtractorDesc =
+        trainRelationExtractor(
+            resourcesDirectory,
+            degreeOfModelPath,
+            trainFiles,
+            DegreeOfTextRelation.class,
+            descriptorsDirectory);
 
     // create the location_of extractor
-    System.out.println("training location_of extractor");
-    AnalysisEngineDescription locationOfRelationExtractorDesc = trainRelationExtractor(
-        resourcesDirectory,
-        locationOfModelPath,
-        trainFiles,
-        "location_of",
-        EntityMentionPairRelationExtractorAnnotator.class,
-        LIBSVMStringOutcomeDataWriter.class,
-        RelationExtractorEvaluation.BEST_NON_DEGREE_OF_PARAMETERS,
-        descriptorsDirectory);
+    System.err.println("Training location_of extractor");
+    AnalysisEngineDescription locationOfRelationExtractorDesc =
+        trainRelationExtractor(
+            resourcesDirectory,
+            locationOfModelPath,
+            trainFiles,
+            LocationOfTextRelation.class,
+            descriptorsDirectory);
 
     // create the aggregate
-    System.out.println("assembling relation extraction aggregate");
+    System.err.println("Assembling relation extraction aggregate");
     AggregateBuilder builder = new AggregateBuilder();
     XMLParser parser = UIMAFramework.getXMLParser();
     XMLInputSource source = new XMLInputSource(preprocessDescFile);
@@ -154,9 +152,10 @@ public class RelationExtractorTrain {
 
     // cleanup unnecessary model files
     for (File modelDir : new File(resourcesDirectory, modelPathPrefix).listFiles()) {
-      for (File modelFile : modelDir.listFiles()) {
-        if (!modelFile.getName().equals("model.jar")) {
-          modelFile.delete();
+      File modelFile = JarClassifierBuilder.getModelJarFile(modelDir);
+      for (File file : modelDir.listFiles()) {
+        if (!file.equals(modelFile)) {
+          file.delete();
         }
       }
     }
@@ -166,56 +165,28 @@ public class RelationExtractorTrain {
       File resourcesDirectory,
       String modelPath,
       List<File> trainFiles,
-      String relationCategory,
-      Class<? extends RelationExtractorAnnotator> annotatorClass,
-      Class<? extends DataWriter<String>> dataWriterClass,
-      ParameterSettings params,
+      Class<? extends BinaryTextRelation> relationClass,
       File descriptorsDirectory) throws Exception {
 
-    // define additional configuration parameters for the annotator
-    Object[] additionalParameters = new Object[] {
-        RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
-        params.probabilityOfKeepingANegativeExample,
-        EntityMentionPairRelationExtractorAnnotator.PARAM_CLASSIFY_BOTH_DIRECTIONS,
-        params.classifyBothDirections };
-
-    // define arguments to be passed to the classifier
-    String[] trainingArguments = new String[] {
-        "-t",
-        String.valueOf(params.svmKernelIndex),
-        "-c",
-        String.valueOf(params.svmCost),
-        "-g",
-        String.valueOf(params.svmGamma) };
+    // get the annotator class and best parameters for this relation
+    Class<? extends RelationExtractorAnnotator> annotatorClass =
+        RelationExtractorEvaluation.ANNOTATOR_CLASSES.get(relationClass);
+    ParameterSettings params = RelationExtractorEvaluation.BEST_PARAMETERS.get(relationClass);
 
+    // train the relation extractor
     File trainDirectory = new File(resourcesDirectory, modelPath);
-    RelationExtractorEvaluation evaluation = new RelationExtractorEvaluation(
-        trainDirectory,
-        relationCategory,
-        annotatorClass,
-        dataWriterClass,
-        additionalParameters,
-        trainingArguments,
-        false,
-        false,
-        false,
-        false);
-
+    RelationExtractorEvaluation evaluation =
+        new RelationExtractorEvaluation(trainDirectory, relationClass, annotatorClass, params);
     evaluation.train(evaluation.getCollectionReader(trainFiles), trainDirectory);
 
     // create the description
-    AnalysisEngineDescription relationExtractorDescription = AnalysisEngineFactory.createPrimitiveDescription(
-        annotatorClass,
-        RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
-        params.probabilityOfKeepingANegativeExample,
-        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
-        "/" + modelPath + "/model.jar");
-    if (annotatorClass == EntityMentionPairRelationExtractorAnnotator.class) {
-      ConfigurationParameterFactory.addConfigurationParameters(
-          relationExtractorDescription,
-          EntityMentionPairRelationExtractorAnnotator.PARAM_CLASSIFY_BOTH_DIRECTIONS,
-          params.classifyBothDirections);
-    }
+    Object[] pathParameters =
+        new Object[] { GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+            "/" + modelPath + "/model.jar" };
+    AnalysisEngineDescription relationExtractorDescription =
+        AnalysisEngineFactory.createPrimitiveDescription(
+            annotatorClass,
+            ObjectArrays.concat(params.configurationParameters, pathParameters, Object.class));
 
     // write the description
     writeDesc(descriptorsDirectory, annotatorClass, relationExtractorDescription);
@@ -228,18 +199,20 @@ public class RelationExtractorTrain {
       File descDir,
       Class<?> annotatorClass,
       AnalysisEngineDescription desc) throws SAXException, IOException {
-    // set the type system (uimaFIT expands all imports, so this simplifies the descriptor)
-    desc.getAnalysisEngineMetaData().setTypeSystem(TypeSystemDescriptionFactory.createTypeSystemDescription(
-        "org.apache.ctakes.typesystem.types.TypeSystem"));
+    // set the type system (uimaFIT expands all imports, so this simplifies the
+    // descriptor)
+    desc.getAnalysisEngineMetaData().setTypeSystem(
+        TypeSystemDescriptionFactory.createTypeSystemDescription("org.apache.ctakes.typesystem.types.TypeSystem"));
     writeDesc(descDir, annotatorClass.getSimpleName(), desc);
   }
 
   private static void writeDesc(File descDir, String name, AnalysisEngineDescription desc)
-      throws SAXException, IOException {
+      throws SAXException,
+      IOException {
     // set the name (not done by uimaFIT)
     desc.getMetaData().setName(name);
     File descFile = new File(descDir, name + ".xml");
-    System.out.println("writing description to " + descFile);
+    System.err.println("Writing description to " + descFile);
     FileOutputStream output = new FileOutputStream(descFile);
     desc.toXML(output);
     output.close();

Modified: ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotatorsTest.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotatorsTest.java?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotatorsTest.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotatorsTest.java Thu Jun 27 20:14:42 2013
@@ -18,7 +18,8 @@
  */
 package org.apache.ctakes.relationextractor.ae;
 
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import java.io.File;
 import java.util.Collection;
@@ -26,8 +27,11 @@ import java.util.Iterator;
 
 import org.apache.ctakes.typesystem.type.constants.CONST;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.DegreeOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
 import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.uima.UIMAFramework;
@@ -50,27 +54,24 @@ public class RelationExtractorAnnotators
     AggregateBuilder builder = new AggregateBuilder();
     builder.add(findDescription(ModifierExtractorAnnotator.class));
     builder.add(findDescription(DegreeOfRelationExtractorAnnotator.class));
-    builder.add(findDescription(EntityMentionPairRelationExtractorAnnotator.class));
+    builder.add(findDescription(LocationOfRelationExtractorAnnotator.class));
     AnalysisEngine engine = builder.createAggregate();
     JCas jCas = engine.newJCas();
 
     // populate the CAS with an example sentence
     // TODO: add annotations to support phrase chunk and dependency features
-    TokenBuilder<BaseToken, Sentence> tokenBuilder = new TokenBuilder<BaseToken, Sentence>(
-        BaseToken.class,
-        Sentence.class,
-        "partOfSpeech",
-        null);
+    TokenBuilder<BaseToken, Sentence> tokenBuilder =
+        new TokenBuilder<BaseToken, Sentence>(BaseToken.class, Sentence.class, "partOfSpeech", null);
     tokenBuilder.buildTokens(
         jCas,
         "He had a slight fracture in the proximal right fibula.",
         "He had a slight fracture in the proximal right fibula .",
         "PRP VBD DT JJ NN IN DT JJ JJ NN .");
-    EntityMention fracture = new EntityMention(jCas, 16, 24);
+    DiseaseDisorderMention fracture = new DiseaseDisorderMention(jCas, 16, 24);
     fracture.setTypeID(CONST.NE_TYPE_ID_DISORDER);
     fracture.addToIndexes();
     assertEquals("fracture", fracture.getCoveredText());
-    EntityMention fibula = new EntityMention(jCas, 32, 53);
+    AnatomicalSiteMention fibula = new AnatomicalSiteMention(jCas, 32, 53);
     fibula.setTypeID(CONST.NE_TYPE_ID_ANATOMICAL_SITE);
     fibula.addToIndexes();
     assertEquals("proximal right fibula", fibula.getCoveredText());
@@ -80,27 +81,39 @@ public class RelationExtractorAnnotators
 
     // test the modifier annotator
     Collection<Modifier> modifiers = JCasUtil.select(jCas, Modifier.class);
-    assertEquals(1, modifiers.size());
-    Modifier slight = modifiers.iterator().next();
+    assertEquals(3, modifiers.size());
+    Iterator<Modifier> modifierIterator = modifiers.iterator();
+    Modifier slight = modifierIterator.next();
     assertEquals("slight", slight.getCoveredText());
+    assertEquals(CONST.MODIFIER_TYPE_ID_SEVERITY_CLASS, slight.getTypeID());
+    //assertTrue(slight instanceof SeverityModifier);
+    Modifier proximal = modifierIterator.next();
+    assertEquals("proximal", proximal.getCoveredText());
+    assertEquals(CONST.MODIFIER_TYPE_ID_UNKNOWN, proximal.getTypeID());
+    //assertTrue(proximal instanceof BodyLateralityModifier);
+    Modifier right = modifierIterator.next();
+    assertEquals("right", right.getCoveredText());
+    assertEquals(CONST.MODIFIER_TYPE_ID_UNKNOWN, right.getTypeID());
+    //assertTrue(right instanceof BodySideModifier);
 
     // test the relation annotators
     Collection<BinaryTextRelation> relations = JCasUtil.select(jCas, BinaryTextRelation.class);
     assertEquals(2, relations.size());
     Iterator<BinaryTextRelation> iterator = relations.iterator();
     BinaryTextRelation slightFracture = iterator.next();
+    assertTrue(slightFracture instanceof DegreeOfTextRelation);
     assertEquals("degree_of", slightFracture.getCategory());
     assertEquals(fracture, slightFracture.getArg1().getArgument());
     assertEquals(slight, slightFracture.getArg2().getArgument());
     BinaryTextRelation fractureFibula = iterator.next();
     assertEquals("location_of", fractureFibula.getCategory());
-    // TODO: this seems backwards, but maybe that's how it's supposed to be?
-    assertEquals(fibula, fractureFibula.getArg1().getArgument());
-    assertEquals(fracture, fractureFibula.getArg2().getArgument());
+    assertTrue(fractureFibula instanceof LocationOfTextRelation);
+    assertEquals(fracture, fractureFibula.getArg1().getArgument());
+    assertEquals(fibula, fractureFibula.getArg2().getArgument());
   }
 
-  private static AnalysisEngineDescription findDescription(Class<? extends JCasAnnotator_ImplBase> cls)
-      throws Exception {
+  private static AnalysisEngineDescription findDescription(
+      Class<? extends JCasAnnotator_ImplBase> cls) throws Exception {
     File directory = new File("desc/analysis_engine");
     File file = new File(directory, cls.getSimpleName() + ".xml");
     XMLParser parser = UIMAFramework.getXMLParser();