You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2021/09/22 19:00:06 UTC

svn commit: r1893521 - in /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor: eval/ metastasis/ pipelines/

Author: tmill
Date: Wed Sep 22 19:00:05 2021
New Revision: 1893521

URL: http://svn.apache.org/viewvc?rev=1893521&view=rev
Log:
Major refactor of relation evaluation code to generalize the idea of different corpora, add reader for DeepPhe, and make it easier to evaluate cross-domain.

Added:
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/CorpusXMI.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/DeepPheXMI.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationEvaluation_ImplBase.java
Modified:
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java

Added: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/CorpusXMI.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/CorpusXMI.java?rev=1893521&view=auto
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/CorpusXMI.java (added)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/CorpusXMI.java Wed Sep 22 19:00:05 2021
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.eval;
+
+
+import com.google.common.collect.Lists;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.util.ViewUriUtil;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Created by tmill on 1/31/17.
+ */
+public abstract class CorpusXMI {
+    public enum Corpus {SHARP, SHARP_RELEASE, DeepPhe}
+
+    public enum EvaluateOn {
+        TRAIN, DEV, TEST, OTHER
+    }
+
+    public static final String GOLD_VIEW_NAME = "GoldView";
+
+    public static void validate(RelationEvaluation_ImplBase.EvaluationOptions options) throws Exception {
+        // error on invalid option combinations
+        if (options.getEvaluateOn().equals(EvaluateOn.TEST) && options.getGridSearch()) {
+            throw new IllegalArgumentException("grid search can only be run on the train or dev sets");
+        }
+    }
+
+    public static List<File> getTrainTextFiles(Corpus trainCorpus, EvaluateOn split, File corpusDirectory) {
+        List<File> trainFiles = null;
+        new ArrayList<>();
+
+        // No matter what, the training files will contain the training data from the training corpus. May need to
+        // add dev later.
+        if (trainCorpus == Corpus.SHARP) {
+            trainFiles = SHARPXMI.getTrainTextFiles(corpusDirectory);
+        } else if (trainCorpus == Corpus.SHARP_RELEASE) {
+            trainFiles = SHARPXMI.getTrainTextFilesFromCorpus(corpusDirectory);
+        } else if (trainCorpus == Corpus.DeepPhe) {
+            trainFiles = DeepPheXMI.getTrainTextFiles(corpusDirectory);
+        } else {
+            throw new RuntimeException("Unrecognized train corpus option: " + trainCorpus);
+        }
+
+        if (split == EvaluateOn.TEST) {
+            // if we are testing on an actual test set then we first need to add the dev set notes to the training
+            // set.
+            if (trainCorpus == Corpus.SHARP) {
+                trainFiles.addAll(SHARPXMI.getDevTextFiles(corpusDirectory));
+            } else if (trainCorpus == Corpus.SHARP_RELEASE) {
+                trainFiles.addAll(SHARPXMI.getTrainTextFilesFromCorpus(corpusDirectory));
+            } else if (trainCorpus == Corpus.DeepPhe) {
+                trainFiles.addAll(DeepPheXMI.getTrainTextFiles(corpusDirectory));
+            } else {
+                throw new RuntimeException("Unrecognized train corpus option: " + trainCorpus);
+            }
+
+        }
+        return trainFiles;
+    }
+
+    public static List<File> getTestTextFiles(Corpus testCorpus, EvaluateOn split, File corpusDirectory) {
+        List<File> testFiles = null;
+
+        if (split == CorpusXMI.EvaluateOn.TRAIN) {
+            if (testCorpus == CorpusXMI.Corpus.SHARP) {
+                testFiles = SHARPXMI.getTrainTextFiles(corpusDirectory);
+            } else if (testCorpus == CorpusXMI.Corpus.SHARP_RELEASE) {
+                testFiles = SHARPXMI.getTrainTextFilesFromCorpus(corpusDirectory);
+            } else if (testCorpus == CorpusXMI.Corpus.DeepPhe) {
+                testFiles = DeepPheXMI.getTrainTextFiles(corpusDirectory);
+            }
+        } else if (split == CorpusXMI.EvaluateOn.DEV) {
+            if (testCorpus == CorpusXMI.Corpus.SHARP) {
+                testFiles = SHARPXMI.getDevTextFiles(corpusDirectory);
+            } else if (testCorpus == Corpus.SHARP_RELEASE) {
+                testFiles = SHARPXMI.getDevTextFilesFromCorpus(corpusDirectory);
+            } else if (testCorpus == CorpusXMI.Corpus.DeepPhe) {
+                testFiles = DeepPheXMI.getDevTextFiles(corpusDirectory);
+            }
+        } else if (split == CorpusXMI.EvaluateOn.TEST) {
+            // find the test set files:
+            if (testCorpus == CorpusXMI.Corpus.SHARP) {
+                testFiles = SHARPXMI.getTestTextFiles(corpusDirectory);
+            } else if (testCorpus == Corpus.SHARP_RELEASE) {
+                testFiles = SHARPXMI.getTestTextFilesFromCorpus(corpusDirectory);
+            } else if (testCorpus == CorpusXMI.Corpus.DeepPhe) {
+                testFiles = DeepPheXMI.getTestTextFiles(corpusDirectory);
+            }
+        }
+        return testFiles;
+    }
+
+    public static List<File> toXMIFiles( File xmiDirectory, List<File> textFiles ) {
+        List<File> xmiFiles = Lists.newArrayList();
+        for ( File textFile : textFiles ) {
+            xmiFiles.add( toXMIFile( xmiDirectory, textFile ) );
+        }
+        return xmiFiles;
+    }
+
+    protected static File toXMIFile( File xmiDirectory, File textFile ) {
+        return new File( xmiDirectory, textFile.getName() + ".xmi" );
+    }
+
+    public static class DocumentIDAnnotator extends JCasAnnotator_ImplBase {
+
+        @Override
+        public void process( JCas jCas ) throws AnalysisEngineProcessException {
+            String documentID = new File( ViewUriUtil.getURI( jCas ) ).getPath();
+            DocumentID documentIDAnnotation = new DocumentID( jCas );
+            documentIDAnnotation.setDocumentID( documentID );
+            documentIDAnnotation.addToIndexes();
+        }
+    }
+
+    public static class CopyDocumentTextToGoldView extends JCasAnnotator_ImplBase {
+        @Override
+        public void process( JCas jCas ) throws AnalysisEngineProcessException {
+            try {
+                JCas goldView = jCas.getView( GOLD_VIEW_NAME );
+                goldView.setDocumentText( jCas.getDocumentText() );
+            } catch ( CASException e ) {
+                throw new AnalysisEngineProcessException( e );
+            }
+        }
+    }
+}
+

Added: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/DeepPheXMI.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/DeepPheXMI.java?rev=1893521&view=auto
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/DeepPheXMI.java (added)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/DeepPheXMI.java Wed Sep 22 19:00:05 2021
@@ -0,0 +1,391 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.eval;
+
+import com.google.common.collect.Sets;
+import org.apache.ctakes.chunker.ae.Chunker;
+import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
+import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
+import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
+import org.apache.ctakes.core.ae.SentenceDetectorAnnotatorBIO;
+import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
+import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.util.doc.DocIdUtil;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
+import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
+import org.apache.ctakes.lvg.ae.LvgAnnotator;
+import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.*;
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.component.ViewCreatorAnnotator;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AggregateBuilder;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.pipeline.JCasIterator;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.XMLSerializer;
+import org.cleartk.util.ViewUriUtil;
+import org.cleartk.util.ae.UriToDocumentTextAnnotator;
+import org.cleartk.util.cr.UriCollectionReader;
+import org.jdom2.Element;
+import org.jdom2.JDOMException;
+import org.jdom2.input.SAXBuilder;
+import org.xml.sax.ContentHandler;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Reads the Anafora-annotated DeepPhe corpus: selects the text files of the
+ * train/dev/test patient splits, runs a preprocessing pipeline over them,
+ * loads the gold annotations into the gold view and writes one XMI per file.
+ * <p>
+ * Created by tmill on 1/24/17.
+ */
+public class DeepPheXMI extends CorpusXMI {
+
+    // Per-document directory names; group 1 is the patient number used to pick the split.
+    private static final Pattern dirPatt = Pattern.compile("patient(\\d+)_report(\\d+)_(.*)");
+
+    // These are the splits for the breast cancer patient set.
+    // See here: https://healthnlp.hms.harvard.edu/cancer/wiki/index.php/Main_Page#DeepPhe_Gold_Set
+    public final static Set<Integer> trainPatients = Sets.newHashSet(3, 11, 92, 93);
+    public final static Set<Integer> devPatients = Sets.newHashSet(2, 21);
+    public final static Set<Integer> testPatients = Sets.newHashSet(1, 16);
+
+    // TODO - much of this can be encapsulated in the parent class and just pass it the description for the corpus reader.
+    /**
+     * Runs preprocessing plus the gold-annotation reader over every train, dev
+     * and test document and serializes each resulting CAS as XMI.
+     *
+     * @param xmiDirectory          directory the XMI files are written to; created if missing
+     * @param anaforaInputDirectory root directory of the Anafora-annotated corpus
+     * @throws IOException              if the XMI directory cannot be created
+     * @throws IllegalArgumentException if a processed CAS carries no document ID
+     */
+    public static void generateXMI(File xmiDirectory, File anaforaInputDirectory) throws Exception {
+        // if necessary, create the output directory first; fail early instead of
+        // on the first FileOutputStream if that is not possible
+        if ( !xmiDirectory.exists() && !xmiDirectory.mkdirs() && !xmiDirectory.isDirectory() ) {
+            throw new IOException( "could not create XMI directory: " + xmiDirectory );
+        }
+
+        List<File> files = new ArrayList<>();
+        files.addAll(getTrainTextFiles(anaforaInputDirectory));
+        files.addAll(getDevTextFiles(anaforaInputDirectory));
+        files.addAll(getTestTextFiles(anaforaInputDirectory));
+
+        CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files);
+        AggregateBuilder builder = new AggregateBuilder();
+        builder.add( UriToDocumentTextAnnotator.getDescription() );
+
+        builder.add( getDeepPhePreprocessingPipeline() );
+        builder.add( AnalysisEngineFactory.createEngineDescription(
+                ViewCreatorAnnotator.class,
+                ViewCreatorAnnotator.PARAM_VIEW_NAME,
+                GOLD_VIEW_NAME ) );
+        builder.add( AnalysisEngineFactory.createEngineDescription( CopyDocumentTextToGoldView.class ) );
+        // the next two engines see the gold view as their default view
+        builder.add(
+                AnalysisEngineFactory.createEngineDescription( DocumentIDAnnotator.class ),
+                CAS.NAME_DEFAULT_SOFA,
+                GOLD_VIEW_NAME );
+        builder.add(
+                AnalysisEngineFactory.createEngineDescription( DeepPheAnaforaXMLReader.getDescription(anaforaInputDirectory) ),
+                CAS.NAME_DEFAULT_SOFA,
+                GOLD_VIEW_NAME );
+
+        // write out an XMI for each file
+        for (Iterator<JCas> casIter = new JCasIterator( reader, builder.createAggregate() ); casIter.hasNext(); ) {
+            JCas jCas = casIter.next();
+            JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+            String documentID = DocIdUtil.getDocumentID(goldView);
+            if (documentID == null) {
+                throw new IllegalArgumentException("No documentID for CAS:\n" + jCas);
+            }
+            File outFile = toXMIFile(xmiDirectory, new File(documentID));
+            // try-with-resources: the stream is closed even when serialization fails
+            try (FileOutputStream stream = new FileOutputStream(outFile)) {
+                ContentHandler handler = new XMLSerializer(stream).getContentHandler();
+                new XmiCasSerializer(jCas.getTypeSystem()).serialize(jCas.getCas(), handler);
+            }
+        }
+    }
+
+    /** Returns the text files of all documents belonging to {@link #trainPatients}. */
+    public static List<File> getTrainTextFiles(File anaforaDirectory) {
+        return getSetTextFiles(anaforaDirectory, trainPatients);
+    }
+
+    /** Returns the text files of all documents belonging to {@link #devPatients}. */
+    public static List<File> getDevTextFiles(File anaforaDirectory){
+        return getSetTextFiles(anaforaDirectory, devPatients);
+    }
+
+    /** Returns the text files of all documents belonging to {@link #testPatients}. */
+    public static List<File> getTestTextFiles(File anaforaDirectory){
+        return getSetTextFiles(anaforaDirectory, testPatients);
+    }
+
+    /**
+     * Lists the text files of every document directory whose patient number is in {@code setToUse}.
+     *
+     * @param anaforaDirectory root directory with one sub-directory per annotated document
+     * @param setToUse         patient numbers to include
+     * @return a new list of text files (one per matching sub-directory)
+     * @throws IllegalArgumentException if {@code anaforaDirectory} cannot be listed
+     */
+    private static List<File> getSetTextFiles(File anaforaDirectory, Set<Integer> setToUse){
+        File[] children = anaforaDirectory.listFiles();
+        if(children == null){
+            // listFiles() returns null when the path is not a directory or cannot be read
+            throw new IllegalArgumentException("Cannot list Anafora directory: " + anaforaDirectory);
+        }
+
+        List<File> files = new ArrayList<>();
+        for(File file : children){
+            if(!file.isDirectory()){
+                continue;
+            }
+            // Anafora files are organized into directories per annotation file.
+            // The matcher is a local so concurrent callers do not share state.
+            Matcher matcher = dirPatt.matcher(file.getName());
+            if(matcher.matches()){
+                int patientId = Integer.parseInt(matcher.group(1));
+                if(setToUse.contains(patientId)){
+                    // The text file just replicates the last level of the directory path:
+                    files.add(new File(file, file.getName()));
+                }
+            }
+        }
+        return files;
+    }
+
+    /** Builds the aggregate of cTAKES annotators that is run over each document before the gold annotations are read. */
+    private static AnalysisEngineDescription getDeepPhePreprocessingPipeline() throws ResourceInitializationException, MalformedURLException {
+        AggregateBuilder builder = new AggregateBuilder();
+        builder.add(SimpleSegmentAnnotator.createAnnotatorDescription());
+        builder.add(SentenceDetectorAnnotatorBIO.getDescription());
+        builder.add(TokenizerAnnotatorPTB.createAnnotatorDescription());
+        builder.add(LvgAnnotator.createAnnotatorDescription());
+        builder.add(ContextDependentTokenizerAnnotator.createAnnotatorDescription());
+        builder.add(POSTagger.createAnnotatorDescription());
+        builder.add(ConstituencyParser.createAnnotatorDescription());
+        builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
+        builder.add(Chunker.createAnnotatorDescription());
+        builder.add(ChunkAdjuster.createAnnotatorDescription(new String[]{"NP", "NP"}, 1));
+        builder.add(ChunkAdjuster.createAnnotatorDescription(new String[]{"NP", "PP", "NP"}, 2));
+        builder.add(DefaultJCasTermAnnotator.createAnnotatorDescription());
+
+        return builder.createAggregateDescription();
+    }
+
+
+
+    /**
+     * Annotator that loads the Anafora XML stored next to a document's text file
+     * and adds the entities and {@code location_of} relations it describes to the CAS.
+     * <p>
+     * Created by tmill on 2/7/17.
+     */
+    public static class DeepPheAnaforaXMLReader extends JCasAnnotator_ImplBase {
+        private static final Logger LOGGER = Logger.getLogger(DeepPheAnaforaXMLReader.class);
+
+        public static final String PARAM_ANAFORA_DIRECTORY = "anaforaDirectory";
+
+        @ConfigurationParameter(
+                name = PARAM_ANAFORA_DIRECTORY,
+                description = "root directory of the Anafora-annotated files, with one subdirectory for "
+                        + "each annotated file")
+        private File anaforaDirectory;
+
+        public static final String PARAM_ANAFORA_XML_SUFFIXES = "anaforaSuffixes";
+        @ConfigurationParameter(
+                name = PARAM_ANAFORA_XML_SUFFIXES,
+                mandatory = false,
+                description = "list of suffixes that might be added to a file name to identify the Anafora "
+                        + "XML annotations file; only the first suffix corresponding to a file will be used")
+        private String[] anaforaXMLSuffixes = new String[]{".UmlsDeepPhe.dave.completed.xml"};
+
+        public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
+            return AnalysisEngineFactory.createEngineDescription(DeepPheAnaforaXMLReader.class);
+        }
+
+        public static AnalysisEngineDescription getDescription(File anaforaDirectory)
+                throws ResourceInitializationException {
+            return AnalysisEngineFactory.createEngineDescription(
+                    DeepPheAnaforaXMLReader.class,
+                    DeepPheAnaforaXMLReader.PARAM_ANAFORA_DIRECTORY,
+                    anaforaDirectory);
+        }
+
+        /**
+         * Finds the Anafora XML file for the document in {@code jCas} and loads its annotations.
+         *
+         * @throws IllegalArgumentException if suffixes are configured but none of them names an existing file
+         */
+        @Override
+        public void process(JCas jCas) throws AnalysisEngineProcessException {
+            File textFile = new File(ViewUriUtil.getURI(jCas));
+            LOGGER.info("processing " + textFile);
+
+            // The XML file sits next to the text file: <text file path><suffix>.
+            // (This is the same path whether or not anaforaDirectory is configured.)
+            List<File> possibleXMLFiles = new ArrayList<>();
+            for (String anaforaXMLSuffix : this.anaforaXMLSuffixes) {
+                possibleXMLFiles.add(new File(textFile.getPath() + anaforaXMLSuffix));
+            }
+
+            // find an Anafora XML file that actually exists
+            File xmlFile = null;
+            for (File possibleXMLFile : possibleXMLFiles) {
+                if (possibleXMLFile.exists()) {
+                    xmlFile = possibleXMLFile;
+                    break;
+                }
+            }
+            if (this.anaforaXMLSuffixes.length > 0 && xmlFile == null) {
+                throw new IllegalArgumentException("no Anafora XML file found from " + possibleXMLFiles);
+            }
+
+            if (xmlFile != null) {
+                processXmlFile(jCas, xmlFile);
+            }
+
+        }
+
+        /**
+         * Parses one Anafora XML file and adds its entities and {@code location_of} relations to {@code jCas}.
+         * Malformed entities and relations with unresolved arguments are logged and skipped.
+         */
+        private static void processXmlFile(JCas jCas, File xmlFile) throws AnalysisEngineProcessException {
+            // load the XML
+            Element dataElem;
+            try {
+                dataElem = new SAXBuilder().build(xmlFile.toURI().toURL()).getRootElement();
+            } catch (JDOMException | IOException e) {
+                // MalformedURLException is an IOException, so it is covered here too
+                throw new AnalysisEngineProcessException(e);
+            }
+
+            int docLen = jCas.getDocumentText().length();
+
+            for (Element annotationsElem : dataElem.getChildren("annotations")) {
+
+                // Both collections are scoped to this <annotations> element so that
+                // relations are only resolved against the entities declared with them.
+                Map<String, Annotation> idToAnnotation = new HashMap<>();
+                List<String[]> delayedLocationRelations = new ArrayList<>();
+
+                for (Element entityElem : annotationsElem.getChildren("entity")) {
+                    String id = removeSingleChildText(entityElem, "id", null);
+                    Element spanElem = removeSingleChild(entityElem, "span", id);
+                    String type = removeSingleChildText(entityElem, "type", id);
+                    Element propertiesElem = removeSingleChild(entityElem, "properties", id);
+
+                    int[] span = parseEnclosingSpan(spanElem, id);
+                    if (span == null) {
+                        // the problem was already logged
+                        continue;
+                    }
+                    int begin = span[0];
+                    int end = span[1];
+                    // the end offset is exclusive, so end == docLen is still inside the document
+                    if (begin < 0 || end > docLen || begin > end) {
+                        error("Illegal begin or end boundary", id);
+                        continue;
+                    }
+
+                    Annotation annotation;
+                    if ("Anatomical_site".equals(type)) {
+                        AnatomicalSiteMention as = new AnatomicalSiteMention(jCas, begin, end);
+                        // the code itself is not used yet, but the child is still consumed and validated
+                        removeSingleChildText(propertiesElem, "associatedCode", id);
+                        extractAttributeValues(propertiesElem, as, id);
+                        annotation = as;
+                    } else {
+                        annotation = createLocatableMention(jCas, type, begin, end);
+                        if (annotation == null) {
+                            LOGGER.info("This entity type is not being extracted yet!");
+                        } else {
+                            // the relation can only be built once all entities are known
+                            String bodyLocation = removeSingleChildText(propertiesElem, "body_location", id);
+                            if (bodyLocation != null && !bodyLocation.equals("")) {
+                                delayedLocationRelations.add(new String[]{id, bodyLocation});
+                            }
+                        }
+                    }
+
+                    // match the annotation to its ID for later use
+                    if (annotation != null) {
+                        annotation.addToIndexes();
+                        idToAnnotation.put(id, annotation);
+                    }
+                }
+
+                addLocationRelations(jCas, delayedLocationRelations, idToAnnotation);
+            }
+        }
+
+        /**
+         * Parses a span string of the form {@code begin,end[;begin,end...]}.
+         * UIMA doesn't support disjoint spans, so the span enclosing everything is returned.
+         *
+         * @return {@code {begin, end}}, or {@code null} (after logging) if the span is missing or malformed
+         */
+        private static int[] parseEnclosingSpan(Element spanElem, String id) {
+            if (spanElem == null) {
+                // getSingleChild has already reported the missing child
+                return null;
+            }
+            int begin = Integer.MAX_VALUE;
+            int end = Integer.MIN_VALUE;
+            for (String spanString : spanElem.getText().split(";")) {
+                String[] beginEndStrings = spanString.split(",");
+                if (beginEndStrings.length != 2) {
+                    error("span not of the format 'number,number'", id);
+                    return null;
+                }
+                int spanBegin;
+                int spanEnd;
+                try {
+                    spanBegin = Integer.parseInt(beginEndStrings[0].trim());
+                    spanEnd = Integer.parseInt(beginEndStrings[1].trim());
+                } catch (NumberFormatException e) {
+                    error("span not of the format 'number,number'", id);
+                    return null;
+                }
+                begin = Math.min(begin, spanBegin);
+                end = Math.max(end, spanEnd);
+            }
+            return new int[]{begin, end};
+        }
+
+        /**
+         * Creates the mention for an entity type that may carry a {@code body_location} property.
+         *
+         * @return the new (not yet indexed) mention, or {@code null} if the type is not handled
+         */
+        private static Annotation createLocatableMention(JCas jCas, String type, int begin, int end) {
+            if ("Disease_Disorder".equals(type)) {
+                return new DiseaseDisorderMention(jCas, begin, end);
+            } else if ("Procedure".equals(type)) {
+                return new ProcedureMention(jCas, begin, end);
+            } else if ("Sign_symptom".equals(type)) {
+                return new SignSymptomMention(jCas, begin, end);
+            } else if ("Metastasis".equals(type)) {
+                return new EventMention(jCas, begin, end);
+            }
+            return null;
+        }
+
+        /**
+         * Adds one gold {@code location_of} relation per {@code {entityId, locationId}} pair.
+         * Pairs whose IDs do not both resolve to an indexed annotation are logged and skipped,
+         * so no relation with a null argument is ever indexed.
+         */
+        private static void addLocationRelations(JCas jCas, List<String[]> relations, Map<String, Annotation> idToAnnotation) {
+            for (String[] args : relations) {
+                Annotation entity = idToAnnotation.get(args[0]);
+                Annotation location = idToAnnotation.get(args[1]);
+                if (entity == null || location == null) {
+                    error(String.format("unresolved body_location reference '%s'", args[1]), args[0]);
+                    continue;
+                }
+                LocationOfTextRelation rel = new LocationOfTextRelation(jCas);
+                rel.setCategory("location_of");
+                RelationArgument arg1 = new RelationArgument(jCas);
+                arg1.setArgument(entity);
+                rel.setArg1(arg1);
+                RelationArgument arg2 = new RelationArgument(jCas);
+                arg2.setArgument(location);
+                rel.setArg2(arg2);
+                rel.setDiscoveryTechnique(CONST.REL_DISCOVERY_TECH_GOLD_ANNOTATION);
+                rel.addToIndexes();
+            }
+        }
+
+        /** Placeholder: attribute values are not extracted yet. */
+        private static void extractAttributeValues(Element propertiesElem, IdentifiedAnnotation annotation, String id) {
+
+        }
+
+        /**
+         * Returns the first child named {@code elemName}, logging an error unless there is exactly one.
+         *
+         * @return the first matching child, or {@code null} if there is none
+         */
+        private static Element getSingleChild(Element elem, String elemName, String causeID) {
+            List<Element> children = elem.getChildren(elemName);
+            if (children.size() != 1) {
+                error(String.format("not exactly one '%s' child", elemName), causeID);
+            }
+            return children.size() > 0 ? children.get(0) : null;
+        }
+
+        /** Like {@link #getSingleChild}, but also detaches every child named {@code elemName} from {@code elem}. */
+        private static Element removeSingleChild(Element elem, String elemName, String causeID) {
+            Element child = getSingleChild(elem, elemName, causeID);
+            elem.removeChildren(elemName);
+            return child;
+        }
+
+        /**
+         * Detaches the children named {@code elemName} from {@code elem} and returns the text of the first one.
+         *
+         * @return the child's text, or {@code null} (after logging) if {@code elem} is {@code null},
+         *         the child is missing, or its text is empty
+         */
+        private static String removeSingleChildText(Element elem, String elemName, String causeID) {
+            if (elem == null) {
+                error(String.format("a missing parent for the '%s' child", elemName), causeID);
+                return null;
+            }
+            Element child = getSingleChild(elem, elemName, causeID);
+            elem.removeChildren(elemName);
+            if (child == null) {
+                // getSingleChild has already reported the missing child
+                return null;
+            }
+            String text = child.getText();
+            if (text.isEmpty()) {
+                error(String.format("an empty '%s' child", elemName), causeID);
+                return null;
+            }
+            return text;
+        }
+
+        /** Logs a problem found while reading the annotation with the given ID. */
+        private static void error(String found, String id) {
+            LOGGER.error(String.format("found %s in annotation with ID %s", found, id));
+        }
+    }
+}

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java?rev=1893521&r1=1893520&r2=1893521&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java Wed Sep 22 19:00:05 2021
@@ -18,15 +18,10 @@
  */
 package org.apache.ctakes.relationextractor.eval;
 
-import java.io.File;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
-
-import javax.annotation.Nullable;
-
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+import com.lexicalscope.jewel.cli.CliFactory;
 import org.apache.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
-import org.apache.ctakes.relationextractor.eval.SHARPXMI.EvaluationOptions;
 import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -45,11 +40,11 @@ import org.cleartk.ml.jar.GenericJarClas
 import org.cleartk.ml.jar.JarClassifierBuilder;
 import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
 
-import com.google.common.base.Function;
-import com.google.common.collect.Lists;
-import com.lexicalscope.jewel.cli.CliFactory;
+import javax.annotation.Nullable;
+import java.io.File;
+import java.util.*;
 
-public class ModifierExtractorEvaluation extends SHARPXMI.Evaluation_ImplBase {
+public class ModifierExtractorEvaluation extends RelationEvaluation_ImplBase {
 
   public static final ParameterSettings BEST_PARAMETERS = new ParameterSettings(
       LibLinearStringOutcomeDataWriter.class,
@@ -57,9 +52,30 @@ public class ModifierExtractorEvaluation
 
   public static void main(String[] args) throws Exception {
     // parse the options, validate them, and generate XMI if necessary
-    final EvaluationOptions options = CliFactory.parseArguments(EvaluationOptions.class, args);
-    SHARPXMI.validate(options);
-    SHARPXMI.generateXMI(options);
+    final RelationExtractorEvaluation.Options options = CliFactory.parseArguments(RelationExtractorEvaluation.Options.class, args);
+    CorpusXMI.validate(options);
+    if(options.getGenerateXMI()) {
+      boolean generateSharp = false, generateDeepPhe = false;
+      if (options.getTestCorpus() == CorpusXMI.Corpus.SHARP || options.getTestCorpus() == CorpusXMI.Corpus.SHARP_RELEASE) {
+        generateSharp = true;
+      } else if (options.getTestCorpus() == CorpusXMI.Corpus.DeepPhe) {
+        generateDeepPhe = true;
+      }
+      for(CorpusXMI.Corpus corpus : options.getTrainCorpus()){
+        if(corpus == CorpusXMI.Corpus.SHARP_RELEASE || corpus == CorpusXMI.Corpus.SHARP){
+          generateSharp = true;
+        }else if(corpus == CorpusXMI.Corpus.DeepPhe){
+          generateDeepPhe = true;
+        }
+      }
+
+      if(generateSharp){
+        SHARPXMI.generateXMI(options.getXMIDirectory(), options.getSharpCorpusDirectory(), options.getSharpBatchesDirectory());
+      }
+      if(generateDeepPhe){
+        DeepPheXMI.generateXMI(options.getXMIDirectory(), options.getDeepPheAnaforaDirectory());
+      }
+    }
 
     // determine the grid of parameters to search through
     // for the full set of LibLinear parameters, see:
@@ -73,17 +89,36 @@ public class ModifierExtractorEvaluation
       }
     }
 
-    // run the evaluation
-    SHARPXMI.evaluate(
-        options,
-        BEST_PARAMETERS,
-        gridOfSettings,
-        new Function<ParameterSettings, ModifierExtractorEvaluation>() {
-          @Override
-          public ModifierExtractorEvaluation apply(@Nullable ParameterSettings params) {
-            return new ModifierExtractorEvaluation(new File("target/models/modifier"), params);
-          }
-        });
+    List<File> trainFiles = new ArrayList<>();
+    for(CorpusXMI.Corpus corpus : options.getTrainCorpus()){
+      File trainCorpusDirectory;
+      if(corpus == CorpusXMI.Corpus.SHARP) trainCorpusDirectory = options.getSharpBatchesDirectory();
+      else if(corpus == CorpusXMI.Corpus.SHARP_RELEASE) trainCorpusDirectory = options.getSharpCorpusDirectory();
+      else if(corpus == CorpusXMI.Corpus.DeepPhe) trainCorpusDirectory = options.getDeepPheAnaforaDirectory();
+      else{
+        throw new Exception("Train corpus not recognized: " + corpus);
+      }
+      trainFiles.addAll(CorpusXMI.toXMIFiles(options.getXMIDirectory(), CorpusXMI.getTrainTextFiles(corpus, options.getEvaluateOn(), trainCorpusDirectory)));
+    }
+
+    File testCorpusDirectory=null;
+    if(options.getTestCorpus() == CorpusXMI.Corpus.SHARP) testCorpusDirectory = options.getSharpBatchesDirectory();
+    else if(options.getTestCorpus() == CorpusXMI.Corpus.SHARP_RELEASE) testCorpusDirectory = options.getSharpCorpusDirectory();
+    else if(options.getTestCorpus() == CorpusXMI.Corpus.DeepPhe) testCorpusDirectory = options.getDeepPheAnaforaDirectory();
+
+    List<File> testFiles = CorpusXMI.getTestTextFiles(options.getTestCorpus(), options.getEvaluateOn(), testCorpusDirectory);
+
+    if(options.getGridSearch()){
+      Map<ParameterSettings, Double> scoredParams = new HashMap<>();
+      for(ParameterSettings params : gridOfSettings){
+        ModifierExtractorEvaluation eval = new ModifierExtractorEvaluation(new File("target/models/modifier"), params);
+        params.stats = eval.trainAndTest(trainFiles, testFiles);
+        scoredParams.put(params, params.stats.f1());
+      }
+    }else {
+      ModifierExtractorEvaluation eval = new ModifierExtractorEvaluation(new File("target/models/modifier"), BEST_PARAMETERS);
+      System.err.println(eval.trainAndTest(trainFiles, testFiles));
+    }
   }
 
   private ParameterSettings parameterSettings;

Added: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationEvaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationEvaluation_ImplBase.java?rev=1893521&view=auto
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationEvaluation_ImplBase.java (added)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationEvaluation_ImplBase.java Wed Sep 22 19:00:05 2021
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.eval;
+
+import com.lexicalscope.jewel.cli.Option;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
+import org.cleartk.eval.AnnotationStatistics;
+
+import java.io.File;
+import java.util.List;
+
+/**
+ * Created by tmill on 1/31/17.
+ */
+public abstract class RelationEvaluation_ImplBase extends org.cleartk.eval.Evaluation_ImplBase<File, AnnotationStatistics<String>> {
+    public RelationEvaluation_ImplBase( File baseDirectory ) {
+        super( baseDirectory );
+    }
+
+    @Override
+    public CollectionReader getCollectionReader(List<File> items ) throws Exception {
+        return CollectionReaderFactory.createReader(
+                XMIReader.class,
+                TypeSystemDescriptionFactory.createTypeSystemDescription(),
+                XMIReader.PARAM_FILES,
+                items );
+    }
+
+    public static interface EvaluationOptions {
+        @Option(
+                longName = "evaluate-on",
+                defaultValue = "DEV",
+                description = "perform evaluation using the training (TRAIN), development (DEV) or test "
+                        + "(TEST) data.")
+        public CorpusXMI.EvaluateOn getEvaluateOn();
+
+        @Option(
+                longName = "grid-search",
+                description = "run a grid search to select the best parameters")
+        public boolean getGridSearch();
+
+        @Option(
+                defaultToNull=true,
+                longName = "train-xmi-dir",
+                description = "use these XMI files for training; they must contain the necessary preprocessing "
+                        + "in system view and gold annotation in gold view")
+        public File getTrainXmiDir();
+
+        @Option(
+                longName = "test-xmi-dir",
+                defaultValue = "",
+                description = "evaluate on these XMI files; they must contain the necessary preprocessing "
+                        + "in system view and gold annotation in gold view")
+        public File getTestXmiDir();
+
+        @Option(
+                longName = "batches-dir",
+                description = "directory containing ssN_batchNN directories, each of which should contain "
+                        + "a Knowtator directory and a Knowtator_XML directory",
+                defaultToNull = true)
+        public File getSharpBatchesDirectory();
+
+        @Option(
+                longName = "corpus-dir",
+                description = "Path to the SHARP corpus release (version 2 would end in /v2/SHARP)",
+                defaultToNull = true)
+        public File getSharpCorpusDirectory();
+
+        @Option(
+                longName = "deepphe-anafora-dir",
+                description = "Path to the anafora directory containing DeepPhe data",
+                shortName = "d",
+                defaultToNull = true)
+        public File getDeepPheAnaforaDirectory();
+
+        @Option(
+                longName = "xmi-dir",
+                defaultValue = "target/xmi",
+                description = "directory to store and load XMI serialization of annotations")
+        public File getXMIDirectory();
+
+        @Option(
+                longName = "generate-xmi",
+                description = "read in the gold annotations and serialize them as XMI")
+        public boolean getGenerateXMI();
+    }
+}

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1893521&r1=1893520&r2=1893521&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Wed Sep 22 19:00:05 2021
@@ -18,17 +18,51 @@
  */
 package org.apache.ctakes.relationextractor.eval;
 
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.ctakes.relationextractor.ae.CausesBringsAboutRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.LocationOfRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.ManagesTreatsRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.ManifestationOfRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.CausesBringsAboutTextRelation;
+import org.apache.ctakes.typesystem.type.relation.DegreeOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.ManagesTreatsTextRelation;
+import org.apache.ctakes.typesystem.type.relation.ManifestationOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import com.google.common.base.Function;
+import com.google.common.base.Functions;
 import com.google.common.base.Objects;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.collect.Ordering;
 import com.google.common.collect.Sets;
 import com.lexicalscope.jewel.cli.CliFactory;
 import com.lexicalscope.jewel.cli.Option;
 import org.apache.ctakes.core.pipeline.PipeBitInfo;
-import org.apache.ctakes.relationextractor.ae.*;
-import org.apache.ctakes.typesystem.type.relation.*;
-import org.apache.ctakes.typesystem.type.textsem.*;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -57,13 +91,11 @@ import org.cleartk.ml.jar.JarClassifierB
 import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
 import org.cleartk.util.ViewUriUtil;
 
-import javax.annotation.Nullable;
-import java.io.*;
-import java.util.*;
+public class RelationExtractorEvaluation extends RelationEvaluation_ImplBase {
 
-public class RelationExtractorEvaluation extends SHARPXMI.Evaluation_ImplBase {
 
-	public static interface Options extends SHARPXMI.EvaluationOptions {
+
+	public static interface Options extends RelationEvaluation_ImplBase.EvaluationOptions {
 
 		@Option(
 				longName = "relations",
@@ -104,6 +136,15 @@ public class RelationExtractorEvaluation
 				description = "expand events to their covering or covered events")
 		public boolean getExpandEvents();
 
+		@Option(
+				longName = "train-corpus",
+				description = "Corpora to use for training (space-separated if more than one)")
+		public List<CorpusXMI.Corpus> getTrainCorpus();
+
+		@Option(
+				longName = "test-corpus",
+				description = "Corpus to use for testing")
+		public CorpusXMI.Corpus getTestCorpus();
 	}
 
 	public static final Map<String, Class<? extends BinaryTextRelation>> RELATION_CLASSES =
@@ -158,55 +199,140 @@ public class RelationExtractorEvaluation
 	public static void main(String[] args) throws Exception {
 		// parse the options, validate them, and generate XMI if necessary
 		final Options options = CliFactory.parseArguments(Options.class, args);
-		SHARPXMI.validate(options);
-		SHARPXMI.generateXMI(options);
+		CorpusXMI.validate(options);
+		if(options.getGenerateXMI()) {
+			boolean generateSharp = false, generateDeepPhe = false;
+			if (options.getTestCorpus() == CorpusXMI.Corpus.SHARP || options.getTestCorpus() == CorpusXMI.Corpus.SHARP_RELEASE) {
+				generateSharp = true;
+			} else if (options.getTestCorpus() == CorpusXMI.Corpus.DeepPhe) {
+				generateDeepPhe = true;
+			}
+			for(CorpusXMI.Corpus corpus : options.getTrainCorpus()){
+				if(corpus == CorpusXMI.Corpus.SHARP_RELEASE || corpus == CorpusXMI.Corpus.SHARP){
+					generateSharp = true;
+				}else if(corpus == CorpusXMI.Corpus.DeepPhe){
+					generateDeepPhe = true;
+				}
+			}
+
+			if(generateSharp){
+				SHARPXMI.generateXMI(options.getXMIDirectory(), options.getSharpCorpusDirectory(), options.getSharpBatchesDirectory());
+			}
+			if(generateDeepPhe){
+				DeepPheXMI.generateXMI(options.getXMIDirectory(), options.getDeepPheAnaforaDirectory());
+			}
+		}
+
 
 		// determine the grid of parameters to search through
 		// for the full set of LibLinear parameters, see:
 		// https://github.com/bwaldvogel/liblinear-java/blob/master/src/main/java/de/bwaldvogel/liblinear/Train.java
-		List<ParameterSettings> gridOfSettings = Lists.newArrayList();
-		for (float probabilityOfKeepingANegativeExample : new float[] { 1.0f }) {//0.5f, 
-			for (int solver : new int[] { 0 /* logistic regression */, 1 /* SVM */}) {
-				for (double svmCost : new double[] { 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100 }) {
-					gridOfSettings.add(new ParameterSettings(
-							LibLinearStringOutcomeDataWriter.class,
-							new Object[] {
-									RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
-									probabilityOfKeepingANegativeExample },
-							new String[] { "-s", String.valueOf(solver), "-c", String.valueOf(svmCost) }));
+		List<ParameterSettings> gridOfSettings = null;
+		if(options.getGridSearch()) {
+			gridOfSettings = new ArrayList<>();
+			for (float probabilityOfKeepingANegativeExample : new float[]{1.0f}) {//0.5f,
+				for (int solver : new int[]{0 /* logistic regression */, 1 /* SVM */}) {
+					for (double svmCost : new double[]{0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100}) {
+						gridOfSettings.add(new ParameterSettings(
+								LibLinearStringOutcomeDataWriter.class,
+								new Object[]{
+										RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+										probabilityOfKeepingANegativeExample},
+								new String[]{"-s", String.valueOf(solver), "-c", String.valueOf(svmCost)}));
+					}
 				}
 			}
 		}
 
 		// run an evaluation for each selected relation
 		for (final String relationCategory : options.getRelations()) {
-
 			// get the best parameters for the relation
 			final Class<? extends BinaryTextRelation> relationClass =
 					RELATION_CLASSES.get(relationCategory);
-			ParameterSettings bestSettings = BEST_PARAMETERS.get(relationClass);
 
-			// run the evaluation
-			SHARPXMI.evaluate(
-					options,
-					bestSettings,
-					gridOfSettings,
-					new Function<ParameterSettings, RelationExtractorEvaluation>() {
-						@Override
-						public RelationExtractorEvaluation apply(@Nullable ParameterSettings params) {
-							return new RelationExtractorEvaluation(
-									new File("target/models/" + relationCategory),
-									relationClass,
-									ANNOTATOR_CLASSES.get(relationClass),
-									params,
-									options.getTestOnCTakes(),
-									options.getAllowSmallerSystemArguments(),
-									options.getIgnoreImpossibleGoldRelations(),
-									options.getPrintErrors(),
-									options.getClassWeights(),
-									options.getExpandEvents());
-						}
-					});
+			List<File> trainFiles = new ArrayList<>();
+			for(CorpusXMI.Corpus corpus : options.getTrainCorpus()){
+				File trainCorpusDirectory;
+				if(corpus == CorpusXMI.Corpus.SHARP) trainCorpusDirectory = options.getSharpBatchesDirectory();
+				else if(corpus == CorpusXMI.Corpus.SHARP_RELEASE) trainCorpusDirectory = options.getSharpCorpusDirectory();
+				else if(corpus == CorpusXMI.Corpus.DeepPhe) trainCorpusDirectory = options.getDeepPheAnaforaDirectory();
+				else{
+					throw new Exception("Train corpus not recognized: " + corpus);
+				}
+				trainFiles.addAll(CorpusXMI.toXMIFiles(options.getXMIDirectory(), CorpusXMI.getTrainTextFiles(corpus, options.getEvaluateOn(), trainCorpusDirectory)));
+			}
+
+			File testCorpusDirectory=null;
+
+			if(options.getTestCorpus() == CorpusXMI.Corpus.SHARP) testCorpusDirectory = options.getSharpBatchesDirectory();
+			else if(options.getTestCorpus() == CorpusXMI.Corpus.SHARP_RELEASE) testCorpusDirectory = options.getSharpCorpusDirectory();
+			else if(options.getTestCorpus() == CorpusXMI.Corpus.DeepPhe) testCorpusDirectory = options.getDeepPheAnaforaDirectory();
+
+			List<File> testFiles = CorpusXMI.toXMIFiles(options.getXMIDirectory(), CorpusXMI.getTestTextFiles(options.getTestCorpus(), options.getEvaluateOn(), testCorpusDirectory));
+
+			if(gridOfSettings != null){
+				// grid search:
+				Map<ParameterSettings, Double> scoredParams = new HashMap<>();
+				for(ParameterSettings params : gridOfSettings) {
+					RelationExtractorEvaluation eval = new RelationExtractorEvaluation(
+							new File("target/models/" + relationCategory),
+							relationClass,
+							ANNOTATOR_CLASSES.get(relationClass),
+							params,
+							options.getTestOnCTakes(),
+							options.getAllowSmallerSystemArguments(),
+							options.getIgnoreImpossibleGoldRelations(),
+							options.getPrintErrors(),
+							options.getClassWeights(),
+							options.getExpandEvents());
+					params.stats = eval.trainAndTest(trainFiles, testFiles);
+					scoredParams.put(params, params.stats.f1());
+				}
+				// sort parameters by F1 in ascending order, so the best-scoring settings end up last
+				List<ParameterSettings> list = new ArrayList<>( scoredParams.keySet() );
+				Function<ParameterSettings, Double> getCount = Functions.forMap( scoredParams );
+				Collections.sort( list, Ordering.natural().onResultOf( getCount ) );
+
+				// print performance of each set of parameters
+				if ( list.size() > 1 ) {
+					System.err.println( "Summary" );
+					for ( ParameterSettings params : list ) {
+						System.err.printf(
+								"F1=%.3f P=%.3f R=%.3f %s\n",
+								params.stats.f1(),
+								params.stats.precision(),
+								params.stats.recall(),
+								params );
+					}
+					System.err.println();
+				}
+				// print best settings:
+				if ( !list.isEmpty() ) {
+					ParameterSettings lastParams = list.get( list.size() - 1 );
+					System.err.println( "Best model:" );
+					System.err.print( lastParams.stats );
+					System.err.println( lastParams );
+					System.err.println( lastParams.stats.confusions() );
+					System.err.println();
+				}
+			}else {
+				ParameterSettings bestSettings = BEST_PARAMETERS.get(relationClass);
+				RelationExtractorEvaluation eval = new RelationExtractorEvaluation(new File("target/models/" + relationCategory),
+						relationClass,
+						ANNOTATOR_CLASSES.get(relationClass),
+						bestSettings,
+						options.getTestOnCTakes(),
+						options.getAllowSmallerSystemArguments(),
+						options.getIgnoreImpossibleGoldRelations(),
+						options.getPrintErrors(),
+						options.getClassWeights(),
+						options.getExpandEvents());
+				bestSettings.stats = eval.trainAndTest(trainFiles, testFiles);
+				System.err.println( bestSettings.stats);
+				System.err.println(bestSettings);
+				System.err.println(bestSettings.stats.confusions());
+				System.err.println();
+			}
 		}
 	}
 
@@ -250,7 +376,7 @@ public class RelationExtractorEvaluation
 	 * @param ignoreImpossibleGoldRelations
 	 *          During testing, ignore gold relations that would be impossible to
 	 *          find because there are no corresponding system mentions
-	//	 * @param expandEvent
+	 * @param expandEventParameter
 	 */
 	public RelationExtractorEvaluation(
 			File baseDirectory,

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java?rev=1893521&r1=1893520&r2=1893521&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java Wed Sep 22 19:00:05 2021
@@ -22,10 +22,11 @@ import com.google.common.base.Function;
 import com.google.common.base.Functions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Ordering;
+import com.lexicalscope.jewel.cli.CliFactory;
 import com.lexicalscope.jewel.cli.Option;
 import org.apache.ctakes.core.ae.SHARPKnowtatorXMLReader;
 import org.apache.ctakes.core.pipeline.PipeBitInfo;
-import org.apache.ctakes.core.util.doc.DocIdUtil;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.apache.ctakes.typesystem.type.structured.DocumentID;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -55,7 +56,9 @@ import java.io.FileOutputStream;
 import java.util.*;
 import java.util.regex.Pattern;
 
-public class SHARPXMI {
+public class SHARPXMI extends CorpusXMI {
+
+   private static String BATCH_TEXT_SUBDIR = "Knowtator/text";
 
    public static List<File> getTrainTextFiles( File batchesDirectory ) {
       // seed_set1: batches 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 18, 19
@@ -66,7 +69,8 @@ public class SHARPXMI {
             batchesDirectory,
             Pattern.compile( "^(ss[1234]_batch0[2-9]|ss[1234]_batch1[56]"
                              + "|ss[1234]_batch1[89]|ss[123]_batch01"
-                             + "|ss[12]_batch1[34]|ss[34]_batch1[12])$" ) );
+                             + "|ss[12]_batch1[34]|ss[34]_batch1[12])$" ),
+              BATCH_TEXT_SUBDIR);
    }
 
    public static List<File> getDevTextFiles( File batchesDirectory ) {
@@ -74,7 +78,7 @@ public class SHARPXMI {
       // seed_set2: batches 10, 17
       // seed_set3: batches 10, 17
       // seed_set4: batches 10, 17
-      return getTextFilesFor( batchesDirectory, Pattern.compile( "^(ss[1234]_batch1[07])$" ) );
+      return getTextFilesFor( batchesDirectory, Pattern.compile( "^(ss[1234]_batch1[07])$" ), BATCH_TEXT_SUBDIR );
    }
 
    public static List<File> getTestTextFiles( File batchesDirectory ) {
@@ -84,19 +88,20 @@ public class SHARPXMI {
       // seed_set4: batches 13, 14
       return getTextFilesFor(
             batchesDirectory,
-            Pattern.compile( "^(ss[12]_batch1[12]|ss[34]_batch1[34])$" ) );
+            Pattern.compile( "^(ss[12]_batch1[12]|ss[34]_batch1[34])$" ),
+              BATCH_TEXT_SUBDIR);
    }
 
    public static List<File> getAllTextFiles( File batchesDirectory ) {
-      return getTextFilesFor( batchesDirectory, Pattern.compile( "" ) );
+      return getTextFilesFor( batchesDirectory, Pattern.compile( "" ), BATCH_TEXT_SUBDIR );
    }
 
-   private static List<File> getTextFilesFor( File batchesDirectory, Pattern pattern ) {
+   private static List<File> getTextFilesFor( File batchesDirectory, Pattern pattern, String textSubdir ) {
       List<File> files = Lists.newArrayList();
       for ( File batchDir : batchesDirectory.listFiles() ) {
          if ( batchDir.isDirectory() && !batchDir.isHidden() ) {
             if ( pattern.matcher( batchDir.getName() ).find() ) {
-               File textDirectory = new File( batchDir, "Knowtator/text" );
+               File textDirectory = new File( batchDir, textSubdir );
                for ( File textFile : textDirectory.listFiles() ) {
                   if ( textFile.isFile() && !textFile.isHidden() ) {
                      files.add( textFile );
@@ -108,151 +113,81 @@ public class SHARPXMI {
       return files;
    }
 
-   public static List<File> toXMIFiles( Options options, List<File> textFiles ) {
-      List<File> xmiFiles = Lists.newArrayList();
-      for ( File textFile : textFiles ) {
-         xmiFiles.add( toXMIFile( options, textFile ) );
-      }
-      return xmiFiles;
+   public static List<File> getTrainTextFilesFromCorpus(File corpusDirectory) {
+      return getTextFilesFor(new File(corpusDirectory, "SeedSet1/by-batch/umls"), Pattern.compile("^0[2-9]|1[3-6,8-9]"), "text");
    }
 
-   private static File toXMIFile( Options options, File textFile ) {
-      return new File( options.getXMIDirectory(), textFile.getName() + ".xmi" );
+   public static List<File> getDevTextFilesFromCorpus(File corpusDirectory) {
+      return getTextFilesFor(new File(corpusDirectory, "SeedSet1/by-batch/umls"), Pattern.compile("^1[0,7]"), "text");
    }
 
-   public static interface Options {
-      @Option(
-            longName = "batches-dir",
-            description = "directory containing ssN_batchNN directories, each of which should contain "
-                          + "a Knowtator directory and a Knowtator_XML directory")
-      public File getBatchesDirectory();
-
-      @Option(
-            longName = "xmi-dir",
-            defaultValue = "target/xmi",
-            description = "directory to store and load XMI serialization of annotations")
-      public File getXMIDirectory();
-
-      @Option(
-            longName = "generate-xmi",
-            description = "read in the gold annotations and serialize them as XMI")
-      public boolean getGenerateXMI();
+   public static List<File> getTestTextFilesFromCorpus(File corpusDirectory) {
+      return getTextFilesFor(new File(corpusDirectory, "SeedSet1/by-batch/umls"), Pattern.compile("^1[1-2]"), "text");
    }
 
-   public static final String GOLD_VIEW_NAME = "GoldView";
-
-   public static void generateXMI( Options options ) throws Exception {
+   public static void generateXMI( File xmiDirectory, File corpusDirectory, File batchesDirectory ) throws Exception {
       // if necessary, write the XMIs first
-      if ( options.getGenerateXMI() ) {
-         if ( !options.getXMIDirectory().exists() ) {
-            options.getXMIDirectory().mkdirs();
-         }
-
-         // create a collection reader that loads URIs for all Knowtator text files
-         List<File> files = Lists.newArrayList();
-         files.addAll( getTrainTextFiles( options.getBatchesDirectory() ) );
-         files.addAll( getDevTextFiles( options.getBatchesDirectory() ) );
-         files.addAll( getTestTextFiles( options.getBatchesDirectory() ) );
-         CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles( files );
-
-         // load the text from the URI, run the preprocessor, then run the
-         // Knowtator XML reader
-         AggregateBuilder builder = new AggregateBuilder();
-         builder.add( UriToDocumentTextAnnotator.getDescription() );
-         File preprocessDescFile = new File( "desc/analysis_engine/RelationExtractorPreprocessor.xml" );
-         XMLParser parser = UIMAFramework.getXMLParser();
-         XMLInputSource source = new XMLInputSource( preprocessDescFile );
-         builder.add( parser.parseAnalysisEngineDescription( source ) );
-         builder.add( AnalysisEngineFactory.createEngineDescription(
-               ViewCreatorAnnotator.class,
-               ViewCreatorAnnotator.PARAM_VIEW_NAME,
-               GOLD_VIEW_NAME ) );
-         builder.add( AnalysisEngineFactory.createEngineDescription( CopyDocumentTextToGoldView.class ) );
-         builder.add(
-               AnalysisEngineFactory.createEngineDescription( DocumentIDAnnotator.class ),
-               CAS.NAME_DEFAULT_SOFA,
-               GOLD_VIEW_NAME );
-         builder.add(
-               AnalysisEngineFactory.createEngineDescription( SHARPKnowtatorXMLReader.class,
-                     SHARPKnowtatorXMLReader.PARAM_SET_DEFAULTS,
-                     true ),
-               CAS.NAME_DEFAULT_SOFA,
-               GOLD_VIEW_NAME );
-
-         // write out an XMI for each file
-         for ( Iterator<JCas> casIter = new JCasIterator( reader, builder.createAggregate() ); casIter.hasNext(); ) {
-            JCas jCas = casIter.next();
-            JCas goldView = jCas.getView( GOLD_VIEW_NAME );
-            String documentID = DocIdUtil.getDocumentID( goldView );
-            if ( documentID == null ){//|| documentID.equals( DocumentIDAnnotationUtil.NO_DOCUMENT_ID ) ) {
-               throw new IllegalArgumentException( "No documentID for CAS:\n" + jCas );
-            }
-            File outFile = toXMIFile( options, new File( documentID ) );
-            FileOutputStream stream = new FileOutputStream( outFile );
-            ContentHandler handler = new XMLSerializer( stream ).getContentHandler();
-            new XmiCasSerializer( jCas.getTypeSystem() ).serialize( jCas.getCas(), handler );
-            stream.close();
-         }
+      if ( !xmiDirectory.exists() ) {
+            xmiDirectory.mkdirs();
       }
-   }
 
-   public enum EvaluateOn {
-      TRAIN, DEV, TEST, OTHER
-   }
-
-   public static interface EvaluationOptions extends Options {
-      @Option(
-            longName = "evaluate-on",
-            defaultValue = "DEV",
-            description = "perform evaluation using the training (TRAIN), development (DEV) or test "
-                          + "(TEST) data.")
-      public EvaluateOn getEvaluteOn();
-
-      @Option(
-            longName = "grid-search",
-            description = "run a grid search to select the best parameters")
-      public boolean getGridSearch();
-      
-      @Option(
-          defaultToNull=true,
-          longName = "train-xmi-dir",
-          description = "use these XMI files for training; they must contain the necessary preprocessing " 
-              + "in system view and gold annotation in gold view")
-      public File getTrainXmiDir();
-      
-      @Option(
-          longName = "test-xmi-dir",
-          defaultValue = "",
-          description = "evaluate on these XMI files; they must contain the necessary preprocessing " 
-              + "in system view and gold annotation in gold view")
-      public File getTestXmiDir();
-   }
-
-   public static abstract class Evaluation_ImplBase
-         extends org.cleartk.eval.Evaluation_ImplBase<File, AnnotationStatistics<String>> {
-
-      public Evaluation_ImplBase( File baseDirectory ) {
-         super( baseDirectory );
-      }
-
-      @Override
-      public CollectionReader getCollectionReader( List<File> items ) throws Exception {
-         return CollectionReaderFactory.createReader(
-               XMIReader.class,
-               TypeSystemDescriptionFactory.createTypeSystemDescription(),
-               XMIReader.PARAM_FILES,
-               items );
-      }
-   }
-
-   public static void validate( EvaluationOptions options ) throws Exception {
-      // error on invalid option combinations
-      if ( options.getEvaluteOn().equals( EvaluateOn.TEST ) && options.getGridSearch() ) {
-         throw new IllegalArgumentException( "grid search can only be run on the train or dev sets" );
+      // create a collection reader that loads URIs for all Knowtator text files
+      List<File> files = new ArrayList<>();
+      if(corpusDirectory != null){
+         files.addAll(getTrainTextFilesFromCorpus(corpusDirectory));
+         files.addAll(getDevTextFilesFromCorpus(corpusDirectory));
+         files.addAll(getTestTextFilesFromCorpus(corpusDirectory));
+      }else if(batchesDirectory != null) {
+         files.addAll(getTrainTextFiles(batchesDirectory));
+         files.addAll(getDevTextFiles(batchesDirectory));
+         files.addAll(getTestTextFiles(batchesDirectory));
+      }else{
+         throw new RuntimeException("Either the corpus-dir or batches-dir option must be set.");
+      }
+
+      CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files);
+      // load the text from the URI, run the preprocessor, then run the
+      // Knowtator XML reader
+      AggregateBuilder builder = new AggregateBuilder();
+      builder.add( UriToDocumentTextAnnotator.getDescription() );
+      File preprocessDescFile = new File( "desc/analysis_engine/RelationExtractorPreprocessor.xml" );
+      XMLParser parser = UIMAFramework.getXMLParser();
+      XMLInputSource source = new XMLInputSource( preprocessDescFile );
+      builder.add( parser.parseAnalysisEngineDescription( source ) );
+      builder.add( AnalysisEngineFactory.createEngineDescription(
+              ViewCreatorAnnotator.class,
+              ViewCreatorAnnotator.PARAM_VIEW_NAME,
+              GOLD_VIEW_NAME ) );
+      builder.add( AnalysisEngineFactory.createEngineDescription( CopyDocumentTextToGoldView.class ) );
+      builder.add(
+              AnalysisEngineFactory.createEngineDescription( DocumentIDAnnotator.class ),
+              CAS.NAME_DEFAULT_SOFA,
+              GOLD_VIEW_NAME );
+      builder.add(
+              AnalysisEngineFactory.createEngineDescription( SHARPKnowtatorXMLReader.class,
+                      SHARPKnowtatorXMLReader.PARAM_SET_DEFAULTS,
+                      true ),
+              CAS.NAME_DEFAULT_SOFA,
+              GOLD_VIEW_NAME );
+
+      // write out an XMI for each file
+      for ( Iterator<JCas> casIter = new JCasIterator( reader, builder.createAggregate() ); casIter.hasNext(); ) {
+         JCas jCas = casIter.next();
+         JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+         String documentID = DocumentIDAnnotationUtil.getDocumentID(goldView);
+         if (documentID == null) {//|| documentID.equals( DocumentIDAnnotationUtil.NO_DOCUMENT_ID ) ) {
+            throw new IllegalArgumentException("No documentID for CAS:\n" + jCas);
+         }
+         File outFile = toXMIFile(xmiDirectory, new File(documentID));
+         FileOutputStream stream = new FileOutputStream(outFile);
+         ContentHandler handler = new XMLSerializer(stream).getContentHandler();
+         new XmiCasSerializer(jCas.getTypeSystem()).serialize(jCas.getCas(), handler);
+         stream.close();
       }
    }
 
-   public static <T extends Evaluation_ImplBase> void evaluate(
+   /*
+   public static <T extends RelationEvaluation_ImplBase> void evaluate(
          EvaluationOptions options,
          ParameterSettings bestSettings,
          List<ParameterSettings> gridOfSettings,
@@ -268,30 +203,50 @@ public class SHARPXMI {
       // run an evaluation for each set of parameters
       Map<ParameterSettings, Double> scoredParams = new HashMap<>();
       for ( ParameterSettings params : possibleParams ) {
-         Evaluation_ImplBase evaluation = getEvaluation.apply( params );
+         RelationEvaluation_ImplBase evaluation = getEvaluation.apply( params );
 
          List<File> trainFiles, devFiles, testFiles;
          switch ( options.getEvaluteOn() ) {
             case TRAIN:
                // run n-fold cross-validation on the training set
-               trainFiles = getTrainTextFiles( options.getBatchesDirectory() );
+               if(options.getCorpusDirectory() != null){
+                  trainFiles = getTrainTextFilesFromCorpus(options.getCorpusDirectory());
+               }else if(options.getBatchesDirectory() != null) {
+                  trainFiles = getTrainTextFiles(options.getBatchesDirectory());
+               }else{
+                  throw new RuntimeException("Either corpus-dir or batch-dir must have an argument.");
+               }
                trainFiles = toXMIFiles( options, trainFiles );
                List<AnnotationStatistics<String>> foldStats = evaluation.crossValidation( trainFiles, 2 );
                params.stats = AnnotationStatistics.addAll( foldStats );
                break;
             case DEV:
                // train on the training set and evaluate on the dev set
-               trainFiles = getTrainTextFiles( options.getBatchesDirectory() );
+               if(options.getCorpusDirectory() != null){
+                  trainFiles = getTrainTextFilesFromCorpus(options.getCorpusDirectory());
+                  devFiles = getDevTextFilesFromCorpus(options.getCorpusDirectory());
+               }else if(options.getBatchesDirectory() != null) {
+                  trainFiles = getTrainTextFiles(options.getBatchesDirectory());
+                  devFiles = getDevTextFiles( options.getBatchesDirectory() );
+               }else{
+                  throw new RuntimeException("Either corpus-dir or batch-dir must have an argument.");
+               }
                trainFiles = toXMIFiles( options, trainFiles );
-               devFiles = getDevTextFiles( options.getBatchesDirectory() );
                devFiles = toXMIFiles( options, devFiles );
                params.stats = evaluation.trainAndTest( trainFiles, devFiles );
                break;
             case TEST:
                // train on the training set + dev set and evaluate on the test set
                List<File> allTrainFiles = new ArrayList<>();
-               allTrainFiles.addAll( getTrainTextFiles( options.getBatchesDirectory() ) );
-               allTrainFiles.addAll( getDevTextFiles( options.getBatchesDirectory() ) );
+               if(options.getCorpusDirectory() != null){
+                 allTrainFiles.addAll( getTrainTextFilesFromCorpus(options.getCorpusDirectory()));
+                 allTrainFiles.addAll( getDevTextFilesFromCorpus(options.getCorpusDirectory()));
+               }else if(options.getBatchesDirectory() != null) {
+                  allTrainFiles.addAll(getTrainTextFiles(options.getBatchesDirectory()));
+                  allTrainFiles.addAll(getDevTextFiles(options.getBatchesDirectory()));
+               }else{
+                  throw new RuntimeException("Either corpus-dir or batch-dir must have an argument.");
+               }
                allTrainFiles = toXMIFiles( options, allTrainFiles );
                testFiles = getTestTextFiles( options.getBatchesDirectory() );
                testFiles = toXMIFiles( options, testFiles );
@@ -356,6 +311,7 @@ public class SHARPXMI {
          System.err.println();
       }
    }
+*/
 
    public static class DocumentIDAnnotator extends JCasAnnotator_ImplBase {
 
@@ -369,9 +325,9 @@ public class SHARPXMI {
    }
 
    @PipeBitInfo(
-         name = "Text to Gold Copier",
-         description = "Copies Text from the System view to the Gold view.",
-         role = PipeBitInfo.Role.SPECIAL
+           name = "Text to Gold Copier",
+           description = "Copies Text from the System view to the Gold view.",
+           role = PipeBitInfo.Role.SPECIAL
    )
    public static class CopyDocumentTextToGoldView extends JCasAnnotator_ImplBase {
       @Override

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java?rev=1893521&r1=1893520&r2=1893521&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java Wed Sep 22 19:00:05 2021
@@ -1,8 +1,8 @@
 package org.apache.ctakes.relationextractor.metastasis;
 
 import com.google.common.io.CharStreams;
-import org.apache.ctakes.relationextractor.eval.SHARPXMI.CopyDocumentTextToGoldView;
-import org.apache.ctakes.relationextractor.eval.SHARPXMI.DocumentIDAnnotator;
+import org.apache.ctakes.relationextractor.eval.CorpusXMI.CopyDocumentTextToGoldView;
+import org.apache.ctakes.relationextractor.eval.CorpusXMI.DocumentIDAnnotator;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngine;

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java?rev=1893521&r1=1893520&r2=1893521&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java Wed Sep 22 19:00:05 2021
@@ -94,8 +94,8 @@ public class RelationExtractorTrain {
           + preprocessDescFile.getCanonicalPath());
     }
 
-    List<File> trainFiles = SHARPXMI.getAllTextFiles(options.getBatchesDirectory());
-    trainFiles = SHARPXMI.toXMIFiles(options, trainFiles);
+    List<File> trainFiles = SHARPXMI.getAllTextFiles(options.getSharpBatchesDirectory());
+    trainFiles = SHARPXMI.toXMIFiles(options.getXMIDirectory(), trainFiles);
 
     // Initialize model directories
     String modelPathPrefix = "org/apache/ctakes/relationextractor/models/";