You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/10 18:11:39 UTC
svn commit: r1586359 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval:
EvaluationOfEventSpans.java Evaluation_ImplBase.java
Author: tmill
Date: Thu Apr 10 16:11:39 2014
New Revision: 1586359
URL: http://svn.apache.org/r1586359
Log:
CTAKES-82: Event evaluation now works for i2b2 data, and the evaluation can now output i2b2-style XML for TLINKs and events.
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java?rev=1586359&r1=1586358&r2=1586359&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java Thu Apr 10 16:11:39 2014
@@ -26,6 +26,7 @@ import java.util.logging.Level;
import org.apache.ctakes.temporal.ae.EventAnnotator;
import org.apache.ctakes.temporal.ae.feature.selection.FeatureSelection;
+import org.apache.ctakes.temporal.eval.Evaluation_ImplBase.XMLFormat;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -58,11 +59,21 @@ public class EvaluationOfEventSpans exte
public static void main(String[] args) throws Exception {
Options options = CliFactory.parseArguments(Options.class, args);
+ List<Integer> trainItems = null;
+ List<Integer> devItems = null;
+ List<Integer> testItems = null;
+
List<Integer> patientSets = options.getPatients().getList();
- List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
- List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
- List<Integer> testItems = THYMEData.getTestPatientSets(patientSets);
-
+ if(options.getXMLFormat() == XMLFormat.I2B2){
+ trainItems = I2B2Data.getTrainPatientSets(options.getXMLDirectory());
+ devItems = I2B2Data.getDevPatientSets(options.getXMLDirectory());
+ testItems = I2B2Data.getTestPatientSets(options.getXMLDirectory());
+ }else{
+ trainItems = THYMEData.getTrainPatientSets(patientSets);
+ devItems = THYMEData.getDevPatientSets(patientSets);
+ testItems = THYMEData.getTestPatientSets(patientSets);
+ }
+
List<Integer> allTraining = new ArrayList<Integer>(trainItems);
List<Integer> allTest = null;
if (options.getTest()) {
@@ -82,6 +93,7 @@ public class EvaluationOfEventSpans exte
options.getSMOTENeighborNumber());
evaluation.prepareXMIsFor(patientSets);
evaluation.setLogging(Level.FINE, new File("target/eval/ctakes-event-errors.log"));
+ if(options.getI2B2Output()!=null) evaluation.setI2B2Output(options.getI2B2Output() + "/event-spans");
AnnotationStatistics<String> stats = evaluation.trainAndTest(allTraining, allTest);
System.err.println(stats);
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1586359&r1=1586358&r2=1586359&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Thu Apr 10 16:11:39 2014
@@ -24,8 +24,10 @@ import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -54,7 +56,6 @@ import org.apache.ctakes.core.resource.F
import org.apache.ctakes.core.resource.FileResourceImpl;
import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
@@ -65,12 +66,14 @@ import org.apache.ctakes.temporal.ae.I2B
import org.apache.ctakes.temporal.ae.THYMEAnaforaXMLReader;
import org.apache.ctakes.temporal.ae.THYMEKnowtatorXMLReader;
import org.apache.ctakes.temporal.ae.THYMETreebankReader;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.Chunk;
import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Segment;
@@ -829,11 +832,12 @@ public abstract class Evaluation_ImplBas
rootElement.appendChild(tagsElement);
doc.appendChild(rootElement);
+ Map<IdentifiedAnnotation,String> argToId = new HashMap<>();
int id=0;
for(TimeMention timex : JCasUtil.select(jcas, TimeMention.class)){
Element timexElement = doc.createElement("TIMEX3");
- String timexID = "T"+id;
- id++;
+ String timexID = "T"+id; id++;
+ argToId.put(timex, timexID);
timexElement.setAttribute("id", timexID);
timexElement.setAttribute("start", String.valueOf(timex.getBegin()+1));
timexElement.setAttribute("end", String.valueOf(timex.getEnd()+1));
@@ -844,6 +848,39 @@ public abstract class Evaluation_ImplBas
tagsElement.appendChild(timexElement);
}
+ id = 0;
+ for(EventMention event : JCasUtil.select(jcas, EventMention.class)){
+ if (event.getClass().equals(EventMention.class)) {
+ // The exact-class check above ensures we only look at THYME events (plain EventMention) and not ctakes-dictionary-lookup events
+ Element eventEl = doc.createElement("EVENT");
+ String eventID = "E"+id; id++;
+ argToId.put(event, eventID);
+ eventEl.setAttribute("id", eventID);
+ eventEl.setAttribute("start", String.valueOf(event.getBegin()+1));
+ eventEl.setAttribute("end", String.valueOf(event.getEnd()+1));
+ eventEl.setAttribute("text", event.getCoveredText());
+ eventEl.setAttribute("modality", "NA");
+ eventEl.setAttribute("polarity", "NA");
+ eventEl.setAttribute("type", "NA");
+ tagsElement.appendChild(eventEl);
+ }
+ }
+
+ id = 0;
+ for(TemporalTextRelation rel : JCasUtil.select(jcas, TemporalTextRelation.class)){
+ Element linkEl = doc.createElement("TLINK");
+ String linkID = "TL"+id; id++;
+ linkEl.setAttribute("id", linkID);
+ Annotation arg1 = rel.getArg1().getArgument();
+ linkEl.setAttribute("fromID", argToId.get(arg1));
+ linkEl.setAttribute("fromText", arg1.getCoveredText());
+ Annotation arg2 = rel.getArg2().getArgument();
+ linkEl.setAttribute("toID", argToId.get(arg2));
+ linkEl.setAttribute("toText", arg2.getCoveredText());
+ linkEl.setAttribute("type", rel.getCategory());
+ tagsElement.appendChild(linkEl);
+ }
+
// boilerplate xml-writing code:
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();