You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/04/09 20:53:10 UTC

svn commit: r1586082 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval: EvaluationOfAnnotationSpans_ImplBase.java EvaluationOfTimeSpans.java Evaluation_ImplBase.java

Author: tmill
Date: Wed Apr  9 18:53:09 2014
New Revision: 1586082

URL: http://svn.apache.org/r1586082
Log:
CTAKES-82: Added option for writing output to i2b2 xml format.

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java?rev=1586082&r1=1586081&r2=1586082&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java Wed Apr  9 18:53:09 2014
@@ -42,6 +42,7 @@ import org.apache.uima.resource.Resource
 import org.cleartk.eval.AnnotationStatistics;
 import org.cleartk.util.ViewURIUtil;
 import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.pipeline.JCasIterable;
 import org.uimafit.pipeline.SimplePipeline;
 import org.uimafit.util.JCasUtil;
@@ -118,7 +119,9 @@ protected abstract AnalysisEngineDescrip
       throws Exception {
     AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
     aggregateBuilder.add(this.getAnnotatorDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA);
-
+    if(this.i2b2Output != null){
+      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(WriteI2B2XML.class, WriteI2B2XML.PARAM_OUTPUT_DIR, this.i2b2Output), "TimexView", CAS.NAME_DEFAULT_SOFA);
+    }
     AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
     Ordering<Annotation> bySpans = Ordering.<Integer> natural().lexicographical().onResultOf(
         new Function<Annotation, List<Integer>>() {

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1586082&r1=1586081&r2=1586082&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java Wed Apr  9 18:53:09 2014
@@ -128,6 +128,7 @@ public class EvaluationOfTimeSpans exten
 					options.getPrintOverlappingSpans(),
 					annotatorTrainingArguments.get(annotatorClass));
 			evaluation.prepareXMIsFor(patientSets);
+			if(options.getI2B2Output()!=null) evaluation.setI2B2Output(options.getI2B2Output() + "/" + annotatorClass.getSimpleName());
 			String name = String.format("%s.errors", annotatorClass.getSimpleName());
 			evaluation.setLogging(Level.FINE, new File("target/eval", name));
 			AnnotationStatistics<String> stats = evaluation.trainAndTest(allTrain, allTest);

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1586082&r1=1586081&r2=1586082&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Wed Apr  9 18:53:09 2014
@@ -30,6 +30,17 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
 import org.apache.ctakes.chunker.ae.Chunker;
 import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
 import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
@@ -43,12 +54,12 @@ import org.apache.ctakes.core.resource.F
 import org.apache.ctakes.core.resource.FileResourceImpl;
 import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
 import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
 import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
 import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
 import org.apache.ctakes.lvg.ae.LvgAnnotator;
 import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
-import org.apache.ctakes.parser.berkeley.BerkeleyParserWrapper;
 import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.temporal.ae.I2B2TemporalXMLReader;
 import org.apache.ctakes.temporal.ae.THYMEAnaforaXMLReader;
@@ -96,6 +107,8 @@ import org.uimafit.factory.TypePrioritie
 import org.uimafit.factory.TypeSystemDescriptionFactory;
 import org.uimafit.pipeline.SimplePipeline;
 import org.uimafit.util.JCasUtil;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -147,7 +160,10 @@ public abstract class Evaluation_ImplBas
     public boolean getTest();
 
     @Option(longName = "kernelParams", defaultToNull=true)
-    public String getKernelParams();    
+    public String getKernelParams();
+    
+    @Option(defaultToNull=true)
+    public String getI2B2Output();
   }
 
   protected File rawTextDirectory;
@@ -166,6 +182,8 @@ public abstract class Evaluation_ImplBas
   
   protected boolean printOverlapping = false;
   
+  protected String i2b2Output = null;
+  
   protected String[] kernelParams;
   
   public Evaluation_ImplBase(
@@ -184,6 +202,10 @@ public abstract class Evaluation_ImplBas
     this.treebankDirectory = treebankDirectory;
   }
 
+  public void setI2B2Output(String outDir){
+    i2b2Output = outDir;
+  }
+  
   public void prepareXMIsFor(List<Integer> patientSets) throws Exception {
     boolean needsXMIs = false;
     for (File textFile : this.getFilesFor(patientSets)) {
@@ -780,4 +802,68 @@ public abstract class Evaluation_ImplBas
       }
     }
   }
+  
+  public static class WriteI2B2XML extends JCasAnnotator_ImplBase {
+    public static final String PARAM_OUTPUT_DIR="PARAM_OUTPUT_DIR";
+    @ConfigurationParameter(mandatory=true,description="Output directory to write xml files to.",name=PARAM_OUTPUT_DIR)
+    protected String outputDir;
+    
+    @Override
+    public void process(JCas jcas) throws AnalysisEngineProcessException {
+      try {
+        // get the output file name from the input file name and output directory.
+        File outDir = new File(outputDir);
+        if(!outDir.exists()) outDir.mkdirs();
+        File inFile = new File(ViewURIUtil.getURI(jcas));
+        String outFile = inFile.getName().replace(".txt", "");
+        
+        // build the xml
+        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
+        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
+        Document doc = docBuilder.newDocument();
+        Element rootElement = doc.createElement("ClinicalNarrativeTemporalAnnotation");
+        Element textElement = doc.createElement("TEXT");
+        Element tagsElement = doc.createElement("TAGS");
+        textElement.setTextContent(jcas.getDocumentText());
+        rootElement.appendChild(textElement);
+        rootElement.appendChild(tagsElement);
+        doc.appendChild(rootElement);
+        
+        int id=0;
+        for(TimeMention timex : JCasUtil.select(jcas, TimeMention.class)){
+          Element timexElement = doc.createElement("TIMEX3");
+          String timexID = "T"+id;
+          id++;
+          timexElement.setAttribute("id", timexID);
+          timexElement.setAttribute("start", String.valueOf(timex.getBegin()+1));
+          timexElement.setAttribute("end", String.valueOf(timex.getEnd()+1));
+          timexElement.setAttribute("text", timex.getCoveredText());
+          timexElement.setAttribute("type", "NA");
+          timexElement.setAttribute("val", "NA");
+          timexElement.setAttribute("mod", "NA");
+          tagsElement.appendChild(timexElement);
+        }
+        
+        // boilerplate xml-writing code:
+        TransformerFactory transformerFactory = TransformerFactory.newInstance();
+        Transformer transformer = transformerFactory.newTransformer();
+        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
+        DOMSource source = new DOMSource(doc);
+        StreamResult result = new StreamResult(new File(outputDir, outFile));
+        transformer.transform(source, result);
+      } catch (ParserConfigurationException e) {
+        e.printStackTrace();
+        throw new AnalysisEngineProcessException(e);
+      } catch (TransformerConfigurationException e) {
+        e.printStackTrace();
+        throw new AnalysisEngineProcessException(e);
+      } catch (TransformerException e) {
+        e.printStackTrace();
+        throw new AnalysisEngineProcessException(e);
+      }
+
+    }
+    
+  }
 }