You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2014/04/02 17:20:39 UTC
svn commit: r1584068 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: data/analysis/EventDurationDistribution.java duration/EventDurationDistribution.java
Author: dligach
Date: Wed Apr 2 15:20:38 2014
New Revision: 1584068
URL: http://svn.apache.org/r1584068
Log:
now outputting event duration distributions to a file
Removed:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java?rev=1584068&r1=1584067&r2=1584068&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java Wed Apr 2 15:20:38 2014
@@ -1,6 +1,7 @@
package org.apache.ctakes.temporal.duration;
import java.io.File;
+import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
@@ -18,23 +19,28 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.MedicationMention;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.CollectionReaderFactory;
import org.uimafit.pipeline.SimplePipeline;
import org.uimafit.util.JCasUtil;
+import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Multiset;
+import com.google.common.io.Files;
/**
* Extract durations of event mentions (e.g. sign/symptom or disease/disorder).
@@ -46,12 +52,17 @@ public class EventDurationDistribution {
private static Class<? extends EventMention> targetClass = MedicationMention.class;
public static class Options {
-
@Option(
name = "--input-dir",
usage = "specify the path to the directory containing the xmi files",
required = true)
public File inputDirectory;
+
+ @Option(
+ name = "--output-file",
+ usage = "specify the path to the output file",
+ required = true)
+ public String outputFile;
}
public static void main(String[] args) throws Exception {
@@ -65,13 +76,22 @@ public class EventDurationDistribution {
CollectionReader collectionReader = getCollectionReader(trainFiles);
AnalysisEngine temporalDurationExtractor = AnalysisEngineFactory.createPrimitive(
- TemporalDurationExtractor.class);
+ TemporalDurationExtractor.class,
+ "OutputFile",
+ options.outputFile);
SimplePipeline.runPipeline(collectionReader, temporalDurationExtractor);
}
public static class TemporalDurationExtractor extends JCasAnnotator_ImplBase {
+ @ConfigurationParameter(
+ name = "OutputFile",
+ mandatory = true,
+ description = "path to the output file that will store the distributions")
+ private String outputFilePath;
+ private File outputFile;
+
// regular expression to match temporal durations in time mention annotations
private final static String regex = "(sec|min|hour|hrs|day|week|wk|month|year|yr|decade)";
@@ -97,6 +117,17 @@ public class EventDurationDistribution {
Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
@Override
+ public void initialize(UimaContext context) throws ResourceInitializationException {
+ super.initialize(context);
+ outputFile = new File(outputFilePath);
+ if(outputFile.exists()) {
+ System.out.println(outputFile + " exists... deleting...");
+ outputFile.delete();
+ }
+ }
+
+
+ @Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
Collection<DocumentID> ids = JCasUtil.select(jCas, DocumentID.class);
@@ -137,7 +168,11 @@ public class EventDurationDistribution {
}
if(durationDistribution.size() > 0) {
- System.out.println(Utils.formatDistribution(mentionText, durationDistribution, ", ", false));
+ try {
+ Files.append(Utils.formatDistribution(mentionText, durationDistribution, ", ", false) + "\n", outputFile, Charsets.UTF_8);
+ } catch (IOException e) {
+ System.out.println("Could not open output file: " + outputFile);
+ }
}
}