You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2014/03/13 20:04:24 UTC
svn commit: r1577288 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae:
PreserveCertainEventEventRelationsInGold.java
feature/duration/DurationEventEventFeatureExtractor.java
feature/duration/Utils.java
Author: dligach
Date: Thu Mar 13 19:04:24 2014
New Revision: 1577288
URL: http://svn.apache.org/r1577288
Log:
Code related to evaluating the effect of adding duration features for event-event relations
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java (with props)
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java?rev=1577288&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java Thu Mar 13 19:04:24 2014
@@ -0,0 +1,82 @@
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.feature.duration.Utils;
+import org.apache.ctakes.temporal.ae.feature.duration.Utils.Callback;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.jcas.JCas;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
+
+/**
+ * Filters the gold view so that only event-event relations in which both
+ * event arguments have duration data are preserved.
+ *
+ * <p>An event "has duration data" when its lower-cased covered text is a key
+ * of the lookup read from {@link Utils#durationDistributionPath}. Relations
+ * that are not event-event relations are left untouched. Event mentions in
+ * the gold view without duration data are removed as well.
+ */
+public class PreserveCertainEventEventRelationsInGold extends JCasAnnotator_ImplBase {
+
+  public static final String GOLD_VIEW_NAME = "GoldView";
+
+  // Duration lookup, queried by lower-cased event text. Loaded on the first
+  // call to process() and reused afterwards so the file is not re-parsed
+  // for every document.
+  private Map<String, Map<String, Float>> textToDistribution;
+
+  /**
+   * Removes from the gold view every event-event relation (together with its
+   * two relation arguments) for which at least one event has no duration
+   * data, then removes every event mention that has no duration data.
+   *
+   * @param jCas the CAS being processed; must contain a view named {@link #GOLD_VIEW_NAME}
+   * @throws AnalysisEngineProcessException if the duration lookup file cannot
+   *         be read or the gold view cannot be obtained
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+
+    if(textToDistribution == null) {
+      try {
+        textToDistribution = Files.readLines(new File(Utils.durationDistributionPath), Charsets.UTF_8, new Callback());
+      } catch(IOException e) {
+        // without the lookup the gold view cannot be filtered; silently
+        // returning would leave unfiltered gold data in the evaluation
+        throw new AnalysisEngineProcessException(e);
+      }
+    }
+
+    JCas goldView;
+    try {
+      goldView = jCas.getView(GOLD_VIEW_NAME);
+    } catch (CASException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+
+    // remove relations where one or both arguments have no duration data;
+    // iterate over a copy because annotations are removed inside the loop
+    for(BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select(goldView, BinaryTextRelation.class))) {
+      RelationArgument arg1 = relation.getArg1();
+      RelationArgument arg2 = relation.getArg2();
+
+      if(!(arg1.getArgument() instanceof EventMention && arg2.getArgument() instanceof EventMention)) {
+        // this is not an event-event relation
+        continue;
+      }
+
+      String event1Text = arg1.getArgument().getCoveredText().toLowerCase();
+      String event2Text = arg2.getArgument().getCoveredText().toLowerCase();
+
+      if(textToDistribution.containsKey(event1Text) && textToDistribution.containsKey(event2Text)) {
+        // we have duration distributions for both arguments, so keep it
+        continue;
+      }
+
+      arg1.removeFromIndexes();
+      arg2.removeFromIndexes();
+      relation.removeFromIndexes();
+    }
+
+    // remove events that have no duration data; iterate over a copy because
+    // mentions are removed inside the loop
+    for(EventMention mention : Lists.newArrayList(JCasUtil.select(goldView, EventMention.class))) {
+      if(textToDistribution.containsKey(mention.getCoveredText().toLowerCase())) {
+        // these are the kind we keep
+        continue;
+      }
+
+      mention.removeFromIndexes();
+    }
+  }
+}
\ No newline at end of file
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventEventRelationsInGold.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java?rev=1577288&r1=1577287&r2=1577288&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java Thu Mar 13 19:04:24 2014
@@ -40,13 +40,14 @@ public class DurationEventEventFeatureEx
throws AnalysisEngineProcessException {
List<Feature> features = new ArrayList<Feature>();
- File durationLookup = new File(Utils.durationDistributionPath);
String arg1text = arg1.getCoveredText().toLowerCase();
String arg2text = arg2.getCoveredText().toLowerCase();
+ Float expectedDuration1;
+ Float expectedDuration2;
Map<String, Map<String, Float>> textToDistribution = null;
try {
- textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
+ textToDistribution = Files.readLines(new File(Utils.durationDistributionPath), Charsets.UTF_8, new Utils.Callback());
} catch(IOException e) {
e.printStackTrace();
return features;
@@ -55,25 +56,19 @@ public class DurationEventEventFeatureEx
Map<String, Float> arg1Distribution = textToDistribution.get(arg1text);
if(arg1Distribution == null) {
features.add(new Feature("arg1_no_duration_info"));
- } else {
-// float expectation1 = DurationExpectationFeatureExtractor.expectedDuration(arg1Distribution);
-// features.add(new Feature("arg1_expected_duration", expectation1));
- for(String timeUnit : arg1Distribution.keySet()) {
- features.add(new Feature("duration_" + timeUnit, arg1Distribution.get(timeUnit)));
- }
- }
+ return features;
+ }
+
+ expectedDuration1 = Utils.expectedDuration(arg1Distribution);
Map<String, Float> arg2Distribution = textToDistribution.get(arg2text);
if(arg2Distribution == null) {
features.add(new Feature("arg2_no_duration_info"));
- } else {
-// float expectation2 = DurationExpectationFeatureExtractor.expectedDuration(arg2Distribution);
-// features.add(new Feature("arg_expected_duration", expectation2));
- for(String timeUnit : arg2Distribution.keySet()) {
- features.add(new Feature("duration_" + timeUnit, arg2Distribution.get(timeUnit)));
- }
+ return features;
}
+ expectedDuration2 = Utils.expectedDuration(arg2Distribution);
+ features.add(new Feature("expected_duration_difference", expectedDuration1 - expectedDuration2));
return features;
}
}
\ No newline at end of file
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java?rev=1577288&r1=1577287&r2=1577288&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java Thu Mar 13 19:04:24 2014
@@ -26,7 +26,7 @@ import com.google.common.io.LineProcesso
*/
public class Utils {
- public static final String durationDistributionPath = "/Users/dima/Boston/Thyme/Duration/Data/SignSymptom/Distribution/distribution.txt";
+ public static final String durationDistributionPath = "/Users/dima/Boston/Thyme/Duration/Data/Combined/Distribution/mimic.txt";
/**
* Compute expected duration in seconds. Normalize by number of seconds in a year.