You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/06/02 17:55:43 UTC
svn commit: r1746611 -
/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
Author: dligach
Date: Thu Jun 2 17:55:43 2016
New Revision: 1746611
URL: http://svn.apache.org/viewvc?rev=1746611&view=rev
Log:
Print the entire sentence with the relation arguments marked, instead of only the text between the two arguments.
Modified:
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java?rev=1746611&r1=1746610&r2=1746611&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java Thu Jun 2 17:55:43 2016
@@ -73,6 +73,11 @@ public class PositiveAndNegativeExampleP
public static void main(String[] args) throws Exception {
+ File trainFile = new File(trainDataFile);
+ trainFile.createNewFile();
+ File devFile = new File(devDataFile);
+ devFile.createNewFile();
+
Options options = CliFactory.parseArguments(Options.class, args);
List<Integer> patientSets = options.getPatients().getList();
@@ -160,7 +165,7 @@ public class PositiveAndNegativeExampleP
label = relation.getCategory().toLowerCase();
}
- String context = getTextBetween(systemView, mention1, mention2);
+ String context = getSentenceWithMarkedArgs(systemView, sentence, mention1, mention2);
String text = String.format("%s|%s", label, context);
eventEventRelationsInSentence.add(text.toLowerCase());
}
@@ -175,6 +180,26 @@ public class PositiveAndNegativeExampleP
}
}
+ public static String getSentenceWithMarkedArgs(JCas jCas, Sentence sent, Annotation arg1, Annotation arg2) {
+
+ List<String> tokens = new ArrayList<>();
+ for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
+ if(baseToken.getBegin() == arg1.getBegin() && (baseToken.getEnd() == arg1.getEnd())) {
+ tokens.add("<e1>");
+ tokens.add(baseToken.getCoveredText());
+ tokens.add("</e1>");
+ } else if(baseToken.getBegin() == arg2.getBegin() && (baseToken.getEnd() == arg2.getEnd())) {
+ tokens.add("<e2>");
+ tokens.add(baseToken.getCoveredText());
+ tokens.add("</e2>");
+ } else {
+ tokens.add(baseToken.getCoveredText());
+ }
+ }
+
+ return String.join(" ", tokens).replaceAll("[\r\n]", " ");
+ }
+
public static String getTextBetween(JCas jCas, Annotation arg1, Annotation arg2) {
// todo: use 2 tokens for things like 'during those hospitalizations ...'