You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/06/02 17:55:43 UTC

svn commit: r1746611 - /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java

Author: dligach
Date: Thu Jun  2 17:55:43 2016
New Revision: 1746611

URL: http://svn.apache.org/viewvc?rev=1746611&view=rev
Log:
printing entire sentence with args marked

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java?rev=1746611&r1=1746610&r2=1746611&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java Thu Jun  2 17:55:43 2016
@@ -73,6 +73,11 @@ public class PositiveAndNegativeExampleP
 
   public static void main(String[] args) throws Exception {
     
+    File trainFile = new File(trainDataFile);
+    trainFile.createNewFile();
+    File devFile = new File(devDataFile);
+    devFile.createNewFile();
+    
     Options options = CliFactory.parseArguments(Options.class, args);
 
     List<Integer> patientSets = options.getPatients().getList();
@@ -160,7 +165,7 @@ public class PositiveAndNegativeExampleP
               label = relation.getCategory().toLowerCase();
             }
 
-            String context = getTextBetween(systemView, mention1, mention2); 
+            String context = getSentenceWithMarkedArgs(systemView, sentence, mention1, mention2); 
             String text = String.format("%s|%s", label, context);
             eventEventRelationsInSentence.add(text.toLowerCase());
           }
@@ -175,6 +180,26 @@ public class PositiveAndNegativeExampleP
     }
   }
 
+  public static String getSentenceWithMarkedArgs(JCas jCas, Sentence sent, Annotation arg1, Annotation arg2) {
+    
+    List<String> tokens = new ArrayList<>();
+    for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
+      if(baseToken.getBegin() == arg1.getBegin() && (baseToken.getEnd() == arg1.getEnd())) {
+        tokens.add("<e1>");
+        tokens.add(baseToken.getCoveredText());
+        tokens.add("</e1>");
+      } else if(baseToken.getBegin() == arg2.getBegin() && (baseToken.getEnd() == arg2.getEnd())) {
+        tokens.add("<e2>");
+        tokens.add(baseToken.getCoveredText());
+        tokens.add("</e2>");
+      } else {
+        tokens.add(baseToken.getCoveredText());
+      }
+    }
+    
+    return String.join(" ", tokens).replaceAll("[\r\n]", " ");
+  }
+  
   public static String getTextBetween(JCas jCas, Annotation arg1, Annotation arg2)  {
     
     // todo: use 2 tokens for things like 'during those hospitalizations ...'