You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/10/14 14:35:58 UTC

svn commit: r1764916 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPrinter.java

Author: dligach
Date: Fri Oct 14 14:35:58 2016
New Revision: 1764916

URL: http://svn.apache.org/viewvc?rev=1764916&view=rev
Log:
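Added a region-based data printer: the new getRegions method returns the left, between, and right token regions joined by '|' (its call site is left commented out for now).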

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPrinter.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPrinter.java?rev=1764916&r1=1764915&r2=1764916&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPrinter.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventEventRelPrinter.java Fri Oct 14 14:35:58 2016
@@ -188,6 +188,7 @@ public class EventEventRelPrinter {
               }
             } 
 
+            // sanity check
             if(mention1.getBegin() > mention2.getBegin())  {
               System.out.println("We assumed mention1 is always before mention2");
               System.out.println(sentence.getCoveredText());
@@ -200,9 +201,9 @@ public class EventEventRelPrinter {
             if(isTraining && label.equals("none") && coin.nextDouble() <= 0.5) {
               continue; // skip this negative example
             }
-
             
             String context = getTokensBetween(systemView, sentence, mention1, "e1", mention2, "e2", 2);
+            // String context = getRegions(systemView, sentence, mention1, mention2, 2);
             String text = String.format("%s|%s", label, context);
             eventEventRelationsInSentence.add(text.toLowerCase());
           }
@@ -221,39 +222,43 @@ public class EventEventRelPrinter {
    * Return tokens between arg1 and arg2 as string 
    * @param contextSize number of tokens to include on the left of arg1 and on the right of arg2
    */
-  public static String getTokensBetween(
-      JCas jCas, 
-      Sentence sent, 
-      Annotation left,
-      String leftType,
-      Annotation right,
-      String rightType,
-      int contextSize) {
+  public static String getRegions(JCas jCas, Sentence sent, Annotation left, Annotation right, int contextSize) {
 
-    List<String> tokens = new ArrayList<>();
+    
+    // tokens to the left from the left argument and the argument itself
+    List<String> leftTokens = new ArrayList<>();
     for(BaseToken baseToken :  JCasUtil.selectPreceding(jCas, BaseToken.class, left, contextSize)) {
       if(sent.getBegin() <= baseToken.getBegin()) {
-        tokens.add(baseToken.getCoveredText()); 
+        leftTokens.add(baseToken.getCoveredText()); 
       }
     }
-    tokens.add("<" + leftType + ">");
-    tokens.add(left.getCoveredText());
-    tokens.add("</" + leftType + ">");
+    for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, left)) {
+      leftTokens.add(baseToken.getCoveredText());
+    }
+    String leftAsString = String.join(" ", leftTokens).replaceAll("[\r\n]", " ");
+    
+    // tokens between the arguments
+    List<String> betweenTokens = new ArrayList<>();
     for(BaseToken baseToken : JCasUtil.selectBetween(jCas, BaseToken.class, left, right)) {
-      tokens.add(baseToken.getCoveredText());
+      betweenTokens.add(baseToken.getCoveredText());
+    }
+    String betweenAsString = String.join(" ", betweenTokens).replaceAll("[\r\n]", " ");
+    
+    // tokens to the right from the right argument and the argument itself
+    List<String> rightTokens = new ArrayList<>();
+    for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, right)) {
+      rightTokens.add(baseToken.getCoveredText());
     }
-    tokens.add("<" + rightType + ">");
-    tokens.add(right.getCoveredText());
-    tokens.add("</" + rightType + ">");
     for(BaseToken baseToken : JCasUtil.selectFollowing(jCas, BaseToken.class, right, contextSize)) {
       if(baseToken.getEnd() <= sent.getEnd()) {
-        tokens.add(baseToken.getCoveredText());
+        rightTokens.add(baseToken.getCoveredText());
       }
     }
-
-    return String.join(" ", tokens).replaceAll("[\r\n]", " ");
+    String rightAsString = String.join(" ", rightTokens).replaceAll("[\r\n]", " ");
+    
+    return leftAsString + "|" + betweenAsString + "|" + rightAsString;
   }
-
+  
   /**
    * Return tokens between arg1 and arg2 as string 
    * @param contextSize number of tokens to include on the left of arg1 and on the right of arg2
@@ -262,7 +267,9 @@ public class EventEventRelPrinter {
       JCas jCas, 
       Sentence sent, 
       Annotation left,
+      String leftType,
       Annotation right,
+      String rightType,
       int contextSize) {
 
     List<String> tokens = new ArrayList<>();
@@ -270,12 +277,16 @@ public class EventEventRelPrinter {
       if(sent.getBegin() <= baseToken.getBegin()) {
         tokens.add(baseToken.getCoveredText()); 
       }
-    } 
+    }
+    tokens.add("<" + leftType + ">");
     tokens.add(left.getCoveredText());
+    tokens.add("</" + leftType + ">");
     for(BaseToken baseToken : JCasUtil.selectBetween(jCas, BaseToken.class, left, right)) {
       tokens.add(baseToken.getCoveredText());
     }
+    tokens.add("<" + rightType + ">");
     tokens.add(right.getCoveredText());
+    tokens.add("</" + rightType + ">");
     for(BaseToken baseToken : JCasUtil.selectFollowing(jCas, BaseToken.class, right, contextSize)) {
       if(baseToken.getEnd() <= sent.getEnd()) {
         tokens.add(baseToken.getCoveredText());