You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/06/20 20:43:48 UTC

svn commit: r1749398 - /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java

Author: dligach
Date: Mon Jun 20 20:43:48 2016
New Revision: 1749398

URL: http://svn.apache.org/viewvc?rev=1749398&view=rev
Log:
now including arg2 ... arg1 event-event relations

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java?rev=1749398&r1=1749397&r2=1749398&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java Mon Jun 20 20:43:48 2016
@@ -159,10 +159,8 @@ public class PositiveAndNegativeExampleP
             if(mention1 == mention2) {
               continue;
             }
+            
             BinaryTextRelation relation = relationLookup.get(Arrays.asList(mention1, mention2));
-            if(mention1.getBegin() > mention2.getBegin()) {
-              continue; // will worry about these later  
-            }
             
             String label;
             if(relation == null) {
@@ -170,8 +168,16 @@ public class PositiveAndNegativeExampleP
             } else {
               label = relation.getCategory().toLowerCase();
             }
-
-            String context = getTokensBetween(systemView, sentence, mention1, mention2, 2);
+            
+            String context;
+            if(mention1.getBegin() < mention2.getBegin()) {
+              // arg1 ... arg2, e.g. <arg1> biopsy </arg1> positive for <arg2> malignancy </arg2>
+              context = getTokensBetween(systemView, sentence, mention1, "arg1", mention2, "arg2", 2);
+            } else {
+              // arg2 ... arg1, e.g. <arg2> disease </arg2> seen on the <arg1> scan </arg1>
+              context = getTokensBetween(systemView, sentence, mention2, "arg2", mention1, "arg1", 2);
+            }
+            
             String text = String.format("%s|%s", label, context);
             eventEventRelationsInSentence.add(text.toLowerCase());
           }
@@ -185,51 +191,37 @@ public class PositiveAndNegativeExampleP
       }
     }
   }
-
-  public static String getSentenceWithMarkedArgs(JCas jCas, Sentence sent, Annotation arg1, Annotation arg2) {
-    
-    List<String> tokens = new ArrayList<>();
-    for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
-      // for a few hundred examples, begin/end offsets don't both match token begin/end
-      // so using an 'or' rather than 'and'
-      if(baseToken.getBegin() == arg1.getBegin() || (baseToken.getEnd() == arg1.getEnd())) {
-        tokens.add("<e>");
-        tokens.add(baseToken.getCoveredText());
-        tokens.add("</e>");
-      } else if(baseToken.getBegin() == arg2.getBegin() || (baseToken.getEnd() == arg2.getEnd())) {
-        tokens.add("<e>");
-        tokens.add(baseToken.getCoveredText());
-        tokens.add("</e>");
-      } else {
-        tokens.add(baseToken.getCoveredText());
-      }
-    }
-    
-    return String.join(" ", tokens).replaceAll("[\r\n]", " ");
-  }
   
   /**
    * Return the tokens spanning the left and right arguments as a string, with each argument wrapped in its type tag
    * @param contextSize number of tokens to include before the left argument and after the right argument
    */
-  public static String getTokensBetween(JCas jCas, Sentence sent, Annotation arg1, Annotation arg2, int contextSize) throws AnalysisEngineProcessException {
+  public static String getTokensBetween(
+      JCas jCas, 
+      Sentence sent, 
+      Annotation left,
+      String leftType,
+      Annotation right,
+      String rightType,
+      int contextSize) 
+          throws AnalysisEngineProcessException {
     
     List<String> tokens = new ArrayList<>();
-    for(BaseToken baseToken :  JCasUtil.selectPreceding(jCas, BaseToken.class, arg1, contextSize)) {
+    for(BaseToken baseToken :  JCasUtil.selectPreceding(jCas, BaseToken.class, left, contextSize)) {
       if(sent.getBegin() <= baseToken.getBegin()) {
         tokens.add(baseToken.getCoveredText()); 
       }
     }
-    tokens.add("<e1>");
-    tokens.add(arg1.getCoveredText());
-    tokens.add("</e1>");
-    for(BaseToken baseToken : JCasUtil.selectBetween(jCas, BaseToken.class, arg1, arg2)) {
+    tokens.add("<" + leftType + ">");
+    tokens.add(left.getCoveredText());
+    tokens.add("</" + leftType + ">");
+    for(BaseToken baseToken : JCasUtil.selectBetween(jCas, BaseToken.class, left, right)) {
       tokens.add(baseToken.getCoveredText());
     }
-    tokens.add("<e2>");
-    tokens.add(arg2.getCoveredText());
-    tokens.add("</e2>");
-    for(BaseToken baseToken : JCasUtil.selectFollowing(jCas, BaseToken.class, arg2, contextSize)) {
+    tokens.add("<" + rightType + ">");
+    tokens.add(right.getCoveredText());
+    tokens.add("</" + rightType + ">");
+    for(BaseToken baseToken : JCasUtil.selectFollowing(jCas, BaseToken.class, right, contextSize)) {
       if(baseToken.getEnd() <= sent.getEnd()) {
         tokens.add(baseToken.getCoveredText());
       }
@@ -258,4 +250,26 @@ public class PositiveAndNegativeExampleP
       return baseToken.getCoveredText();
     }
   }
+  
+  public static String getSentenceWithMarkedArgs(JCas jCas, Sentence sent, Annotation arg1, Annotation arg2) {
+    
+    List<String> tokens = new ArrayList<>();
+    for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
+      // for a few hundred examples, begin/end offsets don't both match token begin/end
+      // so using an 'or' rather than 'and'
+      if(baseToken.getBegin() == arg1.getBegin() || (baseToken.getEnd() == arg1.getEnd())) {
+        tokens.add("<e>");
+        tokens.add(baseToken.getCoveredText());
+        tokens.add("</e>");
+      } else if(baseToken.getBegin() == arg2.getBegin() || (baseToken.getEnd() == arg2.getEnd())) {
+        tokens.add("<e>");
+        tokens.add(baseToken.getCoveredText());
+        tokens.add("</e>");
+      } else {
+        tokens.add(baseToken.getCoveredText());
+      }
+    }
+    
+    return String.join(" ", tokens).replaceAll("[\r\n]", " ");
+  }
 }