You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/06/23 17:39:32 UTC

svn commit: r1749949 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/uniquePairsCandidateEventPrinter.java

Author: dligach
Date: Thu Jun 23 17:39:32 2016
New Revision: 1749949

URL: http://svn.apache.org/viewvc?rev=1749949&view=rev
Log:
now including arg1 arg2 event markers

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/uniquePairsCandidateEventPrinter.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/uniquePairsCandidateEventPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/uniquePairsCandidateEventPrinter.java?rev=1749949&r1=1749948&r2=1749949&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/uniquePairsCandidateEventPrinter.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/uniquePairsCandidateEventPrinter.java Thu Jun 23 17:39:32 2016
@@ -57,7 +57,7 @@ import com.lexicalscope.jewel.cli.Option
  * @author dmitriy dligach
  */
 public class uniquePairsCandidateEventPrinter {
-  
+
   static interface Options {
 
     @Option(longName = "xmi-dir")
@@ -65,18 +65,18 @@ public class uniquePairsCandidateEventPr
 
     @Option(longName = "patients")
     public CommandLine.IntegerRanges getPatients();
-    
+
     @Option(longName = "output-train")
     public File getTrainOutputDirectory();
-    
+
     @Option(longName = "output-test")
     public File getTestOutputDirectory();
   }
 
   public static void main(String[] args) throws Exception {
-    
+
     Options options = CliFactory.parseArguments(Options.class, args);
-    
+
     File trainFile = options.getTrainOutputDirectory();
     if(trainFile.exists()) {
       trainFile.delete();
@@ -91,10 +91,10 @@ public class uniquePairsCandidateEventPr
     List<Integer> patientSets = options.getPatients().getList();
     List<Integer> trainItems = THYMEData.getPatientSets(patientSets, THYMEData.TRAIN_REMAINDERS);
     List<Integer> devItems = THYMEData.getPatientSets(patientSets, THYMEData.DEV_REMAINDERS);
-    
+
     List<File> trainFiles = Utils.getFilesFor(trainItems, options.getInputDirectory());
     List<File> devFiles = Utils.getFilesFor(devItems, options.getInputDirectory());
-    
+
     // write training data to file
     CollectionReader trainCollectionReader = Utils.getCollectionReader(trainFiles);
     AnalysisEngine trainDataWriter = AnalysisEngineFactory.createEngine(
@@ -102,7 +102,7 @@ public class uniquePairsCandidateEventPr
         "OutputFile",
         trainFile.getAbsoluteFile());
     SimplePipeline.runPipeline(trainCollectionReader, trainDataWriter);
-    
+
     // write dev data to file
     CollectionReader devCollectionReader = Utils.getCollectionReader(devFiles);
     AnalysisEngine devDataWriter = AnalysisEngineFactory.createEngine(
@@ -124,7 +124,7 @@ public class uniquePairsCandidateEventPr
         mandatory = true,
         description = "path to the output file")
     private String outputFile;
-    
+
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
 
@@ -155,7 +155,7 @@ public class uniquePairsCandidateEventPr
       for(Sentence sentence : JCasUtil.select(systemView, Sentence.class)) {
         List<String> eventEventRelationsInSentence = new ArrayList<>();
         ArrayList<EventMention> eventMentionsInSentence = new ArrayList<>(JCasUtil.selectCovered(goldView, EventMention.class, sentence));
-        
+
         // retrieve event-event relations in this sentence
         for(int i = 0; i < eventMentionsInSentence.size(); i++) {
           for(int j = i + 1; j < eventMentionsInSentence.size(); j++) {
@@ -163,7 +163,7 @@ public class uniquePairsCandidateEventPr
             EventMention mention2 = eventMentionsInSentence.get(j);
             BinaryTextRelation forwardRelation = relationLookup.get(Arrays.asList(mention1, mention2));
             BinaryTextRelation reverseRelation = relationLookup.get(Arrays.asList(mention2, mention1));
-            
+
             String label;            
             if(forwardRelation != null) {
               if(forwardRelation.getCategory().equals("CONTAINS")) {
@@ -180,8 +180,8 @@ public class uniquePairsCandidateEventPr
             } else {
               label = "none";         // no relation between mentions
             }
-            
-            String context = getTokensBetween(systemView, sentence, mention1, mention2, 0);
+
+            String context = getTokensBetween(systemView, sentence, mention1, "e1", mention2, "e2", 2);
             String text = String.format("%s|%s", label, context);
             eventEventRelationsInSentence.add(text.toLowerCase());
           }
@@ -195,7 +195,44 @@ public class uniquePairsCandidateEventPr
       }
     }
   }
-  
+
+  /**
+   * Return, as one space-joined string with CR/LF replaced by spaces: the context tokens before left, left's text wrapped in {@code <leftType>} / {@code </leftType>} marker tokens, the tokens between left and right, right's text wrapped in {@code <rightType>} / {@code </rightType>} marker tokens, and the context tokens after right
+   * @param contextSize number of tokens to include on the left of arg1 and on the right of arg2; context tokens that begin before sent or end after sent are skipped
+   */
+  public static String getTokensBetween(
+      JCas jCas, 
+      Sentence sent, 
+      Annotation left,
+      String leftType,
+      Annotation right,
+      String rightType,
+      int contextSize) {
+
+    List<String> tokens = new ArrayList<>();
+    for(BaseToken baseToken :  JCasUtil.selectPreceding(jCas, BaseToken.class, left, contextSize)) {
+      if(sent.getBegin() <= baseToken.getBegin()) {
+        tokens.add(baseToken.getCoveredText()); 
+      }
+    }
+    tokens.add("<" + leftType + ">");
+    tokens.add(left.getCoveredText());
+    tokens.add("</" + leftType + ">");
+    for(BaseToken baseToken : JCasUtil.selectBetween(jCas, BaseToken.class, left, right)) {
+      tokens.add(baseToken.getCoveredText());
+    }
+    tokens.add("<" + rightType + ">");
+    tokens.add(right.getCoveredText());
+    tokens.add("</" + rightType + ">");
+    for(BaseToken baseToken : JCasUtil.selectFollowing(jCas, BaseToken.class, right, contextSize)) {
+      if(baseToken.getEnd() <= sent.getEnd()) {
+        tokens.add(baseToken.getCoveredText());
+      }
+    }
+
+    return String.join(" ", tokens).replaceAll("[\r\n]", " ");
+  }
+
   /**
    * Return tokens between arg1 and arg2 as string 
    * @param contextSize number of tokens to include on the left of arg1 and on the right of arg2
@@ -206,7 +243,7 @@ public class uniquePairsCandidateEventPr
       Annotation left,
       Annotation right,
       int contextSize) {
-    
+
     List<String> tokens = new ArrayList<>();
     for(BaseToken baseToken :  JCasUtil.selectPreceding(jCas, BaseToken.class, left, contextSize)) {
       if(sent.getBegin() <= baseToken.getBegin()) {
@@ -223,7 +260,7 @@ public class uniquePairsCandidateEventPr
         tokens.add(baseToken.getCoveredText());
       }
     }
-    
+
     return String.join(" ", tokens).replaceAll("[\r\n]", " ");
   }
 }