You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/10/30 16:54:56 UTC

svn commit: r1711499 - /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java

Author: dligach
Date: Fri Oct 30 15:54:56 2015
New Revision: 1711499

URL: http://svn.apache.org/viewvc?rev=1711499&view=rev
Log:
added code to capture and display begin and end offsets of lists

Modified:
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java?rev=1711499&r1=1711498&r2=1711499&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java Fri Oct 30 15:54:56 2015
@@ -44,6 +44,7 @@ import com.lexicalscope.jewel.cli.Option
 
 /**
  * Pipeline for detecting very simple lists of anatomical sites.
+ * 
  * @author dmitriy dligach
  */
 public class AnatomicalSiteListExtractorPipeline {
@@ -67,14 +68,24 @@ public class AnatomicalSiteListExtractor
   /**
    * Implements a finite state machine for detecting 
    * extremely simple lists and conjunctions of anatomical sites.
+   * 
    * E.g. CT chest, abdomen and pelvis.
-   *  
-   * @author dmitriy dligach
+   * 
+   * FSA:
+   * 
+   * start -{anatomical site}-> anatsite1
+   * start -{any other input}-> start
+   * anatsite1 -{list connector}-> listconn
+   * anatsite1 -{any other input}-> start 
+   * listconn -{anatomical site}-> anatsite2
+   * listconn -{any other input}-> start
+   * anatsite2 -{list connector}-> listconn
+   * anatsite2 -{any other input}-> accept
    */
   public static class ListAndConjunctionAe extends JCasAnnotator_ImplBase {
 
     public enum State {
-      START, ANATSITE1, PUNCTUATION, ANATSITE2, ACCEPT
+      START, ANATSITE1, LISTCONN, ANATSITE2, ACCEPT
     }
 
     @Override
@@ -88,12 +99,23 @@ public class AnatomicalSiteListExtractor
       }
       
       for(Sentence sentence : JCasUtil.select(systemView, Sentence.class)) {
+        int beginOffset = -1;
+        int endOffset = -1;
         State state = State.START;
         for(BaseToken input : JCasUtil.selectCovered(systemView, BaseToken.class, sentence)) {
-          state = getNextState(systemView, state, input); 
-          if(state == State.ACCEPT) {
-            System.out.println("found a list in: " + sentence.getCoveredText());
+          state = getNextState(systemView, state, input);
+          if(state == State.ANATSITE1) {
+            beginOffset = input.getBegin();
+          } else if(state == State.ANATSITE2) {
+            endOffset = input.getEnd();
+          } else if(state == State.ACCEPT) {
             state = State.START;
+
+            int begin = beginOffset - sentence.getBegin();
+            int end = endOffset - sentence.getBegin();
+            System.out.println(sentence.getCoveredText());
+            System.out.println(sentence.getCoveredText().substring(begin, end));
+            System.out.println();
           } 
         }
       }
@@ -119,11 +141,11 @@ public class AnatomicalSiteListExtractor
         }
       } else if(currentState == State.ANATSITE1) {
         if(listConnectors.contains(tokenText)) {
-          nextState = State.PUNCTUATION;
+          nextState = State.LISTCONN;
         } else {
           nextState = State.START;
         }
-      } else if(currentState == State.PUNCTUATION) {
+      } else if(currentState == State.LISTCONN) {
         if(tokenSemType == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
           nextState = State.ANATSITE2;
         } else {
@@ -131,7 +153,7 @@ public class AnatomicalSiteListExtractor
         } 
       } else if(currentState == State.ANATSITE2) {
         if(listConnectors.contains(tokenText)) {
-          nextState = State.PUNCTUATION;
+          nextState = State.LISTCONN;
         } else {
           nextState = State.ACCEPT;
         }