You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/10/30 16:54:56 UTC
svn commit: r1711499 -
/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java
Author: dligach
Date: Fri Oct 30 15:54:56 2015
New Revision: 1711499
URL: http://svn.apache.org/viewvc?rev=1711499&view=rev
Log:
added code to capture and display begin and end offsets of lists
Modified:
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java?rev=1711499&r1=1711498&r2=1711499&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/AnatomicalSiteListExtractorPipeline.java Fri Oct 30 15:54:56 2015
@@ -44,6 +44,7 @@ import com.lexicalscope.jewel.cli.Option
/**
* Pipeline for detecting very simple lists of anatomical sites.
+ *
* @author dmitriy dligach
*/
public class AnatomicalSiteListExtractorPipeline {
@@ -67,14 +68,24 @@ public class AnatomicalSiteListExtractor
/**
* Implements a finite state machine for detecting
* extremely simple lists and conjunctions of anatomical sites.
+ *
* E.g. CT chest, abdomen and pelvis.
- *
- * @author dmitriy dligach
+ *
+ * FSA:
+ *
+ * start -{anatomical site}-> anatsite1
+ * start -{any other input}-> start
+ * anatsite1 -{list connector}-> listconn
+ * anatsite1 -{any other input}-> start
+ * listconn -{anatomical site}-> anatsite2
+ * listconn -{any other input}-> start
+ * anatsite2 -{list connector}-> listconn
+ * anatsite2 -{any other input}-> accept
*/
public static class ListAndConjunctionAe extends JCasAnnotator_ImplBase {
public enum State {
- START, ANATSITE1, PUNCTUATION, ANATSITE2, ACCEPT
+ START, ANATSITE1, LISTCONN, ANATSITE2, ACCEPT
}
@Override
@@ -88,12 +99,23 @@ public class AnatomicalSiteListExtractor
}
for(Sentence sentence : JCasUtil.select(systemView, Sentence.class)) {
+ int beginOffset = -1;
+ int endOffset = -1;
State state = State.START;
for(BaseToken input : JCasUtil.selectCovered(systemView, BaseToken.class, sentence)) {
- state = getNextState(systemView, state, input);
- if(state == State.ACCEPT) {
- System.out.println("found a list in: " + sentence.getCoveredText());
+ state = getNextState(systemView, state, input);
+ if(state == State.ANATSITE1) {
+ beginOffset = input.getBegin();
+ } else if(state == State.ANATSITE2) {
+ endOffset = input.getEnd();
+ } else if(state == State.ACCEPT) {
state = State.START;
+
+ int begin = beginOffset - sentence.getBegin();
+ int end = endOffset - sentence.getBegin();
+ System.out.println(sentence.getCoveredText());
+ System.out.println(sentence.getCoveredText().substring(begin, end));
+ System.out.println();
}
}
}
@@ -119,11 +141,11 @@ public class AnatomicalSiteListExtractor
}
} else if(currentState == State.ANATSITE1) {
if(listConnectors.contains(tokenText)) {
- nextState = State.PUNCTUATION;
+ nextState = State.LISTCONN;
} else {
nextState = State.START;
}
- } else if(currentState == State.PUNCTUATION) {
+ } else if(currentState == State.LISTCONN) {
if(tokenSemType == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
nextState = State.ANATSITE2;
} else {
@@ -131,7 +153,7 @@ public class AnatomicalSiteListExtractor
}
} else if(currentState == State.ANATSITE2) {
if(listConnectors.contains(tokenText)) {
- nextState = State.PUNCTUATION;
+ nextState = State.LISTCONN;
} else {
nextState = State.ACCEPT;
}