You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/10/29 21:09:36 UTC
svn commit: r1711353 -
/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java
Author: dligach
Date: Thu Oct 29 20:09:36 2015
New Revision: 1711353
URL: http://svn.apache.org/viewvc?rev=1711353&view=rev
Log:
added reject/accept states
Modified:
ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java
Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java?rev=1711353&r1=1711352&r2=1711353&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/ListAndConjunctionAnnotatorPipeline.java Thu Oct 29 20:09:36 2015
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Set;
import org.apache.ctakes.relationextractor.data.analysis.Utils;
+import org.apache.ctakes.typesystem.type.constants.CONST;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -37,6 +38,7 @@ import org.apache.uima.fit.pipeline.Simp
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
+import com.google.common.collect.Lists;
import com.lexicalscope.jewel.cli.CliFactory;
import com.lexicalscope.jewel.cli.Option;
@@ -56,7 +58,6 @@ public class ListAndConjunctionAnnotator
public static void main(String[] args) throws Exception {
- System.out.println("beginning...");
Options options = CliFactory.parseArguments(Options.class, args);
CollectionReader collectionReader = Utils.getCollectionReader(options.getInputDirectory());
AnalysisEngine listAndConjunctionAnnotator = AnalysisEngineFactory.createEngine(ListAndConjunctionAe.class);
@@ -83,42 +84,61 @@ public class ListAndConjunctionAnnotator
for(Sentence sentence : JCasUtil.select(systemView, Sentence.class)) {
- String state = "start";
+ String currentState = "start";
String list = "";
+
for(BaseToken token : JCasUtil.selectCovered(systemView, BaseToken.class, sentence)) {
- state = getNextState(systemView, state, token);
- if(state == "a/s" || state == "punct" || state == "done") {
- list = list + " " + token.getCoveredText();
+ String nextState = getNextState(systemView, currentState, token);
+ if(nextState == "accept") {
+ System.out.println("found list in: " + sentence.getCoveredText());
+ break;
+ } else if(nextState != "reject") {
+ currentState = nextState;
+ } else {
+ currentState = "start";
}
- }
+ }
+
if(list != "") {
System.out.println(sentence.getCoveredText() + "/" + list);
}
}
}
-
+
public String getNextState(JCas systemView, String currentState, BaseToken nextToken) {
- Set<String> listConnectors = new HashSet();
- listConnectors.add("and");
- listConnectors.add(",");
+ Set<String> listConnectors = new HashSet<>(Lists.newArrayList("and", ","));
+
+ String nextState = "reject";
+ int nextTokenSemType = getSemanticType(systemView, nextToken);
+ String nextTokenText = nextToken.getCoveredText().toLowerCase();
- String nextState = "";
- int tokenSemType = getSemanticType(systemView, nextToken);
- if(currentState == "start" && tokenSemType == 6) {
- nextState = "a/s";
- } else if(currentState == "start" && tokenSemType != 6) {
- nextState = "start";
- } else if(currentState == "a/s" && listConnectors.contains(nextToken.getCoveredText().toLowerCase())) {
- nextState = "punct";
- } else if(currentState == "a/s" && ! listConnectors.contains(nextToken.getCoveredText().toLowerCase())) {
- nextState = "done";
- } else if(currentState == "punct" && tokenSemType == 6) {
- nextState = "a/s";
- } else if(currentState == "punct" && tokenSemType != 6) {
- nextState = "reject";
+ if(currentState == "start") {
+ if(nextTokenSemType == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
+ nextState = "as1";
+ } else {
+ nextState = "reject";
+ }
+ } else if(currentState == "as1") {
+ if(listConnectors.contains(nextTokenText)) {
+ nextState = "punct";
+ } else {
+ nextState = "reject";
+ }
+ } else if(currentState == "punct") {
+ if(nextTokenSemType == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
+ nextState = "as2";
+ } else {
+ nextState = "reject";
+ }
+ } else if(currentState == "as2") {
+ if(listConnectors.contains(nextTokenText)) {
+ nextState = "punct";
+ } else {
+ nextState = "accept";
+ }
}
-
+
return nextState;
}