You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2016/06/20 20:43:48 UTC
svn commit: r1749398 -
/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
Author: dligach
Date: Mon Jun 20 20:43:48 2016
New Revision: 1749398
URL: http://svn.apache.org/viewvc?rev=1749398&view=rev
Log:
now including arg2 ... arg1 event-event relations
Modified:
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java?rev=1749398&r1=1749397&r2=1749398&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java Mon Jun 20 20:43:48 2016
@@ -159,10 +159,8 @@ public class PositiveAndNegativeExampleP
if(mention1 == mention2) {
continue;
}
+
BinaryTextRelation relation = relationLookup.get(Arrays.asList(mention1, mention2));
- if(mention1.getBegin() > mention2.getBegin()) {
- continue; // will worry about these later
- }
String label;
if(relation == null) {
@@ -170,8 +168,16 @@ public class PositiveAndNegativeExampleP
} else {
label = relation.getCategory().toLowerCase();
}
-
- String context = getTokensBetween(systemView, sentence, mention1, mention2, 2);
+
+ String context;
+ if(mention1.getBegin() < mention2.getBegin()) {
+ // arg1 ... arg2, e.g. <arg1> biopsy </arg1> positive for <arg2> malignancy </arg2>
+ context = getTokensBetween(systemView, sentence, mention1, "arg1", mention2, "arg2", 2);
+ } else {
+ // arg2 ... arg1, e.g. <arg2> disease </arg2> seen on the <arg1> scan </arg1>
+ context = getTokensBetween(systemView, sentence, mention2, "arg2", mention1, "arg1", 2);
+ }
+
String text = String.format("%s|%s", label, context);
eventEventRelationsInSentence.add(text.toLowerCase());
}
@@ -185,51 +191,37 @@ public class PositiveAndNegativeExampleP
}
}
}
-
- public static String getSentenceWithMarkedArgs(JCas jCas, Sentence sent, Annotation arg1, Annotation arg2) {
-
- List<String> tokens = new ArrayList<>();
- for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
- // for a few hundred examples, begin/end offsets don't both match token begin/end
- // so using an 'or' rather than 'and'
- if(baseToken.getBegin() == arg1.getBegin() || (baseToken.getEnd() == arg1.getEnd())) {
- tokens.add("<e>");
- tokens.add(baseToken.getCoveredText());
- tokens.add("</e>");
- } else if(baseToken.getBegin() == arg2.getBegin() || (baseToken.getEnd() == arg2.getEnd())) {
- tokens.add("<e>");
- tokens.add(baseToken.getCoveredText());
- tokens.add("</e>");
- } else {
- tokens.add(baseToken.getCoveredText());
- }
- }
-
- return String.join(" ", tokens).replaceAll("[\r\n]", " ");
- }
/**
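* Return the tokens spanning the left and right arguments as a string, tagging each argument with its type (e.g. <arg1> ... </arg1>)
* @param contextSize number of tokens (within the sentence) to include before the left argument and after the right argument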
*/
- public static String getTokensBetween(JCas jCas, Sentence sent, Annotation arg1, Annotation arg2, int contextSize) throws AnalysisEngineProcessException {
+ public static String getTokensBetween(
+ JCas jCas,
+ Sentence sent,
+ Annotation left,
+ String leftType,
+ Annotation right,
+ String rightType,
+ int contextSize)
+ throws AnalysisEngineProcessException {
List<String> tokens = new ArrayList<>();
- for(BaseToken baseToken : JCasUtil.selectPreceding(jCas, BaseToken.class, arg1, contextSize)) {
+ for(BaseToken baseToken : JCasUtil.selectPreceding(jCas, BaseToken.class, left, contextSize)) {
if(sent.getBegin() <= baseToken.getBegin()) {
tokens.add(baseToken.getCoveredText());
}
}
- tokens.add("<e1>");
- tokens.add(arg1.getCoveredText());
- tokens.add("</e1>");
- for(BaseToken baseToken : JCasUtil.selectBetween(jCas, BaseToken.class, arg1, arg2)) {
+ tokens.add("<" + leftType + ">");
+ tokens.add(left.getCoveredText());
+ tokens.add("</" + leftType + ">");
+ for(BaseToken baseToken : JCasUtil.selectBetween(jCas, BaseToken.class, left, right)) {
tokens.add(baseToken.getCoveredText());
}
- tokens.add("<e2>");
- tokens.add(arg2.getCoveredText());
- tokens.add("</e2>");
- for(BaseToken baseToken : JCasUtil.selectFollowing(jCas, BaseToken.class, arg2, contextSize)) {
+ tokens.add("<" + rightType + ">");
+ tokens.add(right.getCoveredText());
+ tokens.add("</" + rightType + ">");
+ for(BaseToken baseToken : JCasUtil.selectFollowing(jCas, BaseToken.class, right, contextSize)) {
if(baseToken.getEnd() <= sent.getEnd()) {
tokens.add(baseToken.getCoveredText());
}
@@ -258,4 +250,26 @@ public class PositiveAndNegativeExampleP
return baseToken.getCoveredText();
}
}
+
+ public static String getSentenceWithMarkedArgs(JCas jCas, Sentence sent, Annotation arg1, Annotation arg2) {
+
+ List<String> tokens = new ArrayList<>();
+ for(BaseToken baseToken : JCasUtil.selectCovered(jCas, BaseToken.class, sent)) {
+ // for a few hundred examples, begin/end offsets don't both match token begin/end
+ // so using an 'or' rather than 'and'
+ if(baseToken.getBegin() == arg1.getBegin() || (baseToken.getEnd() == arg1.getEnd())) {
+ tokens.add("<e>");
+ tokens.add(baseToken.getCoveredText());
+ tokens.add("</e>");
+ } else if(baseToken.getBegin() == arg2.getBegin() || (baseToken.getEnd() == arg2.getEnd())) {
+ tokens.add("<e>");
+ tokens.add(baseToken.getCoveredText());
+ tokens.add("</e>");
+ } else {
+ tokens.add(baseToken.getCoveredText());
+ }
+ }
+
+ return String.join(" ", tokens).replaceAll("[\r\n]", " ");
+ }
}