You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2015/05/19 23:30:44 UTC
svn commit: r1680396 -
/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
Author: tmill
Date: Tue May 19 21:30:43 2015
New Revision: 1680396
URL: http://svn.apache.org/r1680396
Log:
Added time expressions to markables, allowed a whole sentence to be a markable (if it's an NP), and handled a few other edge cases.
Modified:
ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java?rev=1680396&r1=1680395&r2=1680396&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java Tue May 19 21:30:43 2015
@@ -10,11 +10,11 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
-import org.apache.ctakes.temporal.eval.THYMEData;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -40,6 +40,19 @@ public class DeterministicMarkableAnnota
// createMarkablesUsingConstituencyTrees(jCas);
createMarkablesUsingDependencyTrees(jCas);
+ for(TimeMention timex : JCasUtil.select(jCas, TimeMention.class)){
+ boolean collision = false;
+ for(Markable other : JCasUtil.selectCovered(jCas, Markable.class, timex.getBegin(), timex.getEnd())){
+ if(other.getBegin() == timex.getBegin() && other.getEnd() == timex.getEnd()){
+ collision = true;
+ break;
+ }
+ }
+ if(!collision){
+ Markable m = new Markable(jCas, timex.getBegin(), timex.getEnd());
+ m.addToIndexes(jCas);
+ }
+ }
}
private static void createMarkablesUsingDependencyTrees(JCas jCas) {
@@ -65,7 +78,7 @@ public class DeterministicMarkableAnnota
}
int begin = node.getBegin();
int end = node.getEnd();
- if(node.getHead().getId() != 0){
+// if(node.getHead().getId() != 0){
List<ConllDependencyNode> progeny = getProgeny(node, getDependencyNodes(jCas, getSentence(jCas, node)));
progeny = removeConjunctionNodes(node, progeny);
if(progeny.size() > 0){
@@ -78,13 +91,14 @@ public class DeterministicMarkableAnnota
}
}
}
- }
+// }
Markable markable = new Markable(jCas, begin, end);
markable.addToIndexes();
}else if(node.getPostag().equals("DT") && !node.getDeprel().equals("det")){
Markable markable = new Markable(jCas, node.getBegin(), node.getEnd());
markable.addToIndexes();
- }else if(node.getCoveredText().toLowerCase().equals("it") && !node.getDeprel().contains("pass")){
+ }else if(node.getCoveredText().toLowerCase().equals("it") && node.getDeprel().contains("bj")){
+ // Matching any deprel that contains "bj" covers nsubj, all of the *obj relations, and all of the *bjpass relations.
Markable markable = new Markable(jCas, node.getBegin(), node.getEnd());
markable.addToIndexes();
}
@@ -102,7 +116,7 @@ public class DeterministicMarkableAnnota
boolean blockedByConj = false;
for(ConllDependencyNode pathEl : DependencyUtility.getPath(progeny, node, originalNode)){
if(pathEl == originalNode) continue;
- if(pathEl.getDeprel().equals("conj") || pathEl.getDeprel().equals("cc") || pathEl.getPostag().equals(".") || pathEl.getPostag().equals(",")){
+ if(pathEl.getDeprel().equals("conj") || pathEl.getDeprel().equals("cc") || pathEl.getPostag().equals(".") || pathEl.getPostag().equals(",") || pathEl.getDeprel().equals("meta")){
blockedByConj = true;
break;
}