You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2015/05/19 23:30:44 UTC

svn commit: r1680396 - /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java

Author: tmill
Date: Tue May 19 21:30:43 2015
New Revision: 1680396

URL: http://svn.apache.org/r1680396
Log:
Added time expressions to markables, allowed a whole sentence to be a markable (if it is an NP), and handled a few other edge cases.

Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java?rev=1680396&r1=1680395&r2=1680396&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java Tue May 19 21:30:43 2015
@@ -10,11 +10,11 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.ctakes.dependency.parser.util.DependencyUtility;
-import org.apache.ctakes.temporal.eval.THYMEData;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
 import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
 import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -40,6 +40,19 @@ public class DeterministicMarkableAnnota
 //    createMarkablesUsingConstituencyTrees(jCas);
     createMarkablesUsingDependencyTrees(jCas);
     
+    for(TimeMention timex : JCasUtil.select(jCas, TimeMention.class)){
+      boolean collision = false;
+      for(Markable other : JCasUtil.selectCovered(jCas, Markable.class, timex.getBegin(), timex.getEnd())){
+        if(other.getBegin() == timex.getBegin() && other.getEnd() == timex.getEnd()){
+          collision = true;
+          break;
+        }
+      }
+      if(!collision){
+        Markable m = new Markable(jCas, timex.getBegin(), timex.getEnd());
+        m.addToIndexes(jCas);
+      }
+    }
   }
 
   private static void createMarkablesUsingDependencyTrees(JCas jCas) {
@@ -65,7 +78,7 @@ public class DeterministicMarkableAnnota
           }
           int begin = node.getBegin();
           int end = node.getEnd();
-          if(node.getHead().getId() != 0){
+//          if(node.getHead().getId() != 0){
             List<ConllDependencyNode> progeny = getProgeny(node, getDependencyNodes(jCas, getSentence(jCas, node)));
             progeny = removeConjunctionNodes(node, progeny);
             if(progeny.size() > 0){
@@ -78,13 +91,14 @@ public class DeterministicMarkableAnnota
                 }
               }
             }
-          }
+//          }
           Markable markable = new Markable(jCas, begin, end);
           markable.addToIndexes();
         }else if(node.getPostag().equals("DT") && !node.getDeprel().equals("det")){
           Markable markable = new Markable(jCas, node.getBegin(), node.getEnd());
           markable.addToIndexes();
-        }else if(node.getCoveredText().toLowerCase().equals("it") && !node.getDeprel().contains("pass")){
+        }else if(node.getCoveredText().toLowerCase().equals("it") && node.getDeprel().contains("bj")){
+          // Matching the substring "bj" covers nsubj, all of the *obj relations, and all of the *bjpass relations.
           Markable markable = new Markable(jCas, node.getBegin(), node.getEnd());
           markable.addToIndexes();
         }
@@ -102,7 +116,7 @@ public class DeterministicMarkableAnnota
       boolean blockedByConj = false;
       for(ConllDependencyNode pathEl : DependencyUtility.getPath(progeny, node, originalNode)){
         if(pathEl == originalNode) continue;
-        if(pathEl.getDeprel().equals("conj") || pathEl.getDeprel().equals("cc") || pathEl.getPostag().equals(".") || pathEl.getPostag().equals(",")){
+        if(pathEl.getDeprel().equals("conj") || pathEl.getDeprel().equals("cc") || pathEl.getPostag().equals(".") || pathEl.getPostag().equals(",") || pathEl.getDeprel().equals("meta")){
           blockedByConj = true;
           break;
         }