You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/02/07 21:18:10 UTC

svn commit: r1565782 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java

Author: tmill
Date: Fri Feb  7 20:18:09 2014
New Revision: 1565782

URL: http://svn.apache.org/r1565782
Log:
Check in missing utility class from yesterday's checkins.

Added:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java?rev=1565782&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java Fri Feb  7 20:18:09 2014
@@ -0,0 +1,146 @@
+package org.apache.ctakes.assertion.util;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+public class AssertionDepUtils {
+
+  public static SimpleTree getTokenTreeString(JCas jCas, List<ConllDependencyNode> nodes, Annotation annotation){
+    Map<ConllDependencyNode, SimpleTree> node2tree = new HashMap<ConllDependencyNode, SimpleTree>();
+    for(ConllDependencyNode node : nodes){
+      if(node.getHead() == null){
+        // do absolutely nothing with this -- it covers the whole sentence and has no useful info
+        continue;
+      }
+      
+      SimpleTree curTree = null;
+      SimpleTree headTree = null;
+      if(!node2tree.containsKey(node)){
+        curTree = new SimpleTree(node.getCoveredText().toLowerCase());
+        node2tree.put(node, curTree);
+      }else{
+        curTree = node2tree.get(node);
+      }
+      
+      
+          
+      if(curTree.parent == null && node.getHead().getHead() != null){
+        if(node2tree.containsKey(node.getHead())){
+          headTree = node2tree.get(node.getHead());
+        }else{
+          headTree = new SimpleTree(node.getHead().getCoveredText().toLowerCase());
+          node2tree.put(node.getHead(), headTree);
+        }
+
+        curTree.parent = headTree;
+        headTree.addChild(curTree);
+      }
+    }
+
+    List<ConllDependencyNode> coveredNodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, annotation);
+    if(coveredNodes == null || coveredNodes.size() == 0) return null;
+    ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(coveredNodes);
+    SimpleTree localTree = node2tree.get(headNode.getHead().getHead() == null ? headNode : headNode.getHead());
+    String realCat = node2tree.get(headNode).cat;
+    // have to do this so that we have a placeholder so we can lowercase tokens, then insert the upper-case CONCEPT signifier token later.
+    node2tree.get(headNode).cat = "CONCEPT";
+
+//    String treeStr = localTree.toString();
+//    treeStr = "(TOP " + treeStr.replaceAll("\\(([^\\(]+) \\)", "($1 nil)").toLowerCase().replace("conceptplaceholder", "CONCEPT") + ")";
+//    treeStr = "(TOP " + treeStr.toLowerCase().replace("conceptplaceholder", "CONCEPT") + ")";
+//    node2tree.get(headNode).cat = realCat;
+    return localTree;
+  }
+  
+  public static String getTokenRelTreeString(JCas jCas, List<ConllDependencyNode> nodes, Annotation annotation, String label){
+    return getTokenRelTreeString(jCas, nodes, new Annotation[]{annotation}, new String[]{label});
+  }
+  
+  public static String getTokenRelTreeString(JCas jCas, List<ConllDependencyNode> nodes, Annotation[] annotations, String[] labels){
+    Map<ConllDependencyNode, SimpleTree> node2tree = new HashMap<ConllDependencyNode, SimpleTree>();
+    // create a SimpleTree object that corresponds to this dependency tree, where the
+    // root is the head of the sentence and the children are all the words such that the parent
+    // is their head. In this case every word is represented by its relationship as well as
+    // its word
+    for(ConllDependencyNode node : nodes){
+      if(node.getHead() == null){
+        // do absolutely nothing with this -- it covers the whole sentence and has no useful info
+        continue;
+      }
+      
+      SimpleTree curTree = null;
+      SimpleTree headTree = null;
+      if(!node2tree.containsKey(node)){
+        curTree = SimpleTree.fromString(String.format("(%s %s)", node.getDeprel(), node.getCoveredText()));
+        node2tree.put(node, curTree);
+      }else{
+        curTree = node2tree.get(node);
+      }
+
+
+      if(curTree.parent == null && node.getHead().getHead() != null){
+        if(node2tree.containsKey(node.getHead())){
+          headTree = node2tree.get(node.getHead());
+        }else{
+          headTree = SimpleTree.fromString(String.format("(%s %s)", node.getHead().getDeprel(), node.getHead().getCoveredText()));
+          node2tree.put(node.getHead(), headTree);
+        }
+
+        curTree.parent = headTree.children.get(0);
+        headTree.children.get(0).addChild(curTree);
+      }
+    }
+    
+    ConllDependencyNode highestHead = null;
+    // take the set of input annotations and the corresponding labels and insert them into the SimpleTree
+    for(int i = 0; i < annotations.length; i++){
+      // get the node representing the head of this annotation
+      List<ConllDependencyNode> coveredNodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, annotations[i]);
+      if(coveredNodes == null || coveredNodes.size() == 0) continue;
+      ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(coveredNodes);
+      
+      // is this the highest node of all the annotations we're looking at?
+      if(highestHead == null || (distanceFromRoot(headNode) < distanceFromRoot(highestHead))){
+        highestHead = headNode;
+      }
+      
+//      String realCat = node2tree.get(headNode).children.get(0).cat;
+      //  have to do this so that we have a placeholder so we can lowercase tokens, then insert the upper-case CONCEPT signifier token later.
+//      node2tree.get(headNode).children.get(0).cat = "conceptplaceholder";
+      SimpleTree insertionPoint = node2tree.get(headNode);
+      SimpleTree insertingTree = new SimpleTree(insertionPoint.cat);
+      insertionPoint.cat = labels[i];
+      insertingTree.children = insertionPoint.children;
+      insertingTree.children.get(0).parent = insertingTree;
+      insertionPoint.children = new ArrayList<SimpleTree>();
+      insertionPoint.addChild(insertingTree);
+      insertingTree.parent = insertionPoint;
+
+//      node2tree.get(headNode).children.get(0).cat = realCat;
+    }
+    SimpleTree localTree = node2tree.get(highestHead.getHead().getHead() == null ? highestHead : highestHead.getHead());
+    String treeStr = localTree.toString();
+    treeStr = treeStr.replaceAll("\\(([^\\(]+) \\)", "($1 nil)").toLowerCase();
+    
+    return treeStr;
+
+  }
+  
+  private static int distanceFromRoot(ConllDependencyNode node){
+    int dist = 0;
+    while(node.getHead() != null){
+      dist++;
+      node = node.getHead();
+    }
+    return dist;
+  }
+}