You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/02/07 21:18:10 UTC
svn commit: r1565782 -
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java
Author: tmill
Date: Fri Feb 7 20:18:09 2014
New Revision: 1565782
URL: http://svn.apache.org/r1565782
Log:
Check in missing utility class from yesterday's checkins.
Added:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java
Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java?rev=1565782&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/AssertionDepUtils.java Fri Feb 7 20:18:09 2014
@@ -0,0 +1,146 @@
+package org.apache.ctakes.assertion.util;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Static helpers that convert the dependency nodes of a sentence into
+ * {@link SimpleTree} structures in which the head token of one or more
+ * annotations is marked with a special label.
+ */
+public class AssertionDepUtils {
+
+  /**
+   * Builds a tree of lower-cased tokens from the given dependency nodes, relabels the
+   * tree node of the annotation's head token as {@code CONCEPT}, and returns the tree
+   * rooted at that head token's parent (or at the head token itself when its parent is
+   * the head-less sentence-level node).
+   *
+   * @param jCas the CAS used to look up the dependency nodes covered by {@code annotation}
+   * @param nodes the dependency nodes to build the tree from
+   * @param annotation the annotation whose head token is marked
+   * @return the local tree around the annotation's head token, or {@code null} when the
+   *         annotation covers no dependency node, no usable head node can be determined,
+   *         or the relevant node was not part of {@code nodes}
+   */
+  public static SimpleTree getTokenTreeString(JCas jCas, List<ConllDependencyNode> nodes, Annotation annotation){
+    Map<ConllDependencyNode, SimpleTree> node2tree = new HashMap<ConllDependencyNode, SimpleTree>();
+    for(ConllDependencyNode node : nodes){
+      ConllDependencyNode head = node.getHead();
+      if(head == null){
+        // do absolutely nothing with this -- it covers the whole sentence and has no useful info
+        continue;
+      }
+
+      SimpleTree curTree = node2tree.get(node);
+      if(curTree == null){
+        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
+        curTree = new SimpleTree(node.getCoveredText().toLowerCase(Locale.ROOT));
+        node2tree.put(node, curTree);
+      }
+
+      // link to the head's tree unless already linked or the head is the head-less sentence-level node
+      if(curTree.parent == null && head.getHead() != null){
+        SimpleTree headTree = node2tree.get(head);
+        if(headTree == null){
+          headTree = new SimpleTree(head.getCoveredText().toLowerCase(Locale.ROOT));
+          node2tree.put(head, headTree);
+        }
+
+        curTree.parent = headTree;
+        headTree.addChild(curTree);
+      }
+    }
+
+    List<ConllDependencyNode> coveredNodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, annotation);
+    if(coveredNodes == null || coveredNodes.isEmpty()){
+      return null;
+    }
+    ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(coveredNodes);
+    // defensive: a missing head node, or one that is itself the head-less sentence-level node,
+    // has no tree to mark; these cases used to end in a NullPointerException
+    if(headNode == null || headNode.getHead() == null){
+      return null;
+    }
+    SimpleTree conceptTree = node2tree.get(headNode);
+    if(conceptTree == null){
+      // the head node was not among the nodes the tree was built from
+      return null;
+    }
+
+    SimpleTree localTree = node2tree.get(headNode.getHead().getHead() == null ? headNode : headNode.getHead());
+    // every token label was lower-cased above, so the upper-case CONCEPT marker stands out
+    conceptTree.cat = "CONCEPT";
+    return localTree;
+  }
+
+  /**
+   * Convenience overload of
+   * {@link #getTokenRelTreeString(JCas, List, Annotation[], String[])} for a single
+   * annotation and label.
+   *
+   * @param jCas the CAS used to look up the dependency nodes covered by {@code annotation}
+   * @param nodes the dependency nodes to build the tree from
+   * @param annotation the annotation whose head token is marked
+   * @param label the label inserted above the annotation's head token
+   * @return the lower-cased bracketed tree string, or {@code null} when the annotation
+   *         could not be located in the tree
+   */
+  public static String getTokenRelTreeString(JCas jCas, List<ConllDependencyNode> nodes, Annotation annotation, String label){
+    return getTokenRelTreeString(jCas, nodes, new Annotation[]{annotation}, new String[]{label});
+  }
+
+  /**
+   * Builds a tree in which every word is represented by its dependency relation as well
+   * as its covered text, inserts {@code labels[i]} above the head token of
+   * {@code annotations[i]}, and renders the tree around the annotation head that is
+   * closest to the root as a string.
+   *
+   * <p>The whole rendered string is lower-cased, which includes the inserted labels.
+   *
+   * @param jCas the CAS used to look up the dependency nodes covered by each annotation
+   * @param nodes the dependency nodes to build the tree from
+   * @param annotations the annotations whose head tokens are marked
+   * @param labels the labels to insert; {@code labels[i]} belongs to {@code annotations[i]}
+   * @return the lower-cased bracketed tree string, or {@code null} when none of the
+   *         annotations could be located in the tree
+   */
+  public static String getTokenRelTreeString(JCas jCas, List<ConllDependencyNode> nodes, Annotation[] annotations, String[] labels){
+    Map<ConllDependencyNode, SimpleTree> node2tree = new HashMap<ConllDependencyNode, SimpleTree>();
+    // create a SimpleTree object that corresponds to this dependency tree, where the
+    // root is the head of the sentence and the children are all the words such that the parent
+    // is their head. In this case every word is represented by its relationship as well as
+    // its word
+    for(ConllDependencyNode node : nodes){
+      ConllDependencyNode head = node.getHead();
+      if(head == null){
+        // do absolutely nothing with this -- it covers the whole sentence and has no useful info
+        continue;
+      }
+
+      SimpleTree curTree = node2tree.get(node);
+      if(curTree == null){
+        curTree = SimpleTree.fromString(String.format("(%s %s)", node.getDeprel(), node.getCoveredText()));
+        node2tree.put(node, curTree);
+      }
+
+      if(curTree.parent == null && head.getHead() != null){
+        SimpleTree headTree = node2tree.get(head);
+        if(headTree == null){
+          headTree = SimpleTree.fromString(String.format("(%s %s)", head.getDeprel(), head.getCoveredText()));
+          node2tree.put(head, headTree);
+        }
+
+        // dependents hang off the first child of the head's "(deprel word)" tree,
+        // presumably the word node, rather than off the relation node itself
+        SimpleTree headWord = headTree.children.get(0);
+        curTree.parent = headWord;
+        headWord.addChild(curTree);
+      }
+    }
+
+    ConllDependencyNode highestHead = null;
+    // take the set of input annotations and the corresponding labels and insert them into the SimpleTree
+    for(int i = 0; i < annotations.length; i++){
+      // get the node representing the head of this annotation
+      List<ConllDependencyNode> coveredNodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, annotations[i]);
+      if(coveredNodes == null || coveredNodes.isEmpty()){
+        continue;
+      }
+      ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(coveredNodes);
+      SimpleTree insertionPoint = headNode == null ? null : node2tree.get(headNode);
+      if(insertionPoint == null){
+        // no tree was built for this head (e.g. it is the head-less sentence-level node or was
+        // not among the given nodes); skip it instead of failing with a NullPointerException
+        continue;
+      }
+
+      // is this the highest node of all the annotations we're looking at?
+      if(highestHead == null || distanceFromRoot(headNode) < distanceFromRoot(highestHead)){
+        highestHead = headNode;
+      }
+
+      // push the node's current label and children down one level and put the annotation label in its place
+      SimpleTree insertingTree = new SimpleTree(insertionPoint.cat);
+      insertionPoint.cat = labels[i];
+      insertingTree.children = insertionPoint.children;
+      insertingTree.children.get(0).parent = insertingTree;
+      insertionPoint.children = new ArrayList<SimpleTree>();
+      insertionPoint.addChild(insertingTree);
+      insertingTree.parent = insertionPoint;
+    }
+
+    if(highestHead == null){
+      // none of the annotations could be located in the tree
+      return null;
+    }
+    // every node that has a tree also has a head, so getHead() is non-null here
+    SimpleTree localTree = node2tree.get(highestHead.getHead().getHead() == null ? highestHead : highestHead.getHead());
+    if(localTree == null){
+      return null;
+    }
+
+    // give nodes that were rendered without any child an explicit "nil" child, then lower-case everything
+    String treeStr = localTree.toString();
+    return treeStr.replaceAll("\\(([^\\(]+) \\)", "($1 nil)").toLowerCase(Locale.ROOT);
+  }
+
+  /**
+   * Counts how many {@code getHead()} steps separate the given node from the node
+   * that has no head.
+   *
+   * @param node the node to start from; must not be {@code null}
+   * @return the number of head links followed before reaching a head-less node
+   */
+  private static int distanceFromRoot(ConllDependencyNode node){
+    int dist = 0;
+    for(ConllDependencyNode cur = node; cur.getHead() != null; cur = cur.getHead()){
+      dist++;
+    }
+    return dist;
+  }
+}