You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/06/07 17:51:29 UTC

svn commit: r1490695 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature: DependencyParseUtils.java DependencyPathFeaturesExtractor.java

Author: dligach
Date: Fri Jun  7 15:51:29 2013
New Revision: 1490695

URL: http://svn.apache.org/r1490695
Log:
modified the dependency path feature to exclude the words on both ends of the path

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java?rev=1490695&r1=1490694&r2=1490695&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java Fri Jun  7 15:51:29 2013
@@ -18,136 +18,139 @@
  */
 package org.apache.ctakes.temporal.ae.feature;
 
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.uimafit.util.JCasUtil;
-
-import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
-
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * This is a slightly modified version of the same class from relation extraction.
+ * TODO: eventually replace the relation extraction version with this one.
+ */
 public class DependencyParseUtils {
 
-	/**
-	 * Returns the paths from each node to the common ancestor between them
-	 */
-	public static List<LinkedList<ConllDependencyNode>> getPathsToCommonAncestor(ConllDependencyNode node1, ConllDependencyNode node2) {
-		List<LinkedList<ConllDependencyNode>> paths = new ArrayList<LinkedList<ConllDependencyNode>>(2);
-		LinkedList<ConllDependencyNode> node1ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node1);
-		LinkedList<ConllDependencyNode> node2ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node2);
-		
-		// We will remove the last item in each path until they diverge
-		ConllDependencyNode ancestor = null;
-		while (!node1ToHeadPath.isEmpty() && !node2ToHeadPath.isEmpty()) {
-			if (node1ToHeadPath.getLast() == node2ToHeadPath.getLast()) {
-				node1ToHeadPath.removeLast();
-				ancestor = node2ToHeadPath.removeLast();
-			} else {
-				break;
-			}
-		}
-		
-		// Put the common ancestor back on both paths
-		if (ancestor != null) {
-	    	 node1ToHeadPath.add(ancestor);
-	    	 node2ToHeadPath.add(ancestor);
-		}
-	     
-		paths.add(node1ToHeadPath);
-		paths.add(node2ToHeadPath);
-		return paths;
-	}
-
-	/**
-	 * Finds the head word within a given annotation span
-	 */
-	public static ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) {
-	
-	    for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
-	    	
-	    	ConllDependencyNode head = depNode.getHead();
-	    	if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() > annotation.getEnd()) {
-	    		// The head is outside the bounds of the annotation, so this node must be the annotation's head
-	    		return depNode;
-	    	}
-	    }
-	    // Can this happen?
-	    return null;
-	}
-
-	public static LinkedList<ConllDependencyNode> getPathToSentenceHead(ConllDependencyNode node) {
-	     LinkedList<ConllDependencyNode> path = new LinkedList<ConllDependencyNode>();
-	     ConllDependencyNode currNode = node;
-	     while (currNode.getHead() != null) { 
-	         path.add(currNode);
-	         currNode = currNode.getHead();
-	     }
-	     return path;
-	}
-
-	/**
-	 * Finds the path between two dependency nodes
-	 */
-	public static LinkedList<ConllDependencyNode> getPathBetweenNodes(ConllDependencyNode srcNode, ConllDependencyNode tgtNode) {
-		LinkedList<ConllDependencyNode> path = new LinkedList<ConllDependencyNode>();
-		List<LinkedList<ConllDependencyNode>> paths = getPathsToCommonAncestor(srcNode, tgtNode);
-		LinkedList<ConllDependencyNode> srcToAncestorPath = paths.get(0);
-		LinkedList<ConllDependencyNode> tgtToAncestorPath = paths.get(1);
-		
-		if (srcNode == tgtNode) {
-			return path;
-		}
-		
-		// Join the two paths
-		if (!srcToAncestorPath.isEmpty()) {
-			srcToAncestorPath.removeLast();
-		}
-		path = srcToAncestorPath;
-		while (!tgtToAncestorPath.isEmpty()) {
-			path.add(tgtToAncestorPath.removeLast());
-		}
-		
-		return path;
-	}
-	
-
-	/**
-	 * This will convert a path into a string lexicalized at the end points with arc labels and POS tags in between
-	 */
-	
-	public static String pathToString(LinkedList<ConllDependencyNode> path) {
-		
-		StringBuilder builder = new StringBuilder();
-		for (ConllDependencyNode node : path) {
-			if (node == path.getFirst() || node == path.getLast()) {
-				builder.append(node.getCoveredText());
-			} else {
-				builder.append(node.getPostag());
-			}
-			
-			builder.append("-");
-			builder.append(node.getDeprel());
-			if (node != path.getLast()) {
-				builder.append("/");
-			}
-		}
-		return builder.toString();
-	}
-	
-
-	
-
-	public static String dumpDependencyRelations(JCas jcas, Annotation annotation) {
-		StringBuilder builder = new StringBuilder();
-	    for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
-	    	if (depNode.getHead() != null) {
-	    		builder.append(String.format("%s(%s,%s)\n", depNode.getDeprel(), depNode.getCoveredText(), depNode.getHead().getCoveredText()));
-	    	}
-	    }
-	    return builder.toString();
-		
-	}
-
+  /**
+   * Returns the paths from each node to the common ancestor between them
+   */
+  public static List<LinkedList<ConllDependencyNode>> getPathsToCommonAncestor(ConllDependencyNode node1, ConllDependencyNode node2) {
+
+    List<LinkedList<ConllDependencyNode>> paths = new ArrayList<LinkedList<ConllDependencyNode>>(2);
+    LinkedList<ConllDependencyNode> node1ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node1);
+    LinkedList<ConllDependencyNode> node2ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node2);
+
+    // We will remove the last item in each path until they diverge
+    ConllDependencyNode ancestor = null;
+    while (!node1ToHeadPath.isEmpty() && !node2ToHeadPath.isEmpty()) {
+      if (node1ToHeadPath.getLast() == node2ToHeadPath.getLast()) {
+        node1ToHeadPath.removeLast();
+        ancestor = node2ToHeadPath.removeLast();
+      } else {
+        break;
+      }
+    }
+
+    // Put the common ancestor back on both paths
+    if (ancestor != null) {
+      node1ToHeadPath.add(ancestor);
+      node2ToHeadPath.add(ancestor);
+    }
+
+    paths.add(node1ToHeadPath);
+    paths.add(node2ToHeadPath);
+
+    return paths;
+  }
+
+  /**
+   * Finds the head word within a given annotation span
+   */
+  public static ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) {
+
+    for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
+
+      ConllDependencyNode head = depNode.getHead();
+      if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() > annotation.getEnd()) {
+        // The head is outside the bounds of the annotation, so this node must be the annotation's head
+        return depNode;
+      }
+    }
+
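+    // Reached when no covered node has a null head or a head outside the annotation (e.g. no dependency nodes are covered); callers must handle null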
+    return null;
+  }
+
+  public static LinkedList<ConllDependencyNode> getPathToSentenceHead(ConllDependencyNode node) {
+
+    LinkedList<ConllDependencyNode> path = new LinkedList<ConllDependencyNode>();
+    ConllDependencyNode currNode = node;
+    while (currNode.getHead() != null) { 
+      path.add(currNode);
+      currNode = currNode.getHead();
+    }
+
+    return path;
+  }
+
+  /**
+   * Finds the path between two dependency nodes
+   */
+  public static LinkedList<ConllDependencyNode> getPathBetweenNodes(ConllDependencyNode srcNode, ConllDependencyNode tgtNode) {
+
+    LinkedList<ConllDependencyNode> path = new LinkedList<ConllDependencyNode>();
+    List<LinkedList<ConllDependencyNode>> paths = getPathsToCommonAncestor(srcNode, tgtNode);
+    LinkedList<ConllDependencyNode> srcToAncestorPath = paths.get(0);
+    LinkedList<ConllDependencyNode> tgtToAncestorPath = paths.get(1);
+
+    if (srcNode == tgtNode) {
+      return path;
+    }
+
+    // Join the two paths
+    if (!srcToAncestorPath.isEmpty()) {
+      srcToAncestorPath.removeLast();
+    }
+    path = srcToAncestorPath;
+    while (!tgtToAncestorPath.isEmpty()) {
+      path.add(tgtToAncestorPath.removeLast());
+    }
+
+    return path;
+  }
+
+
+  /**
+   * Converts a path into a "/"-separated string of arc labels; the end points contribute only their arc label (no words), intermediate nodes are written as POS tag, "-", arc label
+   */
+  public static String pathToString(LinkedList<ConllDependencyNode> path) {
+
+    StringBuilder builder = new StringBuilder();
+    for (ConllDependencyNode node : path) {
+      if (node != path.getFirst() && node != path.getLast()) {
+        builder.append(node.getPostag());
+        builder.append("-");
+      }
+      builder.append(node.getDeprel());
+      if (node != path.getLast()) {
+        builder.append("/");
+      }
+    }
+    
+    return builder.toString();
+  }
+
+  public static String dumpDependencyRelations(JCas jcas, Annotation annotation) {
+    
+    StringBuilder builder = new StringBuilder();
+    for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
+      if (depNode.getHead() != null) {
+        builder.append(String.format("%s(%s,%s)\n", depNode.getDeprel(), depNode.getCoveredText(), depNode.getHead().getCoveredText()));
+      }
+    }
+    
+    return builder.toString();
+  }
 }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java?rev=1490695&r1=1490694&r2=1490695&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java Fri Jun  7 15:51:29 2013
@@ -30,30 +30,24 @@ import org.apache.uima.jcas.JCas;
 import org.cleartk.classifier.Feature;
 
 public class DependencyPathFeaturesExtractor implements RelationFeaturesExtractor {
-	
-	
-	@Override
-	public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
-			IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
-		
-		List<Feature> features = new ArrayList<Feature>();
-		
-	    ConllDependencyNode node1 = DependencyParseUtils.findAnnotationHead(jCas, arg1);
-	    ConllDependencyNode node2 = DependencyParseUtils.findAnnotationHead(jCas, arg2);
-	    if (node1 == null || node2 == null) { return features; }
-	    
-	    List<LinkedList<ConllDependencyNode>> paths = DependencyParseUtils.getPathsToCommonAncestor(node1, node2);
-	    LinkedList<ConllDependencyNode> path1 = paths.get(0);
-	    LinkedList<ConllDependencyNode> path2 = paths.get(1);
-	    
-	    features.add(new Feature("DEPENDENCY_PATH_MEAN_DISTANCE_TO_COMMON_ANCESTOR", (path1.size() + path2.size()) / 2.0));
-	    features.add(new Feature("DEPENDENCY_PATH_MAX_DISTANCE_TO_COMMON_ANCESTOR", Math.max(path1.size(), path2.size())));
-	    features.add(new Feature("DEPENDENCY_PATH_MIN_DISTANCE_TO_COMMON_ANCESTOR", Math.min(path1.size(), path2.size())));
-	    
-	    LinkedList<ConllDependencyNode> node1ToNode2Path = DependencyParseUtils.getPathBetweenNodes(node1, node2);
-	    features.add(new Feature("DEPENDENCY_PATH", DependencyParseUtils.pathToString(node1ToNode2Path)));
-	    
-	    return features;
-	}
 
+
+  @Override
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, 
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+
+    List<Feature> features = new ArrayList<Feature>();
+
+    ConllDependencyNode node1 = DependencyParseUtils.findAnnotationHead(jCas, arg1);
+    ConllDependencyNode node2 = DependencyParseUtils.findAnnotationHead(jCas, arg2);
+    if (node1 == null || node2 == null) 
+    { 
+      return features; 
+    }
+
+    LinkedList<ConllDependencyNode> node1ToNode2Path = DependencyParseUtils.getPathBetweenNodes(node1, node2);
+    features.add(new Feature("dependency_path", DependencyParseUtils.pathToString(node1ToNode2Path)));
+
+    return features;
+  }
 }