You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/06/07 17:51:29 UTC
svn commit: r1490695 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature:
DependencyParseUtils.java DependencyPathFeaturesExtractor.java
Author: dligach
Date: Fri Jun 7 15:51:29 2013
New Revision: 1490695
URL: http://svn.apache.org/r1490695
Log:
modified the dependency path feature to exclude the words on both ends of the path
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java?rev=1490695&r1=1490694&r2=1490695&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java Fri Jun 7 15:51:29 2013
@@ -18,136 +18,139 @@
*/
package org.apache.ctakes.temporal.ae.feature;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.uimafit.util.JCasUtil;
-
-import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
-
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * This is a slightly modified version of the same class from relation extraction.
+ * TODO: eventually replace the relation extraction version with this one.
+ */
public class DependencyParseUtils {
- /**
- * Returns the paths from each node to the common ancestor between them
- */
- public static List<LinkedList<ConllDependencyNode>> getPathsToCommonAncestor(ConllDependencyNode node1, ConllDependencyNode node2) {
- List<LinkedList<ConllDependencyNode>> paths = new ArrayList<LinkedList<ConllDependencyNode>>(2);
- LinkedList<ConllDependencyNode> node1ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node1);
- LinkedList<ConllDependencyNode> node2ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node2);
-
- // We will remove the last item in each path until they diverge
- ConllDependencyNode ancestor = null;
- while (!node1ToHeadPath.isEmpty() && !node2ToHeadPath.isEmpty()) {
- if (node1ToHeadPath.getLast() == node2ToHeadPath.getLast()) {
- node1ToHeadPath.removeLast();
- ancestor = node2ToHeadPath.removeLast();
- } else {
- break;
- }
- }
-
- // Put the common ancestor back on both paths
- if (ancestor != null) {
- node1ToHeadPath.add(ancestor);
- node2ToHeadPath.add(ancestor);
- }
-
- paths.add(node1ToHeadPath);
- paths.add(node2ToHeadPath);
- return paths;
- }
-
- /**
- * Finds the head word within a given annotation span
- */
- public static ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) {
-
- for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
-
- ConllDependencyNode head = depNode.getHead();
- if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() > annotation.getEnd()) {
- // The head is outside the bounds of the annotation, so this node must be the annotation's head
- return depNode;
- }
- }
- // Can this happen?
- return null;
- }
-
- public static LinkedList<ConllDependencyNode> getPathToSentenceHead(ConllDependencyNode node) {
- LinkedList<ConllDependencyNode> path = new LinkedList<ConllDependencyNode>();
- ConllDependencyNode currNode = node;
- while (currNode.getHead() != null) {
- path.add(currNode);
- currNode = currNode.getHead();
- }
- return path;
- }
-
- /**
- * Finds the path between two dependency nodes
- */
- public static LinkedList<ConllDependencyNode> getPathBetweenNodes(ConllDependencyNode srcNode, ConllDependencyNode tgtNode) {
- LinkedList<ConllDependencyNode> path = new LinkedList<ConllDependencyNode>();
- List<LinkedList<ConllDependencyNode>> paths = getPathsToCommonAncestor(srcNode, tgtNode);
- LinkedList<ConllDependencyNode> srcToAncestorPath = paths.get(0);
- LinkedList<ConllDependencyNode> tgtToAncestorPath = paths.get(1);
-
- if (srcNode == tgtNode) {
- return path;
- }
-
- // Join the two paths
- if (!srcToAncestorPath.isEmpty()) {
- srcToAncestorPath.removeLast();
- }
- path = srcToAncestorPath;
- while (!tgtToAncestorPath.isEmpty()) {
- path.add(tgtToAncestorPath.removeLast());
- }
-
- return path;
- }
-
-
- /**
- * This will convert a path into a string lexicalized at the end points with arc labels and POS tags in between
- */
-
- public static String pathToString(LinkedList<ConllDependencyNode> path) {
-
- StringBuilder builder = new StringBuilder();
- for (ConllDependencyNode node : path) {
- if (node == path.getFirst() || node == path.getLast()) {
- builder.append(node.getCoveredText());
- } else {
- builder.append(node.getPostag());
- }
-
- builder.append("-");
- builder.append(node.getDeprel());
- if (node != path.getLast()) {
- builder.append("/");
- }
- }
- return builder.toString();
- }
-
-
-
-
- public static String dumpDependencyRelations(JCas jcas, Annotation annotation) {
- StringBuilder builder = new StringBuilder();
- for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
- if (depNode.getHead() != null) {
- builder.append(String.format("%s(%s,%s)\n", depNode.getDeprel(), depNode.getCoveredText(), depNode.getHead().getCoveredText()));
- }
- }
- return builder.toString();
-
- }
-
+ /**
+ * Returns the paths from each node up to the common ancestor between them.
+ *
+ * Each path is built with getPathToSentenceHead, so it starts at the given node and follows
+ * head links upward. The nodes that both paths share at their tails are stripped, and the
+ * last node stripped (the shared node closest to the two arguments) is appended back to
+ * both paths.
+ *
+ * @param node1 the first dependency node
+ * @param node2 the second dependency node
+ * @return a two-element list: index 0 holds the path for node1 and index 1 the path for node2.
+ *         If the two paths share no node, both are returned as built, with nothing appended.
+ */
+ public static List<LinkedList<ConllDependencyNode>> getPathsToCommonAncestor(ConllDependencyNode node1, ConllDependencyNode node2) {
+
+ List<LinkedList<ConllDependencyNode>> paths = new ArrayList<LinkedList<ConllDependencyNode>>(2);
+ LinkedList<ConllDependencyNode> node1ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node1);
+ LinkedList<ConllDependencyNode> node2ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node2);
+
+ // Pop the last (topmost) item of each path for as long as both paths end in the same node
+ ConllDependencyNode ancestor = null;
+ while (!node1ToHeadPath.isEmpty() && !node2ToHeadPath.isEmpty()) {
+ if (node1ToHeadPath.getLast() == node2ToHeadPath.getLast()) { // reference identity, not equals()
+ node1ToHeadPath.removeLast();
+ ancestor = node2ToHeadPath.removeLast(); // remembers the most recently removed shared node
+ } else {
+ break;
+ }
+ }
+
+ // Put the common ancestor back on both paths (skipped when no shared node was found)
+ if (ancestor != null) {
+ node1ToHeadPath.add(ancestor);
+ node2ToHeadPath.add(ancestor);
+ }
+
+ paths.add(node1ToHeadPath);
+ paths.add(node2ToHeadPath);
+
+ return paths;
+ }
+
+ /**
+ * Finds the head word within a given annotation span.
+ *
+ * Iterates over the dependency nodes that JCasUtil.selectCovered returns for the annotation
+ * and returns the first one whose own head is either null or positioned outside the span.
+ *
+ * NOTE(review): the left-hand test uses a non-strict comparison (head end at or before the
+ * annotation begin) while the right-hand test is strict (head begin after the annotation end).
+ * A head that begins exactly at annotation.getEnd() is therefore treated as inside the span.
+ * If offsets are end-exclusive, a non-strict comparison may be intended on both sides; confirm.
+ *
+ * @param jcas the JCas handed to JCasUtil.selectCovered
+ * @param annotation the span whose head node is wanted
+ * @return the first qualifying node, or null when no node qualifies
+ */
+ public static ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) {
+
+ for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
+
+ ConllDependencyNode head = depNode.getHead();
+ if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() > annotation.getEnd()) {
+ // The head is missing or outside the bounds of the annotation, so this node is taken as the head of the annotation
+ return depNode;
+ }
+ }
+
+ // Reached when no iterated node has a missing or outside head, including when nothing is iterated at all
+ return null;
+ }
+
+ public static LinkedList<ConllDependencyNode> getPathToSentenceHead(ConllDependencyNode node) {
+ /*
+ * Collects the chain of nodes from the given node upward along getHead() links.
+ * The walk stops at the first node whose head is null, and that node is NOT added,
+ * so the result is empty when the given node itself has no head.
+ * Order is bottom-up: the given node (when it has a head) comes first.
+ */
+ LinkedList<ConllDependencyNode> path = new LinkedList<ConllDependencyNode>();
+ ConllDependencyNode currNode = node;
+ while (currNode.getHead() != null) {
+ path.add(currNode); // appended at the tail, so higher nodes end up later in the list
+ currNode = currNode.getHead();
+ }
+
+ return path;
+ }
+
+ /**
+ * Finds the path between two dependency nodes.
+ *
+ * The result runs from srcNode upward to the common ancestor and then downward to tgtNode,
+ * with the common ancestor appearing once. An empty list is returned when both arguments
+ * are the same object.
+ *
+ * NOTE(review): if the two upward paths share no node, getPathsToCommonAncestor appends no
+ * ancestor, and the removeLast() call below then drops the topmost node of the source path
+ * rather than a duplicated ancestor; confirm whether that case can occur and is intended.
+ *
+ * @param srcNode the node the path starts from
+ * @param tgtNode the node the path ends at
+ * @return the joined path; this is the same list object that held the source-side path
+ */
+ public static LinkedList<ConllDependencyNode> getPathBetweenNodes(ConllDependencyNode srcNode, ConllDependencyNode tgtNode) {
+
+ LinkedList<ConllDependencyNode> path = new LinkedList<ConllDependencyNode>();
+ List<LinkedList<ConllDependencyNode>> paths = getPathsToCommonAncestor(srcNode, tgtNode);
+ LinkedList<ConllDependencyNode> srcToAncestorPath = paths.get(0);
+ LinkedList<ConllDependencyNode> tgtToAncestorPath = paths.get(1);
+
+ if (srcNode == tgtNode) { // same object: there is no path to report
+ return path;
+ }
+
+ // Join the two paths: drop the last entry of the source side so the ancestor is not listed twice
+ if (!srcToAncestorPath.isEmpty()) {
+ srcToAncestorPath.removeLast();
+ }
+ path = srcToAncestorPath;
+ while (!tgtToAncestorPath.isEmpty()) {
+ path.add(tgtToAncestorPath.removeLast()); // consumes the target side from its top down to tgtNode
+ }
+
+ return path;
+ }
+
+
+ /**
+ * Converts a path into a string of arc labels joined by forward slashes.
+ *
+ * The first and last nodes contribute only their dependency relation (getDeprel()); every
+ * node in between contributes its POS tag, a hyphen, and its dependency relation. The covered
+ * text of the nodes is not part of the output, so the end points are not lexicalized.
+ * An empty path yields an empty string.
+ *
+ * @param path the nodes to render, in path order
+ * @return the rendered path string
+ */
+ public static String pathToString(LinkedList<ConllDependencyNode> path) {
+
+ StringBuilder builder = new StringBuilder();
+ for (ConllDependencyNode node : path) {
+ if (node != path.getFirst() && node != path.getLast()) { // interior node, compared by reference
+ builder.append(node.getPostag());
+ builder.append("-");
+ }
+ builder.append(node.getDeprel());
+ if (node != path.getLast()) { // separator after every node except the last
+ builder.append("/");
+ }
+ }
+
+ return builder.toString();
+ }
+
+ public static String dumpDependencyRelations(JCas jcas, Annotation annotation) {
+ /*
+ * Builds a multi-line dump of the dependency arcs for the nodes that JCasUtil.selectCovered
+ * returns for the annotation. Each node with a non-null head produces one line of the form
+ * deprel(nodeText,headText); nodes without a head are skipped.
+ */
+ StringBuilder builder = new StringBuilder();
+ for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
+ if (depNode.getHead() != null) { // guards the getHead().getCoveredText() call on the next line
+ builder.append(String.format("%s(%s,%s)\n", depNode.getDeprel(), depNode.getCoveredText(), depNode.getHead().getCoveredText()));
+ }
+ }
+
+ return builder.toString();
+ }
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java?rev=1490695&r1=1490694&r2=1490695&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java Fri Jun 7 15:51:29 2013
@@ -30,30 +30,24 @@ import org.apache.uima.jcas.JCas;
import org.cleartk.classifier.Feature;
public class DependencyPathFeaturesExtractor implements RelationFeaturesExtractor {
-
-
- @Override
- public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
- IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
-
- List<Feature> features = new ArrayList<Feature>();
-
- ConllDependencyNode node1 = DependencyParseUtils.findAnnotationHead(jCas, arg1);
- ConllDependencyNode node2 = DependencyParseUtils.findAnnotationHead(jCas, arg2);
- if (node1 == null || node2 == null) { return features; }
-
- List<LinkedList<ConllDependencyNode>> paths = DependencyParseUtils.getPathsToCommonAncestor(node1, node2);
- LinkedList<ConllDependencyNode> path1 = paths.get(0);
- LinkedList<ConllDependencyNode> path2 = paths.get(1);
-
- features.add(new Feature("DEPENDENCY_PATH_MEAN_DISTANCE_TO_COMMON_ANCESTOR", (path1.size() + path2.size()) / 2.0));
- features.add(new Feature("DEPENDENCY_PATH_MAX_DISTANCE_TO_COMMON_ANCESTOR", Math.max(path1.size(), path2.size())));
- features.add(new Feature("DEPENDENCY_PATH_MIN_DISTANCE_TO_COMMON_ANCESTOR", Math.min(path1.size(), path2.size())));
-
- LinkedList<ConllDependencyNode> node1ToNode2Path = DependencyParseUtils.getPathBetweenNodes(node1, node2);
- features.add(new Feature("DEPENDENCY_PATH", DependencyParseUtils.pathToString(node1ToNode2Path)));
-
- return features;
- }
+
+ @Override
+ public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+ /*
+ * Produces a single feature named dependency_path whose value is the string form
+ * (DependencyParseUtils.pathToString) of the dependency path between the head nodes of
+ * arg1 and arg2. Returns an empty list when either head node cannot be found.
+ */
+ List<Feature> features = new ArrayList<Feature>();
+
+ ConllDependencyNode node1 = DependencyParseUtils.findAnnotationHead(jCas, arg1);
+ ConllDependencyNode node2 = DependencyParseUtils.findAnnotationHead(jCas, arg2);
+ if (node1 == null || node2 == null) // findAnnotationHead returns null when it finds no head node
+ {
+ return features;
+ }
+
+ LinkedList<ConllDependencyNode> node1ToNode2Path = DependencyParseUtils.getPathBetweenNodes(node1, node2);
+ features.add(new Feature("dependency_path", DependencyParseUtils.pathToString(node1ToNode2Path)));
+
+ return features;
+ }
}