You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/06/07 16:28:44 UTC

svn commit: r1490662 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae: EventEventRelationAnnotator.java feature/DependencyParseUtils.java feature/DependencyPathFeaturesExtractor.java

Author: dligach
Date: Fri Jun  7 14:28:44 2013
New Revision: 1490662

URL: http://svn.apache.org/r1490662
Log:
added dependency path features

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java?rev=1490662&r1=1490661&r2=1490662&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java Fri Jun  7 14:28:44 2013
@@ -10,6 +10,7 @@ import org.apache.ctakes.relationextract
 import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.DependencyPathFeaturesExtractor;
 import org.apache.ctakes.temporal.ae.feature.EventArgumentPropertyExtractor;
 import org.apache.ctakes.temporal.ae.feature.UmlsFeatureExtractor;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
@@ -66,14 +67,7 @@ public class EventEventRelationAnnotator
 	    						, new PartOfSpeechFeaturesExtractor()
 	    						, new EventArgumentPropertyExtractor()
 	    						, new UmlsFeatureExtractor()
-//	    						, new DependencyTreeFeaturesExtractor()
-//	    						, new DependencyPathFeaturesExtractor()
-	    						
-//	    						, new TemporalAttributeFeatureExtractor()
-//	    						, new EventTimeFlatTreeFeatureExtractor()
-//	    						, new TemporalPETExtractor()
-//	    						, new TemporalPathExtractor()
-//	    						, new TemporalFTExtractor()
+	    						, new DependencyPathFeaturesExtractor()
 	    						);
 	  }
 
@@ -117,5 +111,4 @@ public class EventEventRelationAnnotator
 		}
 		return category;
 	}
-
 }

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java?rev=1490662&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java Fri Jun  7 14:28:44 2013
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+
+public class DependencyParseUtils {
+
+	/**
+	 * Computes, for two dependency nodes, the path from each node up to the
+	 * lowest ancestor they share. Element 0 of the result is the path of node1,
+	 * element 1 the path of node2; when a shared ancestor exists it is the last
+	 * element of both paths.
+	 */
+	public static List<LinkedList<ConllDependencyNode>> getPathsToCommonAncestor(ConllDependencyNode node1, ConllDependencyNode node2) {
+		LinkedList<ConllDependencyNode> firstPath = DependencyParseUtils.getPathToSentenceHead(node1);
+		LinkedList<ConllDependencyNode> secondPath = DependencyParseUtils.getPathToSentenceHead(node2);
+
+		// Strip the tail both paths have in common, remembering the last node stripped
+		ConllDependencyNode shared = null;
+		while (!(firstPath.isEmpty() || secondPath.isEmpty())
+				&& firstPath.getLast() == secondPath.getLast()) {
+			shared = firstPath.removeLast();
+			secondPath.removeLast();
+		}
+
+		// Re-append the lowest shared node so that both paths end at the common ancestor
+		if (shared != null) {
+			firstPath.add(shared);
+			secondPath.add(shared);
+		}
+
+		List<LinkedList<ConllDependencyNode>> result = new ArrayList<LinkedList<ConllDependencyNode>>(2);
+		result.add(firstPath);
+		result.add(secondPath);
+		return result;
+	}
+
+	/**
+	 * Finds the head word within a given annotation span
+	 */
+	public static ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) {
+	
+	    for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
+	    	
+	    	ConllDependencyNode head = depNode.getHead();
+	    	if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() > annotation.getEnd()) {
+	    		// The head is outside the bounds of the annotation, so this node must be the annotation's head
+	    		return depNode;
+	    	}
+	    }
+	    // Can this happen?
+	    return null;
+	}
+
+	/** Returns the chain of nodes from node upwards, excluding the first node met that has no head. */
+	public static LinkedList<ConllDependencyNode> getPathToSentenceHead(ConllDependencyNode node) {
+		LinkedList<ConllDependencyNode> chain = new LinkedList<ConllDependencyNode>();
+		// Follow the head links; a node whose head is null ends the walk and is not added
+		for (ConllDependencyNode cursor = node; cursor.getHead() != null; cursor = cursor.getHead()) {
+			chain.add(cursor);
+		}
+		return chain;
+	}
+
+	/**
+	 * Builds the node sequence that leads from srcNode up to the common ancestor
+	 * and from there down to tgtNode. Identical nodes yield an empty path.
+	 */
+	public static LinkedList<ConllDependencyNode> getPathBetweenNodes(ConllDependencyNode srcNode, ConllDependencyNode tgtNode) {
+		List<LinkedList<ConllDependencyNode>> ancestorPaths = getPathsToCommonAncestor(srcNode, tgtNode);
+		if (srcNode == tgtNode) {
+			return new LinkedList<ConllDependencyNode>();
+		}
+
+		LinkedList<ConllDependencyNode> upward = ancestorPaths.get(0);
+		LinkedList<ConllDependencyNode> downward = ancestorPaths.get(1);
+
+		// Drop the last element of the source leg (the common ancestor, when one exists);
+		if (!upward.isEmpty()) {
+			upward.removeLast();
+		}
+
+		// ... the target leg is then appended in reverse order, ending at tgtNode
+		while (!downward.isEmpty()) {
+			upward.add(downward.removeLast());
+		}
+		return upward;
+	}
+	
+
+	/**
+	 * This will convert a path into a string lexicalized at the end points with arc labels and POS tags in between
+	 */
+	
+	public static String pathToString(LinkedList<ConllDependencyNode> path) {
+		
+		StringBuilder builder = new StringBuilder();
+		for (ConllDependencyNode node : path) {
+			if (node == path.getFirst() || node == path.getLast()) {
+				builder.append(node.getCoveredText());
+			} else {
+				builder.append(node.getPostag());
+			}
+			
+			builder.append("-");
+			builder.append(node.getDeprel());
+			if (node != path.getLast()) {
+				builder.append("/");
+			}
+		}
+		return builder.toString();
+	}
+	
+
+	
+
+	/** Lists every covered node that has a head as one "deprel(text,headText)" line. */
+	public static String dumpDependencyRelations(JCas jcas, Annotation annotation) {
+		StringBuilder dump = new StringBuilder();
+		for (ConllDependencyNode dependent : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
+			ConllDependencyNode governor = dependent.getHead();
+			if (governor == null) { continue; }
+			dump.append(String.format("%s(%s,%s)\n", dependent.getDeprel(), dependent.getCoveredText(), governor.getCoveredText()));
+		}
+		return dump.toString();
+	}
+
+}

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java?rev=1490662&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java Fri Jun  7 14:28:44 2013
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+
+public class DependencyPathFeaturesExtractor implements RelationFeaturesExtractor {
+	
+	
+	@Override
+	public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+		
+		List<Feature> features = new ArrayList<Feature>();
+		
+	    ConllDependencyNode node1 = DependencyParseUtils.findAnnotationHead(jCas, arg1);
+	    ConllDependencyNode node2 = DependencyParseUtils.findAnnotationHead(jCas, arg2);
+	    if (node1 == null || node2 == null) { return features; }
+	    
+	    List<LinkedList<ConllDependencyNode>> paths = DependencyParseUtils.getPathsToCommonAncestor(node1, node2);
+	    LinkedList<ConllDependencyNode> path1 = paths.get(0);
+	    LinkedList<ConllDependencyNode> path2 = paths.get(1);
+	    
+	    features.add(new Feature("DEPENDENCY_PATH_MEAN_DISTANCE_TO_COMMON_ANCESTOR", (path1.size() + path2.size()) / 2.0));
+	    features.add(new Feature("DEPENDENCY_PATH_MAX_DISTANCE_TO_COMMON_ANCESTOR", Math.max(path1.size(), path2.size())));
+	    features.add(new Feature("DEPENDENCY_PATH_MIN_DISTANCE_TO_COMMON_ANCESTOR", Math.min(path1.size(), path2.size())));
+	    
+	    LinkedList<ConllDependencyNode> node1ToNode2Path = DependencyParseUtils.getPathBetweenNodes(node1, node2);
+	    features.add(new Feature("DEPENDENCY_PATH", DependencyParseUtils.pathToString(node1ToNode2Path)));
+	    
+	    return features;
+	}
+
+}