You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2013/06/07 16:28:44 UTC
svn commit: r1490662 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae:
EventEventRelationAnnotator.java feature/DependencyParseUtils.java
feature/DependencyPathFeaturesExtractor.java
Author: dligach
Date: Fri Jun 7 14:28:44 2013
New Revision: 1490662
URL: http://svn.apache.org/r1490662
Log:
added dependency path features
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java?rev=1490662&r1=1490661&r2=1490662&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java Fri Jun 7 14:28:44 2013
@@ -10,6 +10,7 @@ import org.apache.ctakes.relationextract
import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.temporal.ae.feature.DependencyPathFeaturesExtractor;
import org.apache.ctakes.temporal.ae.feature.EventArgumentPropertyExtractor;
import org.apache.ctakes.temporal.ae.feature.UmlsFeatureExtractor;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
@@ -66,14 +67,7 @@ public class EventEventRelationAnnotator
, new PartOfSpeechFeaturesExtractor()
, new EventArgumentPropertyExtractor()
, new UmlsFeatureExtractor()
-// , new DependencyTreeFeaturesExtractor()
-// , new DependencyPathFeaturesExtractor()
-
-// , new TemporalAttributeFeatureExtractor()
-// , new EventTimeFlatTreeFeatureExtractor()
-// , new TemporalPETExtractor()
-// , new TemporalPathExtractor()
-// , new TemporalFTExtractor()
+ , new DependencyPathFeaturesExtractor()
);
}
@@ -117,5 +111,4 @@ public class EventEventRelationAnnotator
}
return category;
}
-
}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java?rev=1490662&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java Fri Jun 7 14:28:44 2013
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+
+public class DependencyParseUtils {
+
+ /**
+  * Computes, for two dependency nodes, the path from each node up to their common ancestor.
+  * <p>
+  * Both head-ward paths are built first; the tail they share is then stripped, and the last
+  * shared node that was stripped (if there was one) is appended back onto both paths.
+  *
+  * @return a two-element list: the path for {@code node1} followed by the path for {@code node2}
+  */
+ public static List<LinkedList<ConllDependencyNode>> getPathsToCommonAncestor(ConllDependencyNode node1, ConllDependencyNode node2) {
+   LinkedList<ConllDependencyNode> firstPath = DependencyParseUtils.getPathToSentenceHead(node1);
+   LinkedList<ConllDependencyNode> secondPath = DependencyParseUtils.getPathToSentenceHead(node2);
+
+   // Pop matching tail elements; the most recently popped one is the deepest node both paths share.
+   ConllDependencyNode shared = null;
+   while (!firstPath.isEmpty() && !secondPath.isEmpty() && firstPath.getLast() == secondPath.getLast()) {
+     firstPath.removeLast();
+     shared = secondPath.removeLast();
+   }
+
+   if (shared != null) {
+     firstPath.addLast(shared);
+     secondPath.addLast(shared);
+   }
+
+   List<LinkedList<ConllDependencyNode>> result = new ArrayList<LinkedList<ConllDependencyNode>>(2);
+   result.add(firstPath);
+   result.add(secondPath);
+   return result;
+ }
+
+ /**
+  * Finds the head word within a given annotation span: the first covered dependency node
+  * whose own head is absent or lies entirely outside the span.
+  * @return that node, or {@code null} when no covered node qualifies
+  */
+ public static ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) {
+   for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
+     ConllDependencyNode head = depNode.getHead();
+     // End offsets are treated as exclusive (see the getEnd() <= getBegin() test), so a head that
+     // begins exactly at annotation.getEnd() is already outside the span: use >= rather than >.
+     if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() >= annotation.getEnd()) {
+       return depNode;
+     }
+   }
+   return null;
+ }
+
+ /** Collects {@code node} and its successive heads, stopping before the first node that has no head. */
+ public static LinkedList<ConllDependencyNode> getPathToSentenceHead(ConllDependencyNode node) {
+   LinkedList<ConllDependencyNode> chain = new LinkedList<ConllDependencyNode>();
+   // A node is only appended while it still has a head, so the top-most (head-less) node is left out.
+   for (ConllDependencyNode cursor = node; cursor.getHead() != null; cursor = cursor.getHead()) {
+     chain.addLast(cursor);
+   }
+   return chain;
+ }
+
+ /**
+  * Builds the node sequence leading from {@code srcNode} to {@code tgtNode}: the source-side
+  * ancestor path minus its final node, followed by the target-side ancestor path in reverse.
+  *
+  * @return the joined path, or an empty list when both arguments are the same node
+  */
+ public static LinkedList<ConllDependencyNode> getPathBetweenNodes(ConllDependencyNode srcNode, ConllDependencyNode tgtNode) {
+   List<LinkedList<ConllDependencyNode>> bothPaths = getPathsToCommonAncestor(srcNode, tgtNode);
+   if (srcNode == tgtNode) {
+     return new LinkedList<ConllDependencyNode>();
+   }
+
+   LinkedList<ConllDependencyNode> upward = bothPaths.get(0);
+   LinkedList<ConllDependencyNode> downward = bothPaths.get(1);
+   // When a common ancestor exists both paths end with it; drop it here so it is listed only once.
+   if (!upward.isEmpty()) {
+     upward.removeLast();
+   }
+   // Consume the target-side path from its end, appending each node to the source-side path.
+   while (!downward.isEmpty()) {
+     upward.addLast(downward.removeLast());
+   }
+   return upward;
+ }
+
+
+ /**
+ * This will convert a path into a string lexicalized at the end points with arc labels and POS tags in between
+ */
+
+ public static String pathToString(LinkedList<ConllDependencyNode> path) {
+
+ StringBuilder builder = new StringBuilder();
+ for (ConllDependencyNode node : path) {
+ if (node == path.getFirst() || node == path.getLast()) {
+ builder.append(node.getCoveredText());
+ } else {
+ builder.append(node.getPostag());
+ }
+
+ builder.append("-");
+ builder.append(node.getDeprel());
+ if (node != path.getLast()) {
+ builder.append("/");
+ }
+ }
+ return builder.toString();
+ }
+
+
+
+
+ /** Lists every covered node that has a head as one {@code deprel(text,headText)} line. */
+ public static String dumpDependencyRelations(JCas jcas, Annotation annotation) {
+   StringBuilder dump = new StringBuilder();
+   for (ConllDependencyNode child : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) {
+     ConllDependencyNode parent = child.getHead();
+     if (parent == null) { continue; }
+     dump.append(String.format("%s(%s,%s)\n", child.getDeprel(), child.getCoveredText(), parent.getCoveredText()));
+   }
+   return dump.toString();
+ }
+
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java?rev=1490662&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java Fri Jun 7 14:28:44 2013
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+
+public class DependencyPathFeaturesExtractor implements RelationFeaturesExtractor {
+
+
+ @Override
+ public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+ IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+
+ List<Feature> features = new ArrayList<Feature>();
+
+ ConllDependencyNode node1 = DependencyParseUtils.findAnnotationHead(jCas, arg1);
+ ConllDependencyNode node2 = DependencyParseUtils.findAnnotationHead(jCas, arg2);
+ if (node1 == null || node2 == null) { return features; }
+
+ List<LinkedList<ConllDependencyNode>> paths = DependencyParseUtils.getPathsToCommonAncestor(node1, node2);
+ LinkedList<ConllDependencyNode> path1 = paths.get(0);
+ LinkedList<ConllDependencyNode> path2 = paths.get(1);
+
+ features.add(new Feature("DEPENDENCY_PATH_MEAN_DISTANCE_TO_COMMON_ANCESTOR", (path1.size() + path2.size()) / 2.0));
+ features.add(new Feature("DEPENDENCY_PATH_MAX_DISTANCE_TO_COMMON_ANCESTOR", Math.max(path1.size(), path2.size())));
+ features.add(new Feature("DEPENDENCY_PATH_MIN_DISTANCE_TO_COMMON_ANCESTOR", Math.min(path1.size(), path2.size())));
+
+ LinkedList<ConllDependencyNode> node1ToNode2Path = DependencyParseUtils.getPathBetweenNodes(node1, node2);
+ features.add(new Feature("DEPENDENCY_PATH", DependencyParseUtils.pathToString(node1ToNode2Path)));
+
+ return features;
+ }
+
+}