You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/01/08 00:15:44 UTC

svn commit: r1430085 - in /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features: FlatTreeFeatureExtractor.java PETFeatureExtractor.java

Author: tmill
Date: Mon Jan  7 23:15:44 2013
New Revision: 1430085

URL: http://svn.apache.org/viewvc?rev=1430085&view=rev
Log:
ctakes-111: Adds tree-based features for use in tree-kernel svms.

Added:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java   (with props)
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java   (with props)

Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java?rev=1430085&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java Mon Jan  7 23:15:44 2013
@@ -0,0 +1,56 @@
+package org.apache.ctakes.relationextractor.ae.features;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+import org.uimafit.util.JCasUtil;
+
+public class FlatTreeFeatureExtractor implements RelationFeaturesExtractor {
+	// Emits one "TK_BOP" feature: the string form of a flat tree built from the POS tags of the tokens covered by the arguments' common ancestor node.
+	// Tree shape: BOP root -> ARG1, then one TERM per remaining token, then ARG2. Returns flat POS-trees a la Hovy et al 2012 (EACL)
+	@Override
+	public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+		List<Feature> features = new ArrayList<Feature>();
+		// Root of the flat tree; ARG1 is attached before the token loop and ARG2 after it, which fixes the order of the root's children.
+		SimpleTree tree = null;
+		tree = new SimpleTree("BOP");
+		TreebankNode lca = AnnotationTreeUtils.getCommonAncestor(AnnotationTreeUtils.annotationNode(jcas, arg1),
+																   AnnotationTreeUtils.annotationNode(jcas, arg2));
+		SimpleTree arg1Tree = new SimpleTree("ARG1");
+		SimpleTree arg2Tree = new SimpleTree("ARG2");
+		
+		tree.addChild(arg1Tree);
+		List<BaseToken> coveredTokens = JCasUtil.selectCovered(jcas, BaseToken.class, lca);
+		for(BaseToken token : coveredTokens){
+			// create pre-terminal tree: a TOK node whose only child is the token's part-of-speech tag (the token text itself is not used)
+			SimpleTree tokenTree = new SimpleTree("TOK");
+//			tokenTree.addChild(new SimpleTree(token.getCoveredText()));
+			tokenTree.addChild(new SimpleTree(token.getPartOfSpeech()));
+			// NOTE(review): the offset tests below appear to assume arg1 precedes arg2 in the text -- confirm against callers.
+			// Bucket by offsets: token ends at/before arg1's end -> ARG1; token begins at/after arg2's begin -> ARG2; otherwise a TERM child of the root.
+			if(token.getEnd() <= arg1.getEnd()){
+				arg1Tree.addChild(tokenTree);
+			}else if(token.getBegin() >= arg2.getBegin()){
+				arg2Tree.addChild(tokenTree);
+			}else{
+				SimpleTree termTree = new SimpleTree("TERM");
+				termTree.addChild(tokenTree);
+				tree.addChild(termTree);
+			}
+		}
+		tree.addChild(arg2Tree);
+		
+		features.add(new Feature("TK_BOP", tree.toString()));
+		return features;
+	}
+
+}

Propchange: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java?rev=1430085&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java Mon Jan  7 23:15:44 2013
@@ -0,0 +1,45 @@
+package org.apache.ctakes.relationextractor.ae.features;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+
+public class PETFeatureExtractor implements RelationFeaturesExtractor {
+	// Emits one "TK_PET" feature: the string form of the tree selected below for the two arguments.
+	// Returns Path-enclosed trees of Moschitti 2004 (ACL); nodes labelled "ARG1"/"ARG2" are requested for the arguments via insertAnnotationNode.
+	@Override
+	public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+		List<Feature> features = new ArrayList<Feature>();
+		// first get a copy of the tree root, looked up from arg1 only (presumably both arguments share one parse tree -- TODO confirm)
+		TopTreebankNode root = AnnotationTreeUtils.getTreeCopy(jcas, AnnotationTreeUtils.getAnnotationTree(jcas, arg1));
+		
+//		SimpleTree tempClone = TreeExtractor.getSimpleClone(root);
+//		features.add(new Feature("TK_FULL", tempClone.toString()));
+		TreebankNode t1 = AnnotationTreeUtils.insertAnnotationNode(jcas, root, arg1, "ARG1");
+		TreebankNode t2 = AnnotationTreeUtils.insertAnnotationNode(jcas, root, arg2, "ARG2");
+
+		SimpleTree tree = null;
+		if(t1.getBegin() <= t2.getBegin() && t1.getEnd() >= t2.getEnd()){
+			// t1 encloses t2 (identical spans also land here): clone t1's subtree instead of extracting a path
+			tree = TreeExtractor.getSimpleClone(t1);
+		}else if(t2.getBegin() <= t1.getBegin() && t2.getEnd() >= t1.getEnd()){
+			// t2 encloses t1: clone t2's subtree instead of extracting a path
+			tree = TreeExtractor.getSimpleClone(t2);
+		}else{
+			tree = TreeExtractor.extractPathEnclosedTree(t1, t2, jcas);
+		}
+
+		features.add(new Feature("TK_PET", tree.toString()));
+		return features;
+	}
+
+}

Propchange: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain