You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/01/08 00:15:44 UTC
svn commit: r1430085 - in
/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features:
FlatTreeFeatureExtractor.java PETFeatureExtractor.java
Author: tmill
Date: Mon Jan 7 23:15:44 2013
New Revision: 1430085
URL: http://svn.apache.org/viewvc?rev=1430085&view=rev
Log:
ctakes-111: Adds tree-based features for use in tree-kernel svms.
Added:
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java (with props)
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java (with props)
Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java?rev=1430085&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java Mon Jan 7 23:15:44 2013
@@ -0,0 +1,56 @@
+package org.apache.ctakes.relationextractor.ae.features;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+import org.uimafit.util.JCasUtil;
+
+/** Flat POS-tree ("TK_BOP") feature a la Hovy et al 2012 (EACL), for tree-kernel SVMs. */
+public class FlatTreeFeatureExtractor implements RelationFeaturesExtractor {
+  /**
+   * Builds a "BOP" tree over the tokens covered by the common ancestor of the two arguments'
+   * annotation nodes. Each token becomes a "TOK" node over its part-of-speech tag and is filed
+   * under "ARG1", under "ARG2", or under its own "TERM" node, by comparing character offsets.
+   *
+   * @param jcas CAS from which the tree nodes and tokens are read
+   * @param arg1 first relation argument
+   * @param arg2 second relation argument
+   * @return a list holding only the "TK_BOP" feature, whose value is the tree's string form
+   */
+  @Override
+  public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    TreebankNode arg1Node = AnnotationTreeUtils.annotationNode(jcas, arg1);
+    TreebankNode arg2Node = AnnotationTreeUtils.annotationNode(jcas, arg2);
+    TreebankNode ancestor = AnnotationTreeUtils.getCommonAncestor(arg1Node, arg2Node);
+    SimpleTree root = new SimpleTree("BOP");
+    SimpleTree firstArg = new SimpleTree("ARG1");
+    SimpleTree secondArg = new SimpleTree("ARG2");
+    root.addChild(firstArg); // ARG1 first, TERM nodes in token order, ARG2 appended last
+    for (BaseToken tok : JCasUtil.selectCovered(jcas, BaseToken.class, ancestor)) {
+      SimpleTree preTerminal = new SimpleTree("TOK");
+      preTerminal.addChild(new SimpleTree(tok.getPartOfSpeech()));
+      if (tok.getEnd() <= arg1.getEnd()) {
+        firstArg.addChild(preTerminal); // ends no later than arg1
+      } else if (tok.getBegin() >= arg2.getBegin()) {
+        secondArg.addChild(preTerminal); // begins no earlier than arg2
+      } else {
+        SimpleTree wrapper = new SimpleTree("TERM");
+        wrapper.addChild(preTerminal);
+        root.addChild(wrapper);
+      }
+    }
+    root.addChild(secondArg);
+    List<Feature> features = new ArrayList<Feature>();
+    features.add(new Feature("TK_BOP", root.toString()));
+    return features;
+  }
+}
Propchange: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/FlatTreeFeatureExtractor.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java?rev=1430085&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java Mon Jan 7 23:15:44 2013
@@ -0,0 +1,45 @@
+package org.apache.ctakes.relationextractor.ae.features;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+
+/** Path-enclosed tree ("TK_PET") feature of Moschitti 2004 (ACL), for tree-kernel SVMs. */
+public class PETFeatureExtractor implements RelationFeaturesExtractor {
+  /**
+   * Inserts "ARG1" and "ARG2" nodes into a copy of the tree obtained for arg1. When one inserted
+   * node's offsets contain the other's, the feature value is the simple clone of the containing
+   * node; otherwise it is the path-enclosed tree extracted between the two nodes.
+   * @return a list holding only the "TK_PET" feature, whose value is the tree's string form
+   */
+  @Override
+  public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    TopTreebankNode treeCopy = AnnotationTreeUtils.getTreeCopy(jcas, AnnotationTreeUtils.getAnnotationTree(jcas, arg1));
+    TreebankNode arg1Node = AnnotationTreeUtils.insertAnnotationNode(jcas, treeCopy, arg1, "ARG1");
+    TreebankNode arg2Node = AnnotationTreeUtils.insertAnnotationNode(jcas, treeCopy, arg2, "ARG2");
+    SimpleTree pet;
+    if (encloses(arg1Node, arg2Node)) {
+      pet = TreeExtractor.getSimpleClone(arg1Node);
+    } else if (encloses(arg2Node, arg1Node)) {
+      pet = TreeExtractor.getSimpleClone(arg2Node);
+    } else {
+      pet = TreeExtractor.extractPathEnclosedTree(arg1Node, arg2Node, jcas);
+    }
+    List<Feature> features = new ArrayList<Feature>();
+    features.add(new Feature("TK_PET", pet.toString()));
+    return features;
+  }
+  /** Returns true when outer's begin/end offsets contain inner's (equal bounds count). */
+  private static boolean encloses(TreebankNode outer, TreebankNode inner) {
+    return outer.getBegin() <= inner.getBegin() && outer.getEnd() >= inner.getEnd();
+  }
+}
Propchange: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/PETFeatureExtractor.java
------------------------------------------------------------------------------
svn:mime-type = text/plain