You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/06/14 21:12:47 UTC
svn commit: r1493217 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae:
TimeAnnotator.java feature/ParseSpanFeatureExtractor.java
Author: tmill
Date: Fri Jun 14 19:12:47 2013
New Revision: 1493217
URL: http://svn.apache.org/r1493217
Log:
Added parse feature extractor to regular time annotator.
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java (with props)
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java?rev=1493217&r1=1493216&r2=1493217&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java Fri Jun 14 19:12:47 2013
@@ -22,6 +22,7 @@ import java.io.File;
import java.util.ArrayList;
import java.util.List;
+import org.apache.ctakes.temporal.ae.feature.ParseSpanFeatureExtractor;
import org.apache.ctakes.temporal.ae.feature.TimeWordTypeExtractor;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
@@ -80,7 +81,10 @@ public class TimeAnnotator extends Tempo
protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
protected List<CleartkExtractor> contextFeatureExtractors;
-
+
+// protected List<SimpleFeatureExtractor> parseFeatureExtractors;
+ protected ParseSpanFeatureExtractor parseExtractor;
+
private BIOChunking<BaseToken, TimeMention> timeChunking;
@Override
@@ -97,6 +101,9 @@ public class TimeAnnotator extends Tempo
new TypePathExtractor(BaseToken.class, "partOfSpeech"),
new TimeWordTypeExtractor());
+// CombinedExtractor parseExtractors = new CombinedExtractor(
+// new ParseSpanFeatureExtractor()
+// );
this.tokenFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
this.tokenFeatureExtractors.add(allExtractors);
@@ -106,6 +113,9 @@ public class TimeAnnotator extends Tempo
allExtractors,
new Preceding(3),
new Following(3)));
+// this.parseFeatureExtractors = new ArrayList<ParseSpanFeatureExtractor>();
+// this.parseFeatureExtractors.add(new ParseSpanFeatureExtractor());
+ parseExtractor = new ParseSpanFeatureExtractor();
}
@Override
@@ -146,6 +156,18 @@ public class TimeAnnotator extends Tempo
String previousOutcome = index < 0 ? "O" : outcomes.get(index);
features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
}
+ // features from dominating parse tree
+// for(SimpleFeatureExtractor extractor : this.parseFeatureExtractors){
+ BaseToken startToken = token;
+ for(int i = tokenIndex-1; i >= 0; --i){
+ String outcome = outcomes.get(i);
+ if(outcome.equals("O")){
+ break;
+ }
+ startToken = tokens.get(i);
+ }
+ features.addAll(parseExtractor.extract(jCas, startToken.getBegin(), token.getEnd()));
+// }
// if training, write to data file
if (this.isTraining()) {
String outcome = outcomes.get(tokenIndex);
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java?rev=1493217&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java Fri Jun 14 19:12:47 2013
@@ -0,0 +1,62 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+
+/** Builds classifier features describing the parse node found for a text span. */
+public class ParseSpanFeatureExtractor {
+  /**
+   * Extracts features from the node AnnotationTreeUtils.annotationNode returns for the span.
+   * @param jcas CAS handed to the node lookup
+   * @param begin begin offset of the span
+   * @param end end offset of the span
+   * @return the extracted features; empty when no node is found
+   */
+  public List<Feature> extract(JCas jcas, int begin, int end) {
+    List<Feature> feats = new ArrayList<Feature>();
+    TreebankNode domNode = AnnotationTreeUtils.annotationNode(jcas, begin, end);
+    if (domNode == null) {
+      return feats;
+    }
+    feats.add(new Feature("DominatingTreeCat", domNode.getNodeType()));
+    if (domNode.getNodeTags() != null) {
+      // Every node tag is emitted, not only "TMP", despite the feature name.
+      for (int ind = 0; ind < domNode.getNodeTags().size(); ind++) {
+        feats.add(new Feature("DominatingTmpTag", domNode.getNodeTags(ind)));
+      }
+    }
+    TreebankNode parent = domNode.getParent();
+    if (parent != null) {
+      // Fix: report the parent's type; the node's own type is already DominatingTreeCat.
+      feats.add(new Feature("DominatingTreeParent", parent.getNodeType()));
+    }
+    // Walk the ancestor chain until getParent() returns null, collecting each node's tags.
+    for (TreebankNode anc = parent; anc != null; anc = anc.getParent()) {
+      if (anc.getNodeTags() != null) {
+        for (int ind = 0; ind < anc.getNodeTags().size(); ind++) {
+          feats.add(new Feature("DominatingAncestorTmpTag", anc.getNodeTags(ind)));
+        }
+      }
+    }
+    if (domNode.getLeaf()) {
+      feats.add(new Feature("DominatingIsLeaf"));
+    } else {
+      StringBuilder production = new StringBuilder();
+      for (int i = 0; i < domNode.getChildren().size(); i++) {
+        String childType = domNode.getChildren(i).getNodeType();
+        production.append(childType).append("_");
+        feats.add(new Feature("DominatingChildBag" + childType));
+      }
+      feats.add(new Feature("DominatingProduction", production.toString()));
+    }
+    if (domNode.getBegin() == begin && domNode.getEnd() == end) {
+      feats.add(new Feature("DominatingExactMatch"));
+    }
+    return feats;
+  }
+}
Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseSpanFeatureExtractor.java
------------------------------------------------------------------------------
svn:mime-type = text/plain