You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/05/03 17:50:15 UTC
svn commit: r1478850 - in
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal:
ae/ConstituencyBasedTimeAnnotator.java eval/EvaluationOfTimeSpans.java
Author: tmill
Date: Fri May 3 15:50:14 2013
New Revision: 1478850
URL: http://svn.apache.org/r1478850
Log:
Checked in some code for potentially adding 2 annotators for time spans.
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java?rev=1478850&r1=1478849&r2=1478850&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java Fri May 3 15:50:14 2013
@@ -2,7 +2,9 @@ package org.apache.ctakes.temporal.ae;
import java.io.File;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
@@ -20,7 +22,7 @@ import org.cleartk.classifier.DataWriter
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instance;
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
-import static org.cleartk.classifier.feature.extractor.CleartkExtractor.*;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Bag;
import org.cleartk.classifier.feature.extractor.CleartkExtractor.Covered;
import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
@@ -89,12 +91,20 @@ TemporalEntityAnnotator_ImplBase {
public void process(JCas jCas, Segment segment)
throws AnalysisEngineProcessException {
+ HashSet<TimeMention> mentions = new HashSet<TimeMention>(JCasUtil.selectCovered(TimeMention.class, segment));
+
for(TopTreebankNode root : JCasUtil.selectCovered(TopTreebankNode.class, segment)){
- processNode(jCas, root.getChildren(0));
+ recursivelyProcessNode(jCas, root.getChildren(0), NON_MENTION, mentions);
}
+// if(mentions.size() > 0){
+// System.out.println("Remaining mentions:");
+// for(TimeMention mention : mentions){
+// System.out.println(mention.getCoveredText());
+// }
+// }
}
- private void processNode(JCas jCas, TreebankNode node) throws CleartkProcessingException {
+ private void recursivelyProcessNode(JCas jCas, TreebankNode node, String parentCategory, Set<TimeMention> mentions) throws CleartkProcessingException {
// accumulate features:
ArrayList<Feature> features = new ArrayList<Feature>();
String category = NON_MENTION;
@@ -103,6 +113,7 @@ TemporalEntityAnnotator_ImplBase {
if(node.getParent().getParent() == null) features.add(new Feature("IS_ROOT"));
features.add(new Feature("NODE_LABEL", node.getNodeType()));
features.add(new Feature("PARENT_LABEL", node.getParent().getNodeType()));
+ features.add(new Feature("PARENT_CAT", parentCategory));
if(node.getLeaf()){
features.add(new Feature("IS_LEAF"));
@@ -111,7 +122,9 @@ TemporalEntityAnnotator_ImplBase {
for(int i = 0; i < node.getChildren().size(); i++){
buffer.append(node.getChildren(i).getNodeType());
buffer.append("_");
+ features.add(new Feature("CHILD_BAG", node.getChildren(i).getNodeType()));
}
+// features.add(new Feature("NUM_TOKENS", JCasUtil.selectCovered(BaseToken.class, node).size()));
features.add(new Feature("PRODUCTION", buffer.toString()));
}
@@ -119,12 +132,13 @@ TemporalEntityAnnotator_ImplBase {
for(SimpleFeatureExtractor extractor : featureExtractors){
features.addAll(extractor.extract(jCas, node));
}
-
+
if(this.isTraining()){
List<TimeMention> goldMentions = JCasUtil.selectCovered(TimeMention.class, node);
for(TimeMention mention : goldMentions){
if(mention.getBegin() == node.getBegin() && mention.getEnd() == node.getEnd()){
category = MENTION;
+ mentions.remove(mention);
}
}
this.dataWriter.write(new Instance<String>(category, features));
@@ -142,8 +156,7 @@ TemporalEntityAnnotator_ImplBase {
for(int i = 0; i < node.getChildren().size(); i++){
TreebankNode child = node.getChildren(i);
- processNode(jCas, child);
+ recursivelyProcessNode(jCas, child, category, mentions);
}
}
-
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1478850&r1=1478849&r2=1478850&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java Fri May 3 15:50:14 2013
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.logging.Level;
import org.apache.ctakes.temporal.ae.ConstituencyBasedTimeAnnotator;
+import org.apache.ctakes.temporal.ae.TimeAnnotator;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.jcas.JCas;
@@ -32,6 +33,7 @@ import org.apache.uima.resource.Resource
import org.cleartk.classifier.jar.JarClassifierBuilder;
import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
import org.cleartk.eval.AnnotationStatistics;
+import org.uimafit.factory.AnalysisEngineFactory;
import com.lexicalscope.jewel.cli.CliFactory;
@@ -77,6 +79,10 @@ public class EvaluationOfTimeSpans exten
@Override
protected AnalysisEngineDescription getAnnotatorDescription(File directory)
throws ResourceInitializationException {
+// return AnalysisEngineFactory.createAggregateDescription(
+// TimeAnnotator.createAnnotatorDescription(directory),
+// AnalysisEngineFactory.createPrimitiveDescription(RemoveTreeAlignedMentions.class),
+// ConstituencyBasedTimeAnnotator.createAnnotatorDescription(directory));
return ConstituencyBasedTimeAnnotator.createAnnotatorDescription(directory);
}