You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2012/12/21 18:46:01 UTC
svn commit: r1425046 - in
/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal:
ae/EventAnnotator.java ae/feature/PredicateArgumentExtractor.java
ae/feature/SRLExtractor.java eval/EvaluationOfEventSpans.java
Author: stevenbethard
Date: Fri Dec 21 17:46:00 2012
New Revision: 1425046
URL: http://svn.apache.org/viewvc?rev=1425046&view=rev
Log:
Rewrites predicate/argument extractor to be much more efficient.
Added:
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java
- copied, changed from r1424631, incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
Removed:
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
Modified:
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java?rev=1425046&r1=1425045&r2=1425046&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java Fri Dec 21 17:46:00 2012
@@ -27,7 +27,7 @@ import java.util.List;
import java.util.Random;
import org.apache.ctakes.temporal.ae.feature.ChunkingExtractor;
-import org.apache.ctakes.temporal.ae.feature.SRLExtractor;
+import org.apache.ctakes.temporal.ae.feature.PredicateArgumentExtractor;
import org.apache.ctakes.temporal.ae.feature.selection.Chi2FeatureSelection;
import org.apache.ctakes.temporal.ae.feature.selection.FeatureSelection;
import org.apache.ctakes.typesystem.type.constants.CONST;
@@ -164,8 +164,7 @@ public class EventAnnotator extends Clea
this.tokenFeatureExtractor = new CombinedExtractor(
new CoveredTextExtractor(),
new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
- new TypePathExtractor(BaseToken.class, "partOfSpeech"),
- new SRLExtractor());
+ new TypePathExtractor(BaseToken.class, "partOfSpeech"));
this.contextFeatureExtractor = new CleartkExtractor(
BaseToken.class,
this.tokenFeatureExtractor,
@@ -189,6 +188,7 @@ public class EventAnnotator extends Clea
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
+ PredicateArgumentExtractor predicateArgumentExtractor = new PredicateArgumentExtractor(jCas);
Random rand = new Random();
// classify tokens within each sentence
@@ -241,13 +241,16 @@ public class EventAnnotator extends Clea
// features from token attributes
features.addAll(this.tokenFeatureExtractor.extract(jCas, token));
- // features from surrounding tokens
- features.addAll(this.contextFeatureExtractor.extractWithin(jCas, token, sentence));
-
// features from surrounding entity, phrase, etc. chunk-labels
for (ChunkingExtractor extractor : chunkingExtractors) {
features.addAll(extractor.extract(tokenIndex, nChunkLabelsBefore, nChunkLabelsAfter));
}
+
+ // features from semantic roles
+ features.addAll(predicateArgumentExtractor.extract(token));
+
+ // features from surrounding tokens
+ features.addAll(this.contextFeatureExtractor.extractWithin(jCas, token, sentence));
// features from previous classifications
for (int i = nPreviousClassifications; i > 0; --i) {
@@ -255,7 +258,7 @@ public class EventAnnotator extends Clea
String previousOutcome = index < 0 ? "O" : outcomes.get(index);
features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
}
-
+
// apply feature selection, if necessary
if (this.featureSelection != null) {
features = this.featureSelection.transform(features);
Copied: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java (from r1424631, incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java)
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java?p2=incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java&p1=incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java&r1=1424631&r2=1425046&rev=1425046&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java Fri Dec 21 17:46:00 2012
@@ -18,7 +18,7 @@
*/
package org.apache.ctakes.temporal.ae.feature;
-import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
@@ -26,63 +26,48 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
-import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
import org.uimafit.util.JCasUtil;
-public class SRLExtractor implements SimpleFeatureExtractor {
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
- @Override
- public List<Feature> extract(JCas jCas, Annotation focusAnnotation)
- throws CleartkExtractorException {
- // and cache the results so that we only do this once per CAS
- String jCasText = jCas.getDocumentText();
- String roleFeat = "SemanticRole";
- String roleVerbFeat = "RoleAndVerb";
- String verb = "noVerb";
- Feature role = new Feature(roleFeat, "NoRole");
- Feature roleVerb = new Feature(roleVerbFeat, "NoRole"+verb);
- ArrayList<Feature> features = new ArrayList<Feature>();
- for (Predicate predicate : JCasUtil.select(jCas, Predicate.class)) {
+public class PredicateArgumentExtractor {
+
+ private Multimap<BaseToken, Predicate> tokenPredicateMap;
+
+ private Multimap<BaseToken, SemanticArgument> tokenArgumentMap;
+ public PredicateArgumentExtractor(JCas jCas) {
+ this.tokenPredicateMap = ArrayListMultimap.create();
+ for (Predicate predicate : JCasUtil.select(jCas, Predicate.class)) {
for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, predicate)) {
- if (token.equals(focusAnnotation)) {// token.getBegin()==focusAnnotation.getBegin()){
- role = new Feature(roleFeat,"Predicate");
- verb = jCasText.substring(predicate.getBegin(), predicate.getEnd());
- roleVerb = new Feature(roleVerbFeat, "Predicate::"+verb);
-
- features.add(role);
- features.add(roleVerb);
- return features;
- }
+ this.tokenPredicateMap.put(token, predicate);
}
-
- for (SemanticRoleRelation relation : JCasUtil.select(
- predicate.getRelations(),
- SemanticRoleRelation.class)) {
- SemanticArgument arg = relation.getArgument();
- // System.out.format("\tArg: %s=%s \n", arg.getLabel(), arg.getCoveredText());
- for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, arg)) {
- if (token.equals(focusAnnotation)) {// token.getBegin()==focusAnnotation.getBegin()){
- String label = arg.getLabel();
- Predicate currentPred = relation.getPredicate();
- verb = jCasText.substring(currentPred.getBegin(), currentPred.getEnd());
- role = new Feature(roleFeat, label);
- roleVerb = new Feature(roleVerbFeat, label+"::"+verb);
-
- features.add(role);
- features.add(roleVerb);
- return features;
- }
- }
+ }
+ this.tokenArgumentMap = ArrayListMultimap.create();
+ for (SemanticArgument argument : JCasUtil.select(jCas, SemanticArgument.class)) {
+ for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, argument)) {
+ this.tokenArgumentMap.put(token, argument);
}
}
+ }
- features.add(role);
- features.add(roleVerb);
+ public List<Feature> extract(BaseToken token) {
+ List<Feature> features = Lists.newArrayList();
+ Collection<Predicate> predicates = this.tokenPredicateMap.get(token);
+ features.add(new Feature("Predicate", !predicates.isEmpty()));
+ for (Predicate predicate : predicates) {
+ features.add(new Feature("Predicate_Lex", predicate.getCoveredText()));
+ }
+ for (SemanticArgument argument : this.tokenArgumentMap.get(token)) {
+ SemanticRoleRelation relation = argument.getRelation();
+ String category = relation.getCategory();
+ features.add(new Feature("Argument", category));
+ String predicateText = relation.getPredicate().getCoveredText();
+ features.add(new Feature("Argument_Lex", String.format("%s_%s", category, predicateText)));
+ }
return features;
}
-
}
Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java?rev=1425046&r1=1425045&r2=1425046&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java Fri Dec 21 17:46:00 2012
@@ -86,7 +86,9 @@ public class EvaluationOfEventSpans exte
float featureSelectionThreshold) {
super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, EnumSet.of(
AnnotatorType.PART_OF_SPEECH_TAGS,
- AnnotatorType.CHUNKS));
+ AnnotatorType.CHUNKS,
+ AnnotatorType.DEPENDENCIES,
+ AnnotatorType.SEMANTIC_ROLES));
//AnnotatorType.UMLS_NAMED_ENTITIES,
//AnnotatorType.LEXICAL_VARIANTS,
//AnnotatorType.DEPENDENCIES,