You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2012/12/21 18:46:01 UTC

svn commit: r1425046 - in /incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/EventAnnotator.java ae/feature/PredicateArgumentExtractor.java ae/feature/SRLExtractor.java eval/EvaluationOfEventSpans.java

Author: stevenbethard
Date: Fri Dec 21 17:46:00 2012
New Revision: 1425046

URL: http://svn.apache.org/viewvc?rev=1425046&view=rev
Log:
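Rewrites the predicate/argument extractor to be much more efficient: the token-to-predicate and token-to-argument maps are now built once per CAS, instead of re-scanning every predicate and its arguments for each token.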

Added:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java
      - copied, changed from r1424631, incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
Removed:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
Modified:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java?rev=1425046&r1=1425045&r2=1425046&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java Fri Dec 21 17:46:00 2012
@@ -27,7 +27,7 @@ import java.util.List;
 import java.util.Random;
 
 import org.apache.ctakes.temporal.ae.feature.ChunkingExtractor;
-import org.apache.ctakes.temporal.ae.feature.SRLExtractor;
+import org.apache.ctakes.temporal.ae.feature.PredicateArgumentExtractor;
 import org.apache.ctakes.temporal.ae.feature.selection.Chi2FeatureSelection;
 import org.apache.ctakes.temporal.ae.feature.selection.FeatureSelection;
 import org.apache.ctakes.typesystem.type.constants.CONST;
@@ -164,8 +164,7 @@ public class EventAnnotator extends Clea
     this.tokenFeatureExtractor = new CombinedExtractor(
         new CoveredTextExtractor(),
         new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
-        new TypePathExtractor(BaseToken.class, "partOfSpeech"),
-        new SRLExtractor());
+        new TypePathExtractor(BaseToken.class, "partOfSpeech"));
     this.contextFeatureExtractor = new CleartkExtractor(
         BaseToken.class,
         this.tokenFeatureExtractor,
@@ -189,6 +188,7 @@ public class EventAnnotator extends Clea
 
   @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
+    PredicateArgumentExtractor predicateArgumentExtractor = new PredicateArgumentExtractor(jCas);
 
     Random rand = new Random();
     // classify tokens within each sentence
@@ -241,13 +241,16 @@ public class EventAnnotator extends Clea
         // features from token attributes
         features.addAll(this.tokenFeatureExtractor.extract(jCas, token));
 
-        // features from surrounding tokens
-        features.addAll(this.contextFeatureExtractor.extractWithin(jCas, token, sentence));
-
         // features from surrounding entity, phrase, etc. chunk-labels
         for (ChunkingExtractor extractor : chunkingExtractors) {
           features.addAll(extractor.extract(tokenIndex, nChunkLabelsBefore, nChunkLabelsAfter));
         }
+        
+        // features from semantic roles
+        features.addAll(predicateArgumentExtractor.extract(token));
+
+        // features from surrounding tokens
+        features.addAll(this.contextFeatureExtractor.extractWithin(jCas, token, sentence));
 
         // features from previous classifications
         for (int i = nPreviousClassifications; i > 0; --i) {
@@ -255,7 +258,7 @@ public class EventAnnotator extends Clea
           String previousOutcome = index < 0 ? "O" : outcomes.get(index);
           features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
         }
-
+        
         // apply feature selection, if necessary
         if (this.featureSelection != null) {
           features = this.featureSelection.transform(features);

Copied: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java (from r1424631, incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java)
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java?p2=incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java&p1=incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java&r1=1424631&r2=1425046&rev=1425046&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PredicateArgumentExtractor.java Fri Dec 21 17:46:00 2012
@@ -18,7 +18,7 @@
  */
 package org.apache.ctakes.temporal.ae.feature;
 
-import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
@@ -26,63 +26,48 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
 import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
 import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
-import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 import org.uimafit.util.JCasUtil;
 
-public class SRLExtractor implements SimpleFeatureExtractor {
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
 
-  @Override
-  public List<Feature> extract(JCas jCas, Annotation focusAnnotation)
-      throws CleartkExtractorException {
-    // and cache the results so that we only do this once per CAS
-	String jCasText = jCas.getDocumentText();
-	String roleFeat = "SemanticRole";
-	String roleVerbFeat = "RoleAndVerb";
-	String verb = "noVerb";
-    Feature role = new Feature(roleFeat, "NoRole");
-    Feature roleVerb = new Feature(roleVerbFeat, "NoRole"+verb);
-    ArrayList<Feature> features = new ArrayList<Feature>();
-    for (Predicate predicate : JCasUtil.select(jCas, Predicate.class)) {
+public class PredicateArgumentExtractor {
+
+  private Multimap<BaseToken, Predicate> tokenPredicateMap;
+
+  private Multimap<BaseToken, SemanticArgument> tokenArgumentMap;
 
+  public PredicateArgumentExtractor(JCas jCas) {
+    this.tokenPredicateMap = ArrayListMultimap.create();
+    for (Predicate predicate : JCasUtil.select(jCas, Predicate.class)) {
       for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, predicate)) {
-        if (token.equals(focusAnnotation)) {// token.getBegin()==focusAnnotation.getBegin()){
-          role = new Feature(roleFeat,"Predicate");
-          verb = jCasText.substring(predicate.getBegin(), predicate.getEnd());
-          roleVerb = new Feature(roleVerbFeat, "Predicate::"+verb);
-          
-          features.add(role);
-          features.add(roleVerb);
-          return features;
-        }
+        this.tokenPredicateMap.put(token, predicate);
       }
-
-      for (SemanticRoleRelation relation : JCasUtil.select(
-          predicate.getRelations(),
-          SemanticRoleRelation.class)) {
-        SemanticArgument arg = relation.getArgument();
-        // System.out.format("\tArg: %s=%s \n", arg.getLabel(), arg.getCoveredText());
-        for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, arg)) {
-          if (token.equals(focusAnnotation)) {// token.getBegin()==focusAnnotation.getBegin()){
-            String label = arg.getLabel();
-            Predicate currentPred = relation.getPredicate();
-            verb = jCasText.substring(currentPred.getBegin(), currentPred.getEnd());
-            role = new Feature(roleFeat, label);
-            roleVerb = new Feature(roleVerbFeat, label+"::"+verb);
-            
-            features.add(role);
-            features.add(roleVerb);
-            return features;
-          }
-        }
+    }
+    this.tokenArgumentMap = ArrayListMultimap.create();
+    for (SemanticArgument argument : JCasUtil.select(jCas, SemanticArgument.class)) {
+      for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, argument)) {
+        this.tokenArgumentMap.put(token, argument);
       }
     }
+  }
 
-    features.add(role);
-    features.add(roleVerb);
+  public List<Feature> extract(BaseToken token) {
+    List<Feature> features = Lists.newArrayList();
+    Collection<Predicate> predicates = this.tokenPredicateMap.get(token);
+    features.add(new Feature("Predicate", !predicates.isEmpty()));
+    for (Predicate predicate : predicates) {
+      features.add(new Feature("Predicate_Lex", predicate.getCoveredText()));
+    }
+    for (SemanticArgument argument : this.tokenArgumentMap.get(token)) {
+      SemanticRoleRelation relation = argument.getRelation();
+      String category = relation.getCategory();
+      features.add(new Feature("Argument", category));
+      String predicateText = relation.getPredicate().getCoveredText();
+      features.add(new Feature("Argument_Lex", String.format("%s_%s", category, predicateText)));
+    }
     return features;
   }
-
 }

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java?rev=1425046&r1=1425045&r2=1425046&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java Fri Dec 21 17:46:00 2012
@@ -86,7 +86,9 @@ public class EvaluationOfEventSpans exte
       float featureSelectionThreshold) {
     super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, EnumSet.of(
         AnnotatorType.PART_OF_SPEECH_TAGS,
-        AnnotatorType.CHUNKS));
+        AnnotatorType.CHUNKS,
+        AnnotatorType.DEPENDENCIES,
+        AnnotatorType.SEMANTIC_ROLES));
         //AnnotatorType.UMLS_NAMED_ENTITIES,
         //AnnotatorType.LEXICAL_VARIANTS,
         //AnnotatorType.DEPENDENCIES,