You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2013/01/29 19:56:22 UTC
svn commit: r1440057 - in /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor: ae/RelationExtractorAnnotator.java eval/RelationExtractorEvaluation.java
Author: stevenbethard
Date: Tue Jan 29 18:56:22 2013
New Revision: 1440057
URL: http://svn.apache.org/viewvc?rev=1440057&view=rev
Log:
Fixes view handling in RelationExtractorEvaluation so that RelationExtractorAnnotator no longer has to be view-aware
Modified:
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java?rev=1440057&r1=1440056&r2=1440057&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java Tue Jan 29 18:56:22 2013
@@ -25,19 +25,6 @@ import java.util.List;
import java.util.Map;
import java.util.Random;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CASException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.cleartk.classifier.CleartkAnnotator;
-import org.cleartk.classifier.CleartkProcessingException;
-import org.cleartk.classifier.Feature;
-import org.cleartk.classifier.Instance;
-import org.uimafit.descriptor.ConfigurationParameter;
-import org.uimafit.util.JCasUtil;
-
import org.apache.ctakes.relationextractor.ae.features.DependencyPathFeaturesExtractor;
import org.apache.ctakes.relationextractor.ae.features.DependencyTreeFeaturesExtractor;
import org.apache.ctakes.relationextractor.ae.features.NamedEntityFeaturesExtractor;
@@ -47,8 +34,17 @@ import org.apache.ctakes.relationextract
import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.CleartkProcessingException;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.util.JCasUtil;
import com.google.common.collect.Lists;
@@ -56,14 +52,6 @@ public abstract class RelationExtractorA
public static final String NO_RELATION_CATEGORY = "-NONE-";
- public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";
-
- @ConfigurationParameter(
- name = PARAM_GOLD_VIEW_NAME,
- mandatory = false,
- description = "view containing the manual relation annotations; needed for training")
- protected String goldViewName;
-
public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE = "ProbabilityOfKeepingANegativeExample";
@ConfigurationParameter(
@@ -92,14 +80,6 @@ public abstract class RelationExtractorA
new DependencyPathFeaturesExtractor()
);
}
-
- @Override
- public void initialize(UimaContext context) throws ResourceInitializationException {
- super.initialize(context);
- if (this.isTraining() && this.goldViewName == null) {
- throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
- }
- }
/**
* Selects the relevant mentions/annotations within a sentence for relation identification/extraction.
@@ -111,17 +91,6 @@ public abstract class RelationExtractorA
*/
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
- // during training, pull entity and relation annotations from the manual annotation view
- JCas identifiedAnnotationView, relationView;
- if (this.isTraining()) {
- try {
- identifiedAnnotationView = relationView = jCas.getView(this.goldViewName);
- } catch (CASException e) {
- throw new AnalysisEngineProcessException(e);
- }
- } else {
- identifiedAnnotationView = relationView = jCas;
- }
// lookup from pair of annotations to binary text relation
// note: assumes that there will be at most one relation per pair
@@ -129,7 +98,7 @@ public abstract class RelationExtractorA
relationLookup = new HashMap<List<Annotation>, BinaryTextRelation>();
if (this.isTraining()) {
relationLookup = new HashMap<List<Annotation>, BinaryTextRelation>();
- for (BinaryTextRelation relation : JCasUtil.select(relationView, BinaryTextRelation.class)) {
+ for (BinaryTextRelation relation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
Annotation arg1 = relation.getArg1().getArgument();
Annotation arg2 = relation.getArg2().getArgument();
// The key is a list of args so we can do bi-directional lookup
@@ -141,7 +110,7 @@ public abstract class RelationExtractorA
for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
// collect all relevant relation arguments from the sentence
- List<IdentifiedAnnotationPair> candidatePairs = this.getCandidateRelationArgumentPairs(identifiedAnnotationView, sentence);
+ List<IdentifiedAnnotationPair> candidatePairs = this.getCandidateRelationArgumentPairs(jCas, sentence);
// walk through the pairs of annotations
for (IdentifiedAnnotationPair pair : candidatePairs) {
@@ -186,15 +155,15 @@ public abstract class RelationExtractorA
}
// add the relation to the CAS
- RelationArgument relArg1 = new RelationArgument(relationView);
+ RelationArgument relArg1 = new RelationArgument(jCas);
relArg1.setArgument(arg1);
relArg1.setRole("Argument");
relArg1.addToIndexes();
- RelationArgument relArg2 = new RelationArgument(relationView);
+ RelationArgument relArg2 = new RelationArgument(jCas);
relArg2.setArgument(arg2);
relArg2.setRole("Related_to");
relArg2.addToIndexes();
- BinaryTextRelation relation = new BinaryTextRelation(relationView);
+ BinaryTextRelation relation = new BinaryTextRelation(jCas);
relation.setArg1(relArg1);
relation.setArg2(relArg2);
relation.setCategory(predictedCategory);
Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1440057&r1=1440056&r2=1440057&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Tue Jan 29 18:56:22 2013
@@ -385,16 +385,14 @@ public class RelationExtractorEvaluation
RemoveOtherRelations.PARAM_RELATION_CATEGORY,
this.relationCategory),
CAS.NAME_DEFAULT_SOFA, GOLD_VIEW_NAME);
- // replace cTAKES entity mentions and modifiers in the system view with the gold annotations
- builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesEntityMentionsAndModifiersWithGold.class));
+ // remove cTAKES entity mentions and modifiers in the system view and copy in the gold relations
+ builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveCTakesMentionsAndCopyGoldRelations.class));
// add the relation extractor, configured for training mode
AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
this.classifierAnnotatorClass,
this.additionalParameters);
ConfigurationParameterFactory.addConfigurationParameters(
classifierAnnotator,
- RelationExtractorAnnotator.PARAM_GOLD_VIEW_NAME,
- RelationExtractorEvaluation.GOLD_VIEW_NAME,
DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
this.dataWriterClass,
DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
@@ -430,7 +428,7 @@ public class RelationExtractorEvaluation
builder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveSmallerEntityMentions.class));
} else {
// replace cTAKES entity mentions and modifiers in the system view with the gold annotations
- builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesEntityMentionsAndModifiersWithGold.class));
+ builder.add(AnalysisEngineFactory.createPrimitiveDescription(ReplaceCTakesMentionsWithGoldMentions.class));
}
// add the relation extractor, configured for classification mode
AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
@@ -715,11 +713,10 @@ public class RelationExtractorEvaluation
}
/**
- * Annotator that removes cTAKES EntityMentions and Modifiers from the system view, and copies
- * over the manually annotated EntityMentions and Modifiers from the gold view.
- *
+ * Annotator that removes cTAKES mentions in the system view and copies relations from the gold
+ * view to the system view
*/
- public static class ReplaceCTakesEntityMentionsAndModifiersWithGold extends
+ public static class RemoveCTakesMentionsAndCopyGoldRelations extends
JCasAnnotator_ImplBase {
@Override
@@ -731,7 +728,7 @@ public class RelationExtractorEvaluation
} catch (CASException e) {
throw new AnalysisEngineProcessException(e);
}
-
+
// remove cTAKES EntityMentions and Modifiers from system view
List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
@@ -745,16 +742,31 @@ public class RelationExtractorEvaluation
goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+ Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
for (IdentifiedAnnotation goldMention : goldMentions) {
Annotation copy = (Annotation) copier.copyFs(goldMention);
- Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
copy.setFeatureValue(sofaFeature, systemView.getSofa());
copy.addToIndexes();
}
+
+ // copy gold relations to the system view
+ for (BinaryTextRelation goldRelation : JCasUtil.select(goldView, BinaryTextRelation.class)) {
+ BinaryTextRelation relation = (BinaryTextRelation) copier.copyFs(goldRelation);
+ relation.addToIndexes(systemView);
+ for (RelationArgument relArg : Lists.newArrayList(relation.getArg1(), relation.getArg2())) {
+ relArg.addToIndexes(systemView);
+ // relArg.getArgument() should have been added to indexes with mentions above
+ }
+ }
}
}
- public static class ReplaceGoldEntityMentionsAndModifiersWithCTakes extends
+ /**
+ * Annotator that removes cTAKES EntityMentions and Modifiers from the system view, and copies
+ * over the manually annotated EntityMentions and Modifiers from the gold view.
+ *
+ */
+ public static class ReplaceCTakesMentionsWithGoldMentions extends
JCasAnnotator_ImplBase {
@Override
@@ -767,73 +779,28 @@ public class RelationExtractorEvaluation
throw new AnalysisEngineProcessException(e);
}
- // remove manual EntityMentions and Modifiers from gold view
- List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
- goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
- goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
- for (IdentifiedAnnotation goldMention : goldMentions) {
- goldMention.removeFromIndexes();
- }
-
- // copy cTAKES EntityMentions and Modifiers to gold view
+ // remove cTAKES EntityMentions and Modifiers from system view
List<IdentifiedAnnotation> cTakesMentions = new ArrayList<IdentifiedAnnotation>();
cTakesMentions.addAll(JCasUtil.select(systemView, EntityMention.class));
cTakesMentions.addAll(JCasUtil.select(systemView, Modifier.class));
- CasCopier copier = new CasCopier(systemView.getCas(), goldView.getCas());
for (IdentifiedAnnotation cTakesMention : cTakesMentions) {
- Annotation copy = (Annotation) copier.copyFs(cTakesMention);
- Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
- copy.setFeatureValue(sofaFeature, goldView.getSofa());
- copy.addToIndexes();
+ cTakesMention.removeFromIndexes();
}
- // replace gold EntityMentions and Modifiers in relations with cTAKES ones
- List<BinaryTextRelation> relations = new ArrayList<BinaryTextRelation>();
- relations.addAll(JCasUtil.select(goldView, BinaryTextRelation.class));
- for (BinaryTextRelation relation : relations) {
-
- // attempt to replace the gold RelationArguments with system ones
- for (RelationArgument relArg : Arrays.asList(relation.getArg1(), relation.getArg2())) {
- IdentifiedAnnotation goldArg = (IdentifiedAnnotation) relArg.getArgument();
- Class<? extends IdentifiedAnnotation> argClass = goldArg.getClass();
-
- // find all annotations covered by the gold argument and of the same class (these should
- // be the ones copied over from the cTAKES output earlier)
- List<? extends IdentifiedAnnotation> systemArgs = JCasUtil.selectCovered(
- goldView,
- argClass,
- goldArg);
-
- // find the largest covered annotation that has the same type
- IdentifiedAnnotation bestFitArg = null;
- int maxSize = 0;
- for (IdentifiedAnnotation systemArg : systemArgs) {
- int size = systemArg.getEnd() - systemArg.getBegin();
- if (size >= maxSize && goldArg.getTypeID() == systemArg.getTypeID()) {
- maxSize = size;
- bestFitArg = systemArg;
- }
- }
- if (bestFitArg != null) {
- relArg.setArgument(bestFitArg);
- }
-
- // log a message if we didn't find a perfect match
- if (maxSize != goldArg.getEnd() - goldArg.getBegin()) {
- List<String> choices = new ArrayList<String>();
- for (IdentifiedAnnotation systemArg : systemArgs) {
- choices.add(format(systemArg));
- }
- String actionFormat = bestFitArg == null ? "dropping" : "using %s instead of";
- String action = String.format(actionFormat, format(bestFitArg));
- String message = String.format("%s %s; choices: %s", action, format(goldArg), choices);
- this.getContext().getLogger().log(Level.WARNING, message);
- }
- }
+ // copy gold EntityMentions and Modifiers to the system view
+ List<IdentifiedAnnotation> goldMentions = new ArrayList<IdentifiedAnnotation>();
+ goldMentions.addAll(JCasUtil.select(goldView, EntityMention.class));
+ goldMentions.addAll(JCasUtil.select(goldView, Modifier.class));
+ CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+ for (IdentifiedAnnotation goldMention : goldMentions) {
+ Annotation copy = (Annotation) copier.copyFs(goldMention);
+ Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa");
+ copy.setFeatureValue(sofaFeature, systemView.getSofa());
+ copy.addToIndexes();
}
}
}
-
+
static String format(IdentifiedAnnotation a) {
return a == null ? null : String.format("\"%s\"(type=%d)", a.getCoveredText(), a.getTypeID());
}