You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/07/16 16:37:39 UTC
svn commit: r1611026 -
/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CoreferenceChainAnnotator.java
Author: tmill
Date: Wed Jul 16 14:37:38 2014
New Revision: 1611026
URL: http://svn.apache.org/r1611026
Log:
CTAKES-199: Moved chain annotator coref classes into temporal for event coreference.
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CoreferenceChainAnnotator.java
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CoreferenceChainAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CoreferenceChainAnnotator.java?rev=1611026&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CoreferenceChainAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CoreferenceChainAnnotator.java Wed Jul 16 14:37:38 2014
@@ -0,0 +1,123 @@
+package org.apache.ctakes.temporal.ae;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.EmptyFSList;
+import org.apache.uima.jcas.cas.NonEmptyFSList;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+/*
+ * Does not find coreference -- simply turns annotated pairs into chains of clustered mentions
+ */
+public class CoreferenceChainAnnotator extends JCasAnnotator_ImplBase {
+
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+ Collection<CoreferenceRelation> pairs = JCasUtil.select(jCas, CoreferenceRelation.class);
+ Map<Annotation,Set<Annotation>> chains = new HashMap<>();
+
+ for(CoreferenceRelation pair : pairs){
+ Annotation ante = pair.getArg1().getArgument();
+ Annotation ana = pair.getArg2().getArgument();
+
+ /* 3 cases:
+ * 1) Only antecedent is in a chain -- add anaphor to that chain
+ * 2) Only anaphor is in a chain -- add antecedent to that chain
+ * 3) Both in different chains -- join the chains
+ * 4) Both in same chain -- do nothing
+ * 5) Neither in a chain -- create new chain
+ */
+ if(chains.containsKey(ante) && !chains.containsKey(ana)){
+ // 1
+ chains.get(ante).add(ana);
+ chains.put(ana, chains.get(ante));
+ }else if(chains.containsKey(ana) && !chains.containsKey(ante)){
+ // 2
+ chains.get(ana).add(ante);
+ chains.put(ante, chains.get(ana));
+ }else if(chains.containsKey(ante) && chains.containsKey(ana)){
+ if(!chains.get(ante).equals(chains.get(ana))){
+ // 3
+ Set<Annotation> anteChain = chains.get(ante);
+ Set<Annotation> anaChain = chains.get(ana);
+ anteChain.addAll(anaChain);
+ chains.put(ana, anteChain);
+ // make all annotations in ana chain point to ante chain:
+ for(Annotation markable : anaChain){
+ chains.put(markable, anteChain);
+ }
+ }
+ // else 4, which do nothing
+ }else{
+ // 5
+ Set<Annotation> newChain = new HashSet<Annotation>();
+ newChain.add(ante);
+ newChain.add(ana);
+ chains.put(ante, newChain);
+ chains.put(ana, newChain);
+ }
+ }
+
+ // convert java Sets into ordered UIMA lists.
+ for(Set<Annotation> mentionSet : new HashSet<Set<Annotation>>(chains.values())){
+ List<Annotation> sortedMentions = new ArrayList<>(mentionSet);
+ Collections.sort(sortedMentions, new AnnotationComparator());
+ CollectionTextRelation chain = new CollectionTextRelation(jCas);
+ NonEmptyFSList list = new NonEmptyFSList(jCas);
+ chain.setMembers(list);
+ list.addToIndexes();
+ for(int i = 0; i < sortedMentions.size(); i++){
+ Annotation mention = sortedMentions.get(i);
+ list.setHead(mention);
+ if(i == (sortedMentions.size() - 1)){
+ list.setTail(new EmptyFSList(jCas));
+ list.getTail().addToIndexes();
+ }else{
+ list.setTail(new NonEmptyFSList(jCas));
+ list = (NonEmptyFSList) list.getTail();
+ list.addToIndexes();
+ }
+ }
+ chain.addToIndexes();
+ }
+ }
+
+ private class AnnotationComparator implements Comparator<Annotation> {
+
+ @Override
+ public int compare(Annotation o1, Annotation o2) {
+ if(o1.getBegin() < o2.getBegin()){
+ return -1;
+ }else if(o1.getBegin() == o2.getBegin() && o1.getEnd() < o2.getEnd()){
+ return -1;
+ }else if(o1.getBegin() == o2.getBegin() && o1.getEnd() > o2.getEnd()){
+ return 1;
+ }else if(o2.getBegin() < o1.getBegin()){
+ return 1;
+ }else{
+ return 0;
+ }
+ }
+ }
+
+ public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException{
+ return AnalysisEngineFactory.createPrimitiveDescription(CoreferenceChainAnnotator.class);
+ }
+}