You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2016/12/06 22:02:28 UTC
svn commit: r1772966 - in /ctakes/trunk:
ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/
ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/
ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/ ...
Author: tmill
Date: Tue Dec 6 22:02:28 2016
New Revision: 1772966
URL: http://svn.apache.org/viewvc?rev=1772966&view=rev
Log:
Coreference annotator creates Events at end of coref process.
Removed:
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/CoreferencePipelineFactory.java
Modified:
ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableHeadTreeCreator.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/factory/CoreferenceAnnotatorFactory.java
ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/struct/CounterMap.java
Modified: ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java?rev=1772966&r1=1772965&r2=1772966&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java (original)
+++ ctakes/trunk/ctakes-clinical-pipeline/src/main/java/org/apache/ctakes/clinicalpipeline/ClinicalPipelineFactory.java Tue Dec 6 22:02:28 2016
@@ -18,7 +18,21 @@
*/
package org.apache.ctakes.clinicalpipeline;
-import org.apache.ctakes.assertion.medfacts.cleartk.*;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
import org.apache.ctakes.chunker.ae.Chunker;
import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
@@ -26,6 +40,7 @@ import org.apache.ctakes.contexttokenize
import org.apache.ctakes.core.ae.SentenceDetector;
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.coreference.factory.CoreferenceAnnotatorFactory;
import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
@@ -51,11 +66,6 @@ import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.xml.sax.SAXException;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.util.*;
-
final public class ClinicalPipelineFactory {
private ClinicalPipelineFactory() {
@@ -128,6 +138,16 @@ final public class ClinicalPipelineFacto
builder.add( ChunkAdjuster.createAnnotatorDescription( new String[] { "NP", "PP", "NP" }, 2 ) );
return builder.createAggregateDescription();
}
+
+ /**
+  * Assembles an aggregate description that runs, in order, the fast pipeline,
+  * the constituency parser, and the default coreference pipeline.
+  *
+  * @return the aggregate description produced by the builder
+  * @throws ResourceInitializationException declared by the description factories used here
+  * @throws MalformedURLException declared for the component pipelines assembled here
+  */
+ public static AnalysisEngineDescription getCoreferencePipeline() throws ResourceInitializationException, MalformedURLException {
+   final AggregateBuilder aggregate = new AggregateBuilder();
+   // Order matters: each stage is added after the one whose output it follows.
+   aggregate.add( getFastPipeline() );
+   aggregate.add( ConstituencyParser.createAnnotatorDescription() );
+   aggregate.add( CoreferenceAnnotatorFactory.getDefaultCoreferencePipeline() );
+   return aggregate.createAggregateDescription();
+ }
public static void main( final String... args ) throws IOException, UIMAException, SAXException {
// The note is easier to read when sentences are stacked - changed 3/16/2015 spf
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableHeadTreeCreator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableHeadTreeCreator.java?rev=1772966&r1=1772965&r2=1772966&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableHeadTreeCreator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableHeadTreeCreator.java Tue Dec 6 22:02:28 2016
@@ -45,14 +45,14 @@ public class MarkableHeadTreeCreator ext
try{
docId = DocumentIDAnnotationUtil.getDocumentID(jcas);
}catch(Exception e){
- e.printStackTrace();
+ //System.err.println(e.getMessage());
}
if(docId == null || docId == DocumentIDAnnotationUtil.NO_DOCUMENT_ID){
try {
docId = ViewUriUtil.getURI(jcas).toString();
- } catch (AnalysisEngineProcessException e) {
- e.printStackTrace();
- logger.warn("No document ID found using traditional methods. Using ad hoc combination");
+ } catch (Exception e) {
+ //System.err.println(e.getMessage());
+ //logger.warn("No document ID found using traditional methods. Using ad hoc combination");
String docText = jcas.getDocumentText();
docId = docText.substring(0, Math.min(20, docText.length())) + "_hash=" + docText.hashCode();
}
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java?rev=1772966&r1=1772965&r2=1772966&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java Tue Dec 6 22:02:28 2016
@@ -1,13 +1,18 @@
package org.apache.ctakes.coreference.ae;
+import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
+
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import org.apache.ctakes.core.util.ListFactory;
import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAgreementFeaturesExtractor;
@@ -24,14 +29,30 @@ import org.apache.ctakes.coreference.ae.
import org.apache.ctakes.coreference.ae.pairing.cluster.HeadwordPairer;
import org.apache.ctakes.coreference.ae.pairing.cluster.SectionHeaderPairer;
import org.apache.ctakes.coreference.ae.pairing.cluster.SentenceDistancePairer;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
+import org.apache.ctakes.typesystem.type.refsem.AnatomicalSite;
+import org.apache.ctakes.typesystem.type.refsem.DiseaseDisorder;
+import org.apache.ctakes.typesystem.type.refsem.Element;
+import org.apache.ctakes.typesystem.type.refsem.Event;
+import org.apache.ctakes.typesystem.type.refsem.Medication;
+import org.apache.ctakes.typesystem.type.refsem.Procedure;
+import org.apache.ctakes.typesystem.type.refsem.SignSymptom;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelationIdentifiedAnnotationRelation;
import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.ctakes.typesystem.type.textsem.MedicationMention;
+import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
+import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.utils.struct.CounterMap;
+import org.apache.ctakes.utils.struct.MapFactory;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -40,6 +61,7 @@ import org.apache.uima.fit.factory.Analy
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.EmptyFSList;
+import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.NonEmptyFSList;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.CleartkAnnotator;
@@ -250,39 +272,6 @@ public class MentionClusterCoreferenceAn
feature.setValue("NULL");
String message = String.format("Null value found in %s from %s", feature, features);
System.err.println(message);
- // throw new IllegalArgumentException(String.format(message, feature, features));
- }else{
-// String prefix = null;
- // Durret and Klein style feature conjunctions: pronoun type or pos tag. maybe try umls semantic-type?
- /*
- if(mentionText.equals("it") || mentionText.equals("this") || mentionText.equals("that")){
- prefix = "PRO_"+mentionText;
- }else if(headNode != null && headNode.getPostag() != null){
- prefix = headNode.getPostag();
- }else{
- prefix = "UNK";
- }
- */
- // headword-based feature conjunctions
-/* if(headNode != null && headNode.getCoveredText() != null && headMatches(headNode.getCoveredText().toLowerCase(), features)){
- prefix = "HEAD_MATCH";
- }else{
- prefix = "NO_HEAD_MATCH";
- }
-*/
-
- // UMLS semantic type feature conjunctions
- /*
- for(Feature feat : features){
- if(feat.getName().startsWith("ClusterSemType")){
- dupFeatures.add(new Feature(feat.getName()+"_"+feature.getName(), feature.getValue()));
- }
- }
- */
-
-// if(prefix != null){
-// dupFeatures.add(new Feature(prefix+"_"+feature.getName(), feature.getValue()));
-// }
}
}
@@ -350,6 +339,8 @@ public class MentionClusterCoreferenceAn
}
removeSingletonClusters(jCas);
+
+ createEventClusters(jCas);
}
@@ -423,6 +414,68 @@ public class MentionClusterCoreferenceAn
ListFactory.append(jCas, cluster.getMembers(), mention);
}
+ /**
+ * Create the set of Event types for every chain we found in the document.
+ * Event is a non-Annotation type (i.e., no span) that has its own attributes
+ * but points to an FSArray of mentions which each have their own attributes.
+ *
+ * @param jCas
+ * - JCas object, needed to create UIMA types
+ * @throws AnalysisEngineProcessException
+ */
+ private static void createEventClusters(JCas jCas) throws AnalysisEngineProcessException{
+ // First, find the largest span identified annotation that shares a headword with the markable
+ // do that by finding the head of the markable, then finding the identifiedannotations that cover it:
+ Map<ConllDependencyNode, Collection<IdentifiedAnnotation>> dep2event = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
+ for(CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)){
+ CounterMap<Class<? extends IdentifiedAnnotation>> headCounts = new CounterMap<>();
+ List<Markable> memberList = new ArrayList<>(JCasUtil.select(cluster.getMembers(), Markable.class));
+ for(Markable member : memberList){
+ ConllDependencyNode head = MapFactory.get(getKey(jCas), member);
+ // Now find all the identified annotations that share this head:
+ IdentifiedAnnotation largest = null;
+ for(IdentifiedAnnotation covering : dep2event.get(head)){
+ if(isUmlsAnnotation(covering) && head == DependencyUtility.getNominalHeadNode(jCas, covering)){
+ if(largest == null || (covering.getEnd()-covering.getBegin() > (largest.getEnd()-largest.getBegin()))){
+ largest = covering;
+ }
+ }
+ }
+ if(largest != null){
+ headCounts.add(largest.getClass());
+ }
+ }
+ FSArray mentions = new FSArray(jCas, memberList.size());
+ IntStream.range(0, memberList.size()).forEach(i -> mentions.set(i, memberList.get(i)));
+
+ Element element = null;
+ if(headCounts.size() == 0){
+ element = new Event(jCas);
+ }else{
+ Class<? extends IdentifiedAnnotation> mostCommon = headCounts.entrySet().stream()
+ .sorted(Map.Entry.<Class<? extends IdentifiedAnnotation>,Integer>comparingByValue().reversed())
+ .limit(1)
+ .map(f -> f.getKey())
+ .collect(Collectors.toList()).get(0);
+ if(mostCommon.equals(DiseaseDisorderMention.class)){
+ element = new DiseaseDisorder(jCas);
+ }else if(mostCommon.equals(ProcedureMention.class)){
+ element = new Procedure(jCas);
+ }else if(mostCommon.equals(SignSymptomMention.class)){
+ element = new SignSymptom(jCas);
+ }else if(mostCommon.equals(MedicationMention.class)){
+ element = new Medication(jCas);
+ }else if(mostCommon.equals(AnatomicalSiteMention.class)){
+ element = new AnatomicalSite(jCas);
+ }else{
+ System.err.println("This coreference chain has an unknown type: " + mostCommon.getSimpleName());
+ throw new AnalysisEngineProcessException();
+ }
+ }
+ element.setMentions(mentions);
+ element.addToIndexes();
+ }
+ }
private static void removeSingletonClusters(JCas jcas){
List<CollectionTextRelation> toRemove = new ArrayList<>();
@@ -438,6 +491,14 @@ public class MentionClusterCoreferenceAn
}
}
+ /**
+  * Tells whether the annotation is a disease/disorder, sign/symptom,
+  * procedure, or medication mention.
+  *
+  * @param a annotation to classify
+  * @return true when a is an instance of one of those four mention types
+  */
+ private static boolean isUmlsEvent(IdentifiedAnnotation a){
+   if(a instanceof DiseaseDisorderMention){
+     return true;
+   }
+   if(a instanceof SignSymptomMention){
+     return true;
+   }
+   if(a instanceof ProcedureMention){
+     return true;
+   }
+   return a instanceof MedicationMention;
+ }
+
+ /**
+  * Tells whether the annotation is an anatomical site mention or any of the
+  * mention types accepted by isUmlsEvent.
+  *
+  * @param a annotation to classify
+  * @return true when a is an anatomical site mention or a UMLS event mention
+  */
+ private static boolean isUmlsAnnotation(IdentifiedAnnotation a){
+   if(a instanceof AnatomicalSiteMention){
+     return true;
+   }
+   return isUmlsEvent(a);
+ }
+
// private static final boolean dominates(Annotation arg1, Annotation arg2) {
// return (arg1.getBegin() <= arg2.getBegin() && arg1.getEnd() >= arg2.getEnd());
// }
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java?rev=1772966&r1=1772965&r2=1772966&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java Tue Dec 6 22:02:28 2016
@@ -34,7 +34,7 @@ import org.apache.ctakes.coreference.ae.
import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator;
import org.apache.ctakes.coreference.ae.MentionClusterRankingCoreferenceAnnotator;
import org.apache.ctakes.coreference.ae.PersonChainAnnotator;
-import org.apache.ctakes.coreference.util.CoreferencePipelineFactory;
+import org.apache.ctakes.coreference.factory.CoreferenceAnnotatorFactory;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator;
@@ -423,7 +423,7 @@ public class EvaluationOfEventCoreferenc
}else if(this.evalType == EVAL_SYSTEM.CLUSTER_RANK){
aggregateBuilder.add(MentionClusterRankingCoreferenceAnnotator.createAnnotatorDescription(directory.getAbsolutePath() + File.separator + "model.jar"));
}else if(this.evalType == EVAL_SYSTEM.BASELINE){
- aggregateBuilder.add(CoreferencePipelineFactory.getCoreferencePipeline());
+ aggregateBuilder.add(CoreferenceAnnotatorFactory.getLegacyCoreferencePipeline());
}else{
logger.info("Running an evaluation that does not add an annotator: " + this.evalType);
}
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/factory/CoreferenceAnnotatorFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/factory/CoreferenceAnnotatorFactory.java?rev=1772966&r1=1772965&r2=1772966&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/factory/CoreferenceAnnotatorFactory.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/factory/CoreferenceAnnotatorFactory.java Tue Dec 6 22:02:28 2016
@@ -22,6 +22,10 @@ import org.apache.ctakes.coreference.ae.
import org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator;
import org.apache.ctakes.coreference.ae.MarkableSalienceAnnotator;
import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator;
+import org.apache.ctakes.coreference.ae.MipacqMarkableCreator;
+import org.apache.ctakes.coreference.ae.MipacqMarkableExpander;
+import org.apache.ctakes.coreference.ae.MipacqMarkablePairGenerator;
+import org.apache.ctakes.coreference.ae.MipacqSvmChainCreator;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -58,6 +62,17 @@ public class CoreferenceAnnotatorFactory
return builder.createAggregateDescription();
}
+ /**
+  * Assembles the aggregate made of the four Mipacq components, added in this
+  * order: markable creator, markable expander, markable pair generator, and
+  * SVM chain creator.
+  *
+  * @return the aggregate description produced by the builder
+  * @throws ResourceInitializationException if a component description cannot be created
+  */
+ public static AnalysisEngineDescription getLegacyCoreferencePipeline() throws ResourceInitializationException{
+   final AggregateBuilder legacy = new AggregateBuilder();
+   final AnalysisEngineDescription markableCreator = AnalysisEngineFactory.createEngineDescription(MipacqMarkableCreator.class);
+   legacy.add(markableCreator);
+   final AnalysisEngineDescription markableExpander = AnalysisEngineFactory.createEngineDescription(MipacqMarkableExpander.class);
+   legacy.add(markableExpander);
+   final AnalysisEngineDescription pairGenerator = AnalysisEngineFactory.createEngineDescription(MipacqMarkablePairGenerator.class);
+   legacy.add(pairGenerator);
+   final AnalysisEngineDescription chainCreator = AnalysisEngineFactory.createEngineDescription(MipacqSvmChainCreator.class);
+   legacy.add(chainCreator);
+   return legacy.createAggregateDescription();
+ }
+
// This method will point at the method we think is most likely to be useful for callers of mixed understanding
// who may not grok the method names for the systems named for their implementation.
public static AnalysisEngineDescription getDefaultCoreferencePipeline() throws ResourceInitializationException {
Modified: ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/struct/CounterMap.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/struct/CounterMap.java?rev=1772966&r1=1772965&r2=1772966&view=diff
==============================================================================
--- ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/struct/CounterMap.java (original)
+++ ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/struct/CounterMap.java Tue Dec 6 22:02:28 2016
@@ -18,7 +18,11 @@
*/
package org.apache.ctakes.utils.struct;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
// This class is a simplifying class which makes it easy to build hashes to keep track of counts
// and write less boilerplate code. If you just call it with an object, it will increment the
@@ -43,4 +47,12 @@ public class CounterMap<K> extends HashM
}
super.put(key, super.get(key)+i);
}
+
+ /**
+  * Lists the keys of this map ordered by their counts, largest count first.
+  * The relative order of keys that have the same count is not specified.
+  *
+  * @return a new list holding every key, sorted by descending value
+  */
+ public List<K> getKeysSortedByValue(){
+   return this.entrySet().stream()
+       // Reverse the natural ordering of the values so the largest count comes first.
+       .sorted(Map.Entry.comparingByValue(Collections.reverseOrder()))
+       .map(Map.Entry::getKey)
+       .collect(Collectors.toList());
+ }
}