You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2016/12/07 15:51:33 UTC
svn commit: r1773100 - in
/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference:
ae/MentionClusterCoreferenceAnnotator.java util/MarkableUtilities.java
Author: tmill
Date: Wed Dec 7 15:51:33 2016
New Revision: 1773100
URL: http://svn.apache.org/viewvc?rev=1773100&view=rev
Log:
Moved some code to a utility class so I can use it in other projects.
Added:
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java
Modified:
ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java?rev=1773100&r1=1773099&r2=1773100&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java Wed Dec 7 15:51:33 2016
@@ -1,11 +1,8 @@
package org.apache.ctakes.coreference.ae;
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
-
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
@@ -29,7 +26,7 @@ import org.apache.ctakes.coreference.ae.
import org.apache.ctakes.coreference.ae.pairing.cluster.HeadwordPairer;
import org.apache.ctakes.coreference.ae.pairing.cluster.SectionHeaderPairer;
import org.apache.ctakes.coreference.ae.pairing.cluster.SentenceDistancePairer;
-import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.coreference.util.MarkableUtilities;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
import org.apache.ctakes.typesystem.type.refsem.AnatomicalSite;
@@ -42,7 +39,6 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelationIdentifiedAnnotationRelation;
import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
-import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -52,7 +48,6 @@ import org.apache.ctakes.typesystem.type
import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.utils.struct.CounterMap;
-import org.apache.ctakes.utils.struct.MapFactory;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -426,20 +421,19 @@ public class MentionClusterCoreferenceAn
private static void createEventClusters(JCas jCas) throws AnalysisEngineProcessException{
// First, find the largest span identified annotation that shares a headword with the markable
// do that by finding the head of the markable, then finding the identifiedannotations that cover it:
- Map<ConllDependencyNode, Collection<IdentifiedAnnotation>> dep2event = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
+
+ Map<Markable, List<IdentifiedAnnotation>> markable2annotations = MarkableUtilities.indexCoveringUmlsAnnotations(jCas);
+
for(CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)){
CounterMap<Class<? extends IdentifiedAnnotation>> headCounts = new CounterMap<>();
List<Markable> memberList = new ArrayList<>(JCasUtil.select(cluster.getMembers(), Markable.class));
for(Markable member : memberList){
- ConllDependencyNode head = MapFactory.get(getKey(jCas), member);
- // Now find all the identified annotations that share this head:
+ // Now find the largest covering annotation:
IdentifiedAnnotation largest = null;
- for(IdentifiedAnnotation covering : dep2event.get(head)){
- if(isUmlsAnnotation(covering) && head == DependencyUtility.getNominalHeadNode(jCas, covering)){
- if(largest == null || (covering.getEnd()-covering.getBegin() > (largest.getEnd()-largest.getBegin()))){
- largest = covering;
- }
- }
+ for(IdentifiedAnnotation covering : markable2annotations.get(member)){
+ if(largest == null || (covering.getEnd()-covering.getBegin() > (largest.getEnd()-largest.getBegin()))){
+ largest = covering;
+ }
}
if(largest != null){
headCounts.add(largest.getClass());
@@ -447,7 +441,7 @@ public class MentionClusterCoreferenceAn
}
FSArray mentions = new FSArray(jCas, memberList.size());
IntStream.range(0, memberList.size()).forEach(i -> mentions.set(i, memberList.get(i)));
-
+
Element element = null;
if(headCounts.size() == 0){
element = new Event(jCas);
@@ -491,14 +485,7 @@ public class MentionClusterCoreferenceAn
}
}
- private static boolean isUmlsEvent(IdentifiedAnnotation a){
- return a instanceof DiseaseDisorderMention || a instanceof SignSymptomMention || a instanceof ProcedureMention || a instanceof MedicationMention;
- }
-
- private static boolean isUmlsAnnotation(IdentifiedAnnotation a){
- return isUmlsEvent(a) || a instanceof AnatomicalSiteMention;
- }
-
+
// private static final boolean dominates(Annotation arg1, Annotation arg2) {
// return (arg1.getBegin() <= arg2.getBegin() && arg1.getEnd() >= arg2.getEnd());
// }
Added: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java?rev=1773100&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java (added)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java Wed Dec 7 15:51:33 2016
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.coreference.util;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.ctakes.typesystem.type.textsem.MedicationMention;
+import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
+import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
+import org.apache.ctakes.utils.struct.MapFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+public class MarkableUtilities {
+ /**
+ * Indexes the Markables that are members of a CollectionTextRelation by the UMLS IdentifiedAnnotations that share their dependency head.
+ * @param jCas the CAS whose CollectionTextRelations, ConllDependencyNodes and IdentifiedAnnotations are read
+ * @return Mapping from each Markable that is a member of some CollectionTextRelation in the CAS (not every Markable in the CAS) to the
+ * UMLS IdentifiedAnnotations that share the same dependency head; a member with no such annotation gets an empty list.
+ * Coreference takes place over Markables which may include IdentifiedAnnotations as well as pronouns. So we
+ * get the head token for every member Markable, then find all the IdentifiedAnnotations that cover that head, then
+ * filter to those that are UMLS types (see isUmlsAnnotation) and whose nominal head node is the same node as the Markable's head.
+ */
+ public static Map<Markable,List<IdentifiedAnnotation>> indexCoveringUmlsAnnotations(JCas jCas){
+ Map<Markable,List<IdentifiedAnnotation>> map = new HashMap<>();
+
+ Map<ConllDependencyNode, Collection<IdentifiedAnnotation>> dep2event = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class); // dependency node -> IdentifiedAnnotations covering it
+
+ for(CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)){
+ List<Markable> memberList = new ArrayList<>(JCasUtil.select(cluster.getMembers(), Markable.class));
+ for(Markable member : memberList){
+ map.put(member, new ArrayList<>()); // every member gets an entry, even if nothing matches below
+ ConllDependencyNode head = MapFactory.get(MarkableHeadTreeCreator.getKey(jCas), member); // presumably the head node stored earlier by MarkableHeadTreeCreator -- TODO confirm
+
+ for(IdentifiedAnnotation covering : dep2event.get(head)){ // NOTE(review): assumes get(head) never returns null (also for a null head) -- confirm against JCasUtil.indexCovering
+ if(isUmlsAnnotation(covering) && head == DependencyUtility.getNominalHeadNode(jCas, covering)){ // identity check: must be the very same head node object
+ map.get(member).add(covering);
+ }
+ }
+ }
+ }
+ return map;
+ }
+
+ private static boolean isUmlsEvent(IdentifiedAnnotation a){ // true for disease/disorder, sign/symptom, procedure and medication mentions
+ return a instanceof DiseaseDisorderMention || a instanceof SignSymptomMention || a instanceof ProcedureMention || a instanceof MedicationMention;
+ }
+
+ private static boolean isUmlsAnnotation(IdentifiedAnnotation a){ // the event types above plus anatomical site mentions
+ return isUmlsEvent(a) || a instanceof AnatomicalSiteMention;
+ }
+
+}