You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2016/12/07 15:51:33 UTC

svn commit: r1773100 - in /ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference: ae/MentionClusterCoreferenceAnnotator.java util/MarkableUtilities.java

Author: tmill
Date: Wed Dec  7 15:51:33 2016
New Revision: 1773100

URL: http://svn.apache.org/viewvc?rev=1773100&view=rev
Log:
Moved some code to a utility class so I can use it in other projects.

Added:
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java
Modified:
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java?rev=1773100&r1=1773099&r2=1773100&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java Wed Dec  7 15:51:33 2016
@@ -1,11 +1,8 @@
 package org.apache.ctakes.coreference.ae;
 
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
-
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -29,7 +26,7 @@ import org.apache.ctakes.coreference.ae.
 import org.apache.ctakes.coreference.ae.pairing.cluster.HeadwordPairer;
 import org.apache.ctakes.coreference.ae.pairing.cluster.SectionHeaderPairer;
 import org.apache.ctakes.coreference.ae.pairing.cluster.SentenceDistancePairer;
-import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.coreference.util.MarkableUtilities;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
 import org.apache.ctakes.typesystem.type.refsem.AnatomicalSite;
@@ -42,7 +39,6 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelationIdentifiedAnnotationRelation;
 import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
-import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
 import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -52,7 +48,6 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.utils.struct.CounterMap;
-import org.apache.ctakes.utils.struct.MapFactory;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -426,20 +421,19 @@ public class MentionClusterCoreferenceAn
   private static void createEventClusters(JCas jCas) throws AnalysisEngineProcessException{
     // First, find the largest span identified annotation that shares a headword with the markable
     // do that by finding the head of the markable, then finding the identifiedannotations that cover it:
-    Map<ConllDependencyNode, Collection<IdentifiedAnnotation>> dep2event = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
+    
+    Map<Markable, List<IdentifiedAnnotation>> markable2annotations = MarkableUtilities.indexCoveringUmlsAnnotations(jCas);
+
     for(CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)){
       CounterMap<Class<? extends IdentifiedAnnotation>> headCounts = new CounterMap<>();
       List<Markable> memberList = new ArrayList<>(JCasUtil.select(cluster.getMembers(), Markable.class));
       for(Markable member : memberList){
-        ConllDependencyNode head = MapFactory.get(getKey(jCas), member);
-        // Now find all the identified annotations that share this head:
+        // Now find the largest covering annotation:
         IdentifiedAnnotation largest = null;
-        for(IdentifiedAnnotation covering : dep2event.get(head)){
-          if(isUmlsAnnotation(covering) && head == DependencyUtility.getNominalHeadNode(jCas, covering)){
-            if(largest == null || (covering.getEnd()-covering.getBegin() > (largest.getEnd()-largest.getBegin()))){
-              largest = covering;
-            }
-          }            
+        for(IdentifiedAnnotation covering : markable2annotations.get(member)){
+          if(largest == null || (covering.getEnd()-covering.getBegin() > (largest.getEnd()-largest.getBegin()))){
+            largest = covering;
+          }
         }
         if(largest != null){
           headCounts.add(largest.getClass());
@@ -447,7 +441,7 @@ public class MentionClusterCoreferenceAn
       }
       FSArray mentions = new FSArray(jCas, memberList.size());
       IntStream.range(0, memberList.size()).forEach(i -> mentions.set(i, memberList.get(i)));
-      
+
       Element element = null;
       if(headCounts.size() == 0){
         element = new Event(jCas);
@@ -491,14 +485,7 @@ public class MentionClusterCoreferenceAn
     }
   }
   
-  private static boolean isUmlsEvent(IdentifiedAnnotation a){
-    return a instanceof DiseaseDisorderMention || a instanceof SignSymptomMention || a instanceof ProcedureMention || a instanceof MedicationMention;
-  }
-  
-  private static boolean isUmlsAnnotation(IdentifiedAnnotation a){
-    return isUmlsEvent(a) || a instanceof AnatomicalSiteMention;
-  }
-  
+ 
 //  private static final boolean dominates(Annotation arg1, Annotation arg2) {
 //    return (arg1.getBegin() <= arg2.getBegin() && arg1.getEnd() >= arg2.getEnd());
 //  }

Added: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java?rev=1773100&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java (added)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java Wed Dec  7 15:51:33 2016
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.coreference.util;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.ctakes.typesystem.type.textsem.MedicationMention;
+import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
+import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
+import org.apache.ctakes.utils.struct.MapFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+public class MarkableUtilities {
+  /**
+   * Indexes, for each clustered Markable, the UMLS annotations that share its dependency head.
+   * @param jCas the CAS whose CollectionTextRelation clusters are indexed
+   * @return Mapping from each Markable that is a member of a CollectionTextRelation in the CAS to the UMLS IdentifiedAnnotations that share its dependency head.
+   * Coreference takes place over Markables, which may include IdentifiedAnnotations as well as pronouns. So we
+   * get the head token for every such Markable, then find all the IdentifiedAnnotations that cover that head, then
+   * filter to those that are UMLS semantic group types and whose dependency head is the same as the Markable's.
+   */
+  public static Map<Markable,List<IdentifiedAnnotation>> indexCoveringUmlsAnnotations(JCas jCas){
+    Map<Markable,List<IdentifiedAnnotation>> map = new HashMap<>();
+
+    Map<ConllDependencyNode, Collection<IdentifiedAnnotation>> dep2event = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
+
+    for(CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)){
+      List<Markable> memberList = new ArrayList<>(JCasUtil.select(cluster.getMembers(), Markable.class));
+      for(Markable member : memberList){
+        map.put(member, new ArrayList<>());
+        ConllDependencyNode head = MapFactory.get(MarkableHeadTreeCreator.getKey(jCas), member);
+
+        for(IdentifiedAnnotation covering : dep2event.get(head)){
+          if(isUmlsAnnotation(covering) && head == DependencyUtility.getNominalHeadNode(jCas, covering)){
+            map.get(member).add(covering);
+          }
+        }
+      }
+    }
+    return map;
+  }
+
+  private static boolean isUmlsEvent(IdentifiedAnnotation a){
+    return a instanceof DiseaseDisorderMention || a instanceof SignSymptomMention || a instanceof ProcedureMention || a instanceof MedicationMention;
+  }
+
+  private static boolean isUmlsAnnotation(IdentifiedAnnotation a){
+    return isUmlsEvent(a) || a instanceof AnatomicalSiteMention;
+  }
+
+}