You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by st...@apache.org on 2012/10/05 04:23:59 UTC

svn commit: r1394349 - /incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java

Author: stevenbethard
Date: Fri Oct  5 02:23:58 2012
New Revision: 1394349

URL: http://svn.apache.org/viewvc?rev=1394349&view=rev
Log:
Makes some more progress on loading SHARP Knowtator annotations

Modified:
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java

Modified: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java?rev=1394349&r1=1394348&r2=1394349&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java (original)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java Fri Oct  5 02:23:58 2012
@@ -1,8 +1,8 @@
 package org.apache.ctakes.core.ae;
 
-import java.io.File;
 import java.io.IOException;
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -10,6 +10,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.logging.Logger;
 
 import org.apache.ctakes.core.knowtator.KnowtatorAnnotation;
 import org.apache.ctakes.core.knowtator.KnowtatorXMLParser;
@@ -23,8 +24,10 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
@@ -33,24 +36,30 @@ import org.apache.uima.jcas.cas.TOP;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.jdom2.JDOMException;
 import org.uimafit.component.JCasAnnotator_ImplBase;
-import org.uimafit.descriptor.ConfigurationParameter;
-
-public abstract class SHARPKnowtatorXMLReader extends JCasAnnotator_ImplBase {
-
-  public static final String PARAM_KNOWTATOR_XML_DIRECTORY = "knowtatorXMLDirectory";
-
-  @ConfigurationParameter(name = PARAM_KNOWTATOR_XML_DIRECTORY, mandatory = true)
-  protected File knowtatorXMLDirectory;
+import org.uimafit.util.JCasUtil;
 
+public class SHARPKnowtatorXMLReader extends JCasAnnotator_ImplBase {
+  
   /**
    * Given the URI of the plain text file, determines the URI of the Knowtator XML file
    */
-  protected abstract URI getKnowtatorXML(JCas jCas) throws AnalysisEngineProcessException;
+  protected URI getKnowtatorXML(JCas jCas) throws AnalysisEngineProcessException {
+    String textURI = JCasUtil.selectSingle(jCas, DocumentID.class).getDocumentID();
+    String xmlURI = textURI.replaceAll("Knowtator/text", "Knowtator_XML") + ".knowtator.xml";
+    System.err.println(xmlURI);
+    try {
+      return new URI(xmlURI);
+    } catch (URISyntaxException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+  }
 
   /**
    * Returns the names of the annotators in the Knowtator files that represent the gold standard
    */
-  protected abstract String[] getAnnotatorNames();
+  protected String[] getAnnotatorNames() {
+    return new String[] { "consensus set annotator team" };
+  }
 
   @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
@@ -72,6 +81,8 @@ public abstract class SHARPKnowtatorXMLR
     Set<String> entityRelationTypes = new HashSet<String>();
     entityRelationTypes.add("location_of");
     entityRelationTypes.add("degree_of");
+    entityRelationTypes.add("causes/brings_about");
+    entityRelationTypes.add("indicates");
     Set<String> eventRelationTypes = new HashSet<String>();
     eventRelationTypes.add("TLINK");
     eventRelationTypes.add("ALINK");
@@ -165,6 +176,19 @@ public abstract class SHARPKnowtatorXMLR
             idAnnotationMap,
             delayedFeatures);
 
+      } else if ("Phenomena".equals(annotation.type)) {
+        EntityMention entityMention = new EntityMention(jCas, coveringSpan.begin, coveringSpan.end);
+        addEntityMentionFeatures(
+            annotation,
+            entityMention,
+            jCas,
+            CONST.NE_TYPE_ID_UNKNOWN /* TODO: is this the correct type? */,
+            stringSlots,
+            booleanSlots,
+            annotationSlots,
+            idAnnotationMap,
+            delayedFeatures);
+
       } else if ("Procedure".equals(annotation.type)) {
         EntityMention entityMention = new EntityMention(jCas, coveringSpan.begin, coveringSpan.end);
         addEntityMentionFeatures(
@@ -177,6 +201,24 @@ public abstract class SHARPKnowtatorXMLR
             annotationSlots,
             idAnnotationMap,
             delayedFeatures);
+        KnowtatorAnnotation bodyLocation = annotationSlots.remove("body_location");
+        if (bodyLocation != null) {
+          delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, bodyLocation) {
+            @Override
+            protected void setValue(TOP valueAnnotation) {
+              // TODO: this.annotation.setBodyLocation(...)
+            }
+          });
+        }
+        KnowtatorAnnotation historyOf = annotationSlots.remove("historyOf_CU");
+        if (historyOf != null) {
+          delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, historyOf) {
+            @Override
+            protected void setValue(TOP valueAnnotation) {
+              // TODO: this.annotation.setHistoryOf(...)
+            }
+          });
+        }
 
       } else if ("Sign_symptom".equals(annotation.type)) {
         EntityMention entityMention = new EntityMention(jCas, coveringSpan.begin, coveringSpan.end);
@@ -199,6 +241,15 @@ public abstract class SHARPKnowtatorXMLR
             }
           });
         }
+        KnowtatorAnnotation severity = annotationSlots.remove("severity");
+        if (severity != null) {
+          delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, severity) {
+            @Override
+            protected void setValue(TOP valueAnnotation) {
+              // TODO: this.annotation.setSeverity(...)
+            }
+          });
+        }
 
       } else if ("EVENT".equals(annotation.type)) {
 
@@ -278,51 +329,109 @@ public abstract class SHARPKnowtatorXMLR
       } else if ("generic_class".equals(annotation.type)) {
         // TODO: there's currently no Generic in the type system
         boolean value = booleanSlots.remove("generic_normalization");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
 
       } else if ("severity_class".equals(annotation.type)) {
-        // TODO: severity has a span, but it extends TOP
         Severity severity = new Severity(jCas);
         severity.setValue(stringSlots.remove("severity_normalization"));
         severity.addToIndexes();
-        idTopMap.put(annotation.id, severity);
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        modifier.setNormalizedForm(severity);
+        idAnnotationMap.put(annotation.id, modifier);
 
       } else if ("conditional_class".equals(annotation.type)) {
-        // TODO: there's currently no Generic in the type system
+        // TODO: there's currently no Conditional in the type system
         boolean value = booleanSlots.remove("conditional_normalization");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
 
       } else if ("course_class".equals(annotation.type)) {
-        // TODO: course has a span, but it extends TOP
         Course course = new Course(jCas);
         course.setValue(stringSlots.remove("course_normalization"));
         course.addToIndexes();
-        idTopMap.put(annotation.id, course);
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        modifier.setNormalizedForm(course);
+        idAnnotationMap.put(annotation.id, modifier);
 
       } else if ("uncertainty_indicator_class".equals(annotation.type)) {
         // TODO: there's currently no Uncertainty in the type system
         String value = stringSlots.remove("uncertainty_indicator_normalization");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
 
       } else if ("distal_or_proximal".equals(annotation.type)) {
         // TODO: there's currently no Distal or Proximal in the type system
         String value = stringSlots.remove("distal_or_proximal_normalization");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
 
       } else if ("Person".equals(annotation.type)) {
         // TODO: there's currently no Subject in the type system
         String value = stringSlots.remove("subject_normalization_CU");
+        // TODO: what does the associatedCode slot mean on a Person annotation?
+        String code = stringSlots.remove("associatedCode");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
 
       } else if ("body_side_class".equals(annotation.type)) {
-        // TODO: BodySide has a span, but it extends TOP
         BodySide bodySide = new BodySide(jCas);
         bodySide.setValue(stringSlots.remove("body_side_normalization"));
         bodySide.addToIndexes();
-        idTopMap.put(annotation.id, bodySide);
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        modifier.setNormalizedForm(bodySide);
+        idAnnotationMap.put(annotation.id, modifier);
 
       } else if ("negation_indicator_class".equals(annotation.type)) {
         // TODO: there's currently no Negation in the type system
         String value = stringSlots.remove("negation_indicator_normalization");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
+
+      } else if ("historyOf_indicator_class".equals(annotation.type)) {
+        // TODO: there's currently no HistoryOf in the type system
+        String value = stringSlots.remove("historyOf_normalization");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
+
+      } else if ("superior_or_inferior".equals(annotation.type)) {
+        // TODO: there's currently no Superior or Inferior in the type system
+        String value = stringSlots.remove("superior_or_inferior_normalization");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
+
+      } else if ("medial_or_lateral".equals(annotation.type)) {
+        // TODO: there's currently no Medial or Lateral in the type system
+        String value = stringSlots.remove("medial_or_lateral_normalization");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
+
+      } else if ("Route".equals(annotation.type)) {
+        // TODO: there's currently no Route in the type system
+        String value = stringSlots.remove("route_values");
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
+
+      } else if ("Clinical_attribute".equals(annotation.type)) {
+        // TODO: determine what a Clinical_attribute annotation represents and how it should be mapped
+        Modifier modifier = new Modifier(jCas, coveringSpan.begin, coveringSpan.end);
+        // modifier.setNormalizedForm(...);
+        idAnnotationMap.put(annotation.id, modifier);
 
       } else if (eventRelationTypes.contains(annotation.type)) {
         // store the ALINK information for later, once all annotations are in the CAS
         DelayedRelation relation = new DelayedRelation();
+        relation.sourceFile = knowtatorXML;
         relation.annotation = annotation;
         relation.source = annotationSlots.remove("Event");
         relation.target = annotationSlots.remove("related_to");
@@ -332,6 +441,7 @@ public abstract class SHARPKnowtatorXMLR
       } else if (entityRelationTypes.contains(annotation.type)) {
         // store the relation information for later, once all annotations are in the CAS
         DelayedRelation relation = new DelayedRelation();
+        relation.sourceFile = knowtatorXML;
         relation.annotation = annotation;
         relation.source = annotationSlots.remove("Argument_CU");
         relation.target = annotationSlots.remove("Related_to_CU");
@@ -339,7 +449,10 @@ public abstract class SHARPKnowtatorXMLR
         delayedRelations.add(relation);
 
       } else {
-        throw new IllegalArgumentException("Unrecognized type: " + annotation.type);
+        throw new UnsupportedOperationException(String.format(
+            "unrecognized type '%s' in %s",
+            annotation.type,
+            knowtatorXML));
       }
 
       // make sure all slots have been consumed
@@ -350,9 +463,12 @@ public abstract class SHARPKnowtatorXMLR
       for (Map.Entry<String, Set<String>> entry : slotGroups.entrySet()) {
         Set<String> remainingSlots = entry.getValue();
         if (!remainingSlots.isEmpty()) {
-          String format = "%s has unprocessed %s: %s";
-          String message = String.format(format, annotation.type, entry.getKey(), remainingSlots);
-          throw new UnsupportedOperationException(message);
+          throw new UnsupportedOperationException(String.format(
+              "%s has unprocessed %s %s in %s",
+              annotation.type,
+              entry.getKey(),
+              remainingSlots,
+              knowtatorXML));
         }
       }
     }
@@ -413,6 +529,17 @@ public abstract class SHARPKnowtatorXMLR
       });
     }
 
+    // uncertainty must be delayed until the Uncertainty annotations are present
+    KnowtatorAnnotation uncertainty = annotationSlots.remove("uncertainty_indicator_CU");
+    if (uncertainty != null) {
+      delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, uncertainty) {
+        @Override
+        protected void setValue(TOP valueAnnotation) {
+          // TODO: this.annotation.setUncertainty(...)
+        }
+      });
+    }
+
     // subject must be delayed until the Subject annotations are present
     KnowtatorAnnotation subject = annotationSlots.remove("subject_CU");
     if (subject != null) {
@@ -462,6 +589,10 @@ public abstract class SHARPKnowtatorXMLR
   }
 
   private static class DelayedRelation {
+    private static Logger LOGGER = Logger.getLogger(DelayedRelation.class.getName());
+
+    public URI sourceFile;
+
     public KnowtatorAnnotation annotation;
 
     public KnowtatorAnnotation source;
@@ -473,6 +604,25 @@ public abstract class SHARPKnowtatorXMLR
     public KnowtatorAnnotation uncertainty;
 
     public void addToIndexes(JCas jCas, Map<String, Annotation> idAnnotationMap) {
+      if (this.source == null) {
+        // throw new UnsupportedOperationException(String.format(
+        LOGGER.warning(String.format(
+            "no source for '%s' with annotationSlots %s in %s",
+            this.annotation.id,
+            this.annotation.annotationSlots.keySet(),
+            this.sourceFile));
+        return;
+      }
+      if (this.target == null) {
+        // throw new UnsupportedOperationException(String.format(
+        LOGGER.warning(String.format(
+            "no target for '%s' with annotationSlots %s in %s",
+            this.annotation.id,
+            this.annotation.annotationSlots.keySet(),
+            this.sourceFile));
+        return;
+      }
+      
       // look up the relations in the map and issue an error if they're missing
       Annotation sourceMention = idAnnotationMap.get(this.source.id);
       Annotation targetMention = idAnnotationMap.get(this.target.id);
@@ -483,14 +633,21 @@ public abstract class SHARPKnowtatorXMLR
         badId = this.target.id;
       }
       if (badId != null) {
-        String message = String.format("no annotation with id '%s'", badId);
-        throw new UnsupportedOperationException(message);
+        throw new UnsupportedOperationException(String.format(
+            "no annotation with id '%s' in %s",
+            badId,
+            this.sourceFile));
       }
 
       // get the uncertainty
       if (this.uncertainty != null) {
-        Annotation uncertainty = idAnnotationMap.get(this.uncertainty);
-        System.err.println(uncertainty.getCoveredText());
+        Annotation uncertainty = idAnnotationMap.get(this.uncertainty.id);
+        if (uncertainty == null) {
+          throw new UnsupportedOperationException(String.format(
+              "no annotation with id '%s' in %s",
+              this.uncertainty.id,
+              this.sourceFile));
+        }
       }
 
       // add the relation to the CAS