You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2020/09/28 17:00:19 UTC

svn commit: r1882088 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation: IdentifiedAnnotationBuilder.java SemanticGroup.java

Author: seanfinan
Date: Mon Sep 28 17:00:19 2020
New Revision: 1882088

URL: http://svn.apache.org/viewvc?rev=1882088&view=rev
Log:
IdentifiedAnnotationBuilder : easily construct clinical annotations
SemanticGroup : get group by ctakes type integer

Added:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java
Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java?rev=1882088&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationBuilder.java Mon Sep 28 17:00:19 2020
@@ -0,0 +1,264 @@
+package org.apache.ctakes.core.util.annotation;
+
+
+import org.apache.ctakes.core.util.Pair;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/26/2020
+ */
+final public class IdentifiedAnnotationBuilder {
+
+   static private final Logger LOGGER = Logger.getLogger( "IdentifiedAnnotationBuilder" );
+   static private final Pair<Integer> NULL_SPAN = new Pair<>( -1, -1 );  // sentinel: no text span has been specified
+
+   private Pair<Integer> _textSpan = NULL_SPAN;  // begin (value1) and end (value2) of the annotation to build
+   private SemanticGroup _group = SemanticGroup.UNKNOWN;  // may be replaced by an inferred group in build
+   private SemanticTui _type = SemanticTui.UNKNOWN;  // primary type; may be replaced by an inferred type in build
+   private final Collection<SemanticTui> _tuis = EnumSet.noneOf( SemanticTui.class );  // types added with addTui
+   private String _cui = "";  // empty until set; build refuses to create an annotation without it
+   private String _prefText = "";  // empty means no preferred text is set on the concept
+   private final Map<String, Collection<String>> _schemaCodes = new HashMap<>();  // schema name -> codes in that schema
+
+
+   /**
+    * @param begin text span character index, used as the begin of the built annotation
+    * @param end   text span character index, used as the end of the built annotation
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder span( final int begin, final int end ) {
+      return span( new Pair<>( begin, end ) );
+   }
+
+   /**
+    * @param textSpan text span character indices: value1 is the begin, value2 is the end.  Checked when build is called
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder span( final Pair<Integer> textSpan ) {
+      _textSpan = textSpan;
+      return this;
+   }
+
+   /**
+    * @param semanticGroup for the annotation.  If left UNKNOWN then build attempts to infer it from the type or tuis
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder group( final SemanticGroup semanticGroup ) {
+      _group = semanticGroup;
+      return this;
+   }
+
+   /**
+    * @param semanticGroup name for the annotation, resolved with SemanticGroup.getGroup
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder group( final String semanticGroup ) {
+      return group( SemanticGroup.getGroup( semanticGroup ) );
+   }
+
+   /**
+    * @param semanticType primary type for the annotation.  If left UNKNOWN then build attempts to pick one of the added tuis
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder type( final SemanticTui semanticType ) {
+      _type = semanticType;
+      return this;
+   }
+
+   /**
+    * @param semanticType name for the annotation, resolved with SemanticTui.getTui and stored as the primary type
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder type( final String semanticType ) {
+      return type( SemanticTui.getTui( semanticType ) );
+   }
+
+   /**
+    * @param tui representing the primary semantic type, resolved with SemanticTui.getTui.  Replaces any previously set type
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder tui( final String tui ) {
+      return type( SemanticTui.getTui( tui ) );
+   }
+
+   /**
+    * @param semanticType representing possibly secondary semantic type.  Kept in a set, so adding it again has no effect
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder addTui( final SemanticTui semanticType ) {
+      _tuis.add( semanticType );
+      return this;
+   }
+
+   /**
+    * @param tui representing possibly secondary semantic type, resolved with SemanticTui.getTui
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder addTui( final String tui ) {
+      return addTui( SemanticTui.getTui( tui ) );
+   }
+
+   /**
+    * @param cui concept unique identifier.  Required: build returns null while this is empty
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder cui( final String cui ) {
+      _cui = cui;
+      return this;
+   }
+
+   /**
+    * @param text preferred text for the concept.  While this is empty no preferred text is set on the created concept
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder preferredText( final String text ) {
+      _prefText = text;
+      return this;
+   }
+
+   /**
+    * Can be used multiple times.  Codes are collected per schema and a repeated schema / code pair is stored once.
+    *
+    * @param schema name of an encoding schema.  e.g. SNOMEDCT_US
+    * @param code   code for this annotation in that schema
+    * @return this builder
+    */
+   public IdentifiedAnnotationBuilder addSchemaCode( final String schema, final String code ) {
+      _schemaCodes.computeIfAbsent( schema, s -> new HashSet<>() ).add( code );
+      return this;
+   }
+
+
+   /**
+    * @param textSpan  begin (value1) and end (value2) character offsets of the annotation; the end offset is exclusive
+    * @param docLength length of the document text
+    * @return false if the span is unspecified, empty or reversed, or outside the document text.
+    */
+   static private boolean isSpanValid( final Pair<Integer> textSpan, final int docLength ) {
+      if ( textSpan.equals( NULL_SPAN ) ) {
+         LOGGER.error( "A Text Span must be specified to build an IdentifiedAnnotation." );
+         return false;
+      }
+      if ( textSpan.getValue1() >= textSpan.getValue2() ) {
+         LOGGER.error( "The Text Span ("
+                       + textSpan.getValue1() + "," + textSpan.getValue2()
+                       + ") is poorly formed.  A valid text span is required to build an IdentifiedAnnotation." );
+         return false;
+      }
+      if ( textSpan.getValue1() < 0 || textSpan.getValue2() > docLength ) {  // end == docLength covers the last character
+         LOGGER.error( "The Text Span ("
+                       + textSpan.getValue1() + "," + textSpan.getValue2()
+                       + ") must be within the document text bounds (0," + docLength
+                       + ") to build an IdentifiedAnnotation." );
+         return false;
+      }
+      return true;
+   }
+
+
+   /**
+    * @param jcas          cas in which the UmlsConcept is created
+    * @param cui           concept unique identifier, always set on the concept
+    * @param type          semantic type; its name is set as the tui unless it is UNKNOWN
+    * @param preferredText set on the concept unless empty
+    * @param schema        coding scheme; when empty neither the scheme nor the code is set
+    * @param code          code in the schema; only set when schema is not empty
+    * @return a UmlsConcept for the annotation
+    */
+   static private UmlsConcept createUmlsConcept( final JCas jcas,
+                                                 final String cui,
+                                                 final SemanticTui type,
+                                                 final String preferredText,
+                                                 final String schema,
+                                                 final String code ) {
+      final UmlsConcept umlsConcept = new UmlsConcept( jcas );
+      umlsConcept.setCui( cui );
+      if ( type != SemanticTui.UNKNOWN ) {
+         umlsConcept.setTui( type.name() );
+      }
+      if ( !preferredText.isEmpty() ) {
+         umlsConcept.setPreferredText( preferredText );
+      }
+      if ( !schema.isEmpty() ) {
+         umlsConcept.setCodingScheme( schema );
+         umlsConcept.setCode( code );
+      }
+      return umlsConcept;
+   }
+
+
+   /**
+    * Creates the annotation, attaches its UmlsConcepts and adds it to the cas indexes.  A group or primary type
+    * that was left UNKNOWN is inferred from the other type information and stored on this builder.
+    * @param jcas cas in which the annotation is created and indexed
+    * @return an IdentifiedAnnotation with properties specified or null if the cui or span are illegal.
+    */
+   public IdentifiedAnnotation build( final JCas jcas ) {
+      if ( !isSpanValid( _textSpan, jcas.getDocumentText().length() ) ) {
+         return null;
+      }
+      if ( _cui == null || _cui.isEmpty() ) {  // a null cui is as unusable as an empty one
+         LOGGER.error( "A Concept Unique Identifier (CUI) must be specified to build an IdentifiedAnnotation." );
+         return null;
+      }
+      if ( _group == SemanticGroup.UNKNOWN && _type != SemanticTui.UNKNOWN ) {  // group of the primary type
+         _group = _type.getGroup();
+      }
+      if ( _group == SemanticGroup.UNKNOWN && !_tuis.isEmpty() ) {  // otherwise the best group among the added tuis
+         final Collection<SemanticGroup> groups = _tuis.stream()
+                                                       .map( SemanticTui::getGroup )
+                                                       .collect( Collectors.toSet() );
+         _group = SemanticGroup.getBestGroup( groups );
+      }
+      if ( _type == SemanticTui.UNKNOWN && !_tuis.isEmpty() ) {  // added tui of that group whose name sorts first
+         _type = _tuis.stream()
+                      .filter( t -> t.getGroup() == _group )
+                      .min( Comparator.comparing( SemanticTui::name ) )
+                      .orElse( SemanticTui.UNKNOWN );
+      }
+      final IdentifiedAnnotation annotation = _group
+            .getCreator()
+            .apply( jcas );
+      annotation.setTypeID( _group.getCode() );
+      annotation.setBegin( _textSpan.getValue1() );
+      annotation.setEnd( _textSpan.getValue2() );
+      annotation.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_EXPLICIT_AE );
+
+      final Collection<UmlsConcept> umlsConcepts = new HashSet<>();
+      if ( _schemaCodes.isEmpty() ) {
+         umlsConcepts.add( createUmlsConcept( jcas, _cui, _type, _prefText, "", "" ) );
+      } else {  // one concept per schema code, each with the cui, primary type and preferred text
+         for ( Map.Entry<String, Collection<String>> schemaCodes : _schemaCodes.entrySet() ) {
+            for ( String code : schemaCodes.getValue() ) {
+               umlsConcepts.add( createUmlsConcept( jcas, _cui, _type, _prefText, schemaCodes.getKey(), code ) );
+            }
+         }
+      }
+      _tuis.stream()  // one bare concept per added tui that is neither the primary type nor UNKNOWN
+           .filter( t -> t != _type )
+           .filter( t -> t != SemanticTui.UNKNOWN )
+           .map( t -> createUmlsConcept( jcas, _cui, t, "", "", "" ) )
+           .forEach( umlsConcepts::add );
+      final FSArray conceptArr = new FSArray( jcas, umlsConcepts.size() );
+      int arrIdx = 0;
+      for ( UmlsConcept umlsConcept : umlsConcepts ) {
+         conceptArr.set( arrIdx, umlsConcept );
+         arrIdx++;
+      }
+      annotation.setOntologyConceptArr( conceptArr );
+      annotation.addToIndexes();
+      return annotation;
+   }
+
+
+}

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java?rev=1882088&r1=1882087&r2=1882088&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java Mon Sep 28 17:00:19 2020
@@ -6,6 +6,7 @@ import org.apache.uima.jcas.JCas;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Comparator;
+import java.util.HashSet;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
@@ -82,11 +83,18 @@ public enum SemanticGroup {
    }
 
    static public Collection<SemanticGroup> getGroups( final IdentifiedAnnotation annotation ) {
-      return SemanticTui.getTuis( annotation )
-            .stream()
-            .map( SemanticTui::getGroup )
-            .distinct()
-            .collect( Collectors.toList() );
+      final Collection<SemanticGroup> groups
+            = SemanticTui.getTuis( annotation )
+                         .stream()
+                         .map( SemanticTui::getGroup )
+                         .collect( Collectors.toSet() );
+      final SemanticGroup typeIdGroup = getBestTypeIdGroup( annotation );
+      if ( typeIdGroup == UNKNOWN || groups.contains( typeIdGroup ) ) {
+         return groups;
+      }
+      final Collection<SemanticGroup> allGroups = new HashSet<>( groups );
+      allGroups.add( typeIdGroup );
+      return allGroups;
    }
 
    static private final class BestGrouper implements Comparator<SemanticGroup> {
@@ -104,13 +112,25 @@ public enum SemanticGroup {
    }
 
    static public SemanticGroup getBestGroup( final IdentifiedAnnotation annotation ) {
+      final SemanticGroup typeIdGroup = getBestTypeIdGroup( annotation );
+      if ( typeIdGroup != UNKNOWN ) {
+         return typeIdGroup;
+      }
       return getBestGroup( getGroups( annotation ) );
    }
 
    static public SemanticGroup getBestGroup( final Collection<SemanticGroup> groups ) {
       return groups.stream()
-            .min( BestGrouper.INSTANCE )
-            .orElse( UNKNOWN );
+                   .min( BestGrouper.INSTANCE )
+                   .orElse( UNKNOWN );
+   }
+
+   static private SemanticGroup getBestTypeIdGroup( final IdentifiedAnnotation annotation ) {
+      final int typeId = annotation.getTypeID();
+      return Arrays.stream( values() )
+                   .filter( g -> g.getCode() == typeId )
+                   .findFirst()
+                   .orElse( UNKNOWN );
    }
 
 }