You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2014/11/04 18:28:38 UTC

svn commit: r1636659 - in /ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer: MetaWsdTermConsumer.java SemanticCleanupTermConsumer.java

Author: seanfinan
Date: Tue Nov  4 17:28:38 2014
New Revision: 1636659

URL: http://svn.apache.org/r1636659
Log:
Rename of MetaWsdTermConsumer to SemanticCleanupTermConsumer.
Also added extra semantic-group cui culling.

Added:
    ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java
      - copied, changed from r1636649, ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java
Removed:
    ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java

Copied: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java (from r1636649, ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java?p2=ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java&p1=ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java&r1=1636649&r2=1636659&rev=1636659&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java Tue Nov  4 17:28:38 2014
@@ -19,18 +19,19 @@ import java.util.logging.Logger;
  * Also a start at wsd by trim of overlapping terms of conflicting but related semantic group.
  * In this incarnation, any sign / symptom that is within a disease / disorder is assumed to be
  * less specific than the disease disorder and is discarded.
+ * In addition, any s/s or d/d that has the same span as an anatomical site is discarded.
  *
  * @author SPF , chip-nlp
  * @version %I%
  * @since 10/24/2014
  */
-public class MetaWsdTermConsumer extends AbstractTermConsumer {
+public class SemanticCleanupTermConsumer extends AbstractTermConsumer {
 
    static private final Logger LOGGER = Logger.getLogger( "MetaWsdTermConsumer" );
 
    private final TermConsumer _idHitConsumer;
 
-   public MetaWsdTermConsumer( final UimaContext uimaContext, final Properties properties ) {
+   public SemanticCleanupTermConsumer( final UimaContext uimaContext, final Properties properties ) {
       super( uimaContext, properties );
       _idHitConsumer = new PrecisionTermConsumer( uimaContext, properties );
    }
@@ -58,9 +59,8 @@ public class MetaWsdTermConsumer extends
       final Map<Integer, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> groupedSemanticCuis
             = new HashMap<>();
       // The dictionary may have more than one type, create a map of types to terms and use them all
-      final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms = new HashSetMap<>();
       for ( Integer cTakesSemantic : usedcTakesSemantics ) {
-         semanticTerms.clear();
+         final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms = new HashSetMap<>();
          for ( Map.Entry<TextSpan, ? extends Collection<Long>> spanCuis : textSpanCuis ) {
             for ( Long cuiCode : spanCuis.getValue() ) {
                final Collection<Concept> concepts = cuiConcepts.getCollection( cuiCode );
@@ -69,30 +69,50 @@ public class MetaWsdTermConsumer extends
                }
             }
          }
-         groupedSemanticCuis.put( cTakesSemantic, PrecisionTermConsumer.createPreciseTerms( semanticTerms ) );
+         groupedSemanticCuis.put( cTakesSemantic, semanticTerms );
       }
-      // Clean up sign/symptoms that are also within disease/disorder spans
-      semanticTerms.clear();
-      for ( Map.Entry<TextSpan, ? extends Collection<Long>> diseases : groupedSemanticCuis
-            .get( CONST.NE_TYPE_ID_DISORDER ) ) {
-         semanticTerms.addAllValues( diseases.getKey(), diseases.getValue() );
-      }
-      for ( Map.Entry<TextSpan, ? extends Collection<Long>> diseases : groupedSemanticCuis
-            .get( CONST.NE_TYPE_ID_FINDING ) ) {
-         semanticTerms.addAllValues( diseases.getKey(), diseases.getValue() );
+      // Clean up sign/symptoms and disease/disorder spans that are also anatomical sites
+      if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_ANATOMICAL_SITE ) ) {
+         if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_FINDING ) ) {
+            for ( TextSpan anatomicalSpan : groupedSemanticCuis.get( CONST.NE_TYPE_ID_ANATOMICAL_SITE ).keySet() ) {
+               groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( anatomicalSpan );
+            }
+         }
+         if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_DISORDER ) ) {
+            for ( TextSpan anatomicalSpan : groupedSemanticCuis.get( CONST.NE_TYPE_ID_ANATOMICAL_SITE ).keySet() ) {
+               groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( anatomicalSpan );
+            }
+         }
       }
-      final CollectionMap<TextSpan, Long, ? extends Collection<Long>> preciseDiseaseTerms
-            = PrecisionTermConsumer.createPreciseTerms( semanticTerms );
-      final Iterable<TextSpan> findingSpans = new ArrayList<>( groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING )
-            .keySet() );
-      for ( TextSpan findingSpan : findingSpans ) {
-         if ( !preciseDiseaseTerms.containsKey( findingSpan ) ) {
-            groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( findingSpan );
+      // Clean up sign/symptoms that are also within disease/disorder spans
+      if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_FINDING )
+           && groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_DISORDER ) ) {
+         final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms = new HashSetMap<>();
+         for ( Map.Entry<TextSpan, ? extends Collection<Long>> diseases : groupedSemanticCuis
+               .get( CONST.NE_TYPE_ID_DISORDER ) ) {
+            semanticTerms.addAllValues( diseases.getKey(), diseases.getValue() );
+            groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( diseases.getKey() );
+         }
+         for ( Map.Entry<TextSpan, ? extends Collection<Long>> findings : groupedSemanticCuis
+               .get( CONST.NE_TYPE_ID_FINDING ) ) {
+            semanticTerms.addAllValues( findings.getKey(), findings.getValue() );
+         }
+         // We just created a collection with only the largest Textspans.
+         // Any smaller Finding textspans are therefore within a larger d/d textspan and should be removed.
+         final CollectionMap<TextSpan, Long, ? extends Collection<Long>> preciseDiseaseTerms
+               = PrecisionTermConsumer.createPreciseTerms( semanticTerms );
+         final Iterable<TextSpan> findingSpans = new ArrayList<>( groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING )
+               .keySet() );
+         for ( TextSpan findingSpan : findingSpans ) {
+            if ( !preciseDiseaseTerms.containsKey( findingSpan ) ) {
+               groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( findingSpan );
+            }
          }
       }
       for ( Map.Entry<Integer, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> group : groupedSemanticCuis
             .entrySet() ) {
-         consumeTypeIdHits( jcas, codingScheme, group.getKey(), group.getValue(), cuiConcepts );
+         consumeTypeIdHits( jcas, codingScheme, group.getKey(),
+               PrecisionTermConsumer.createPreciseTerms( group.getValue() ), cuiConcepts );
       }
    }