Posted to commits@ctakes.apache.org by se...@apache.org on 2016/04/04 18:35:28 UTC

svn commit: r1737704 - /ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java

Author: seanfinan
Date: Mon Apr  4 16:35:28 2016
New Revision: 1737704

URL: http://svn.apache.org/viewvc?rev=1737704&view=rev
Log:
Better Semantic Cleanup, some refactoring
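
The diff below folds the duplicated span-removal loops into a single removeUnwantedSpans helper and the per-type copy loops into a copyTerms helper. As a rough, stand-alone illustration of the removal helper's contract, here is a minimal sketch that uses plain java.util maps in place of cTakes' CollectionMap; the class, span strings, and type ids are hypothetical stand-ins for the sketch only, not the project's API.

    import java.util.*;

    // Sketch only: spans claimed by the "wanted" semantic type are removed
    // from the "unwanted" type's map, mirroring removeUnwantedSpans in the diff.
    public class SpanCleanupSketch {

       // Stand-in for CollectionMap<TextSpan, Long, ...>: span key -> CUI codes.
       static void removeUnwantedSpans( final int wantedTypeId, final int unwantedTypeId,
                                        final Map<Integer, Map<String, Set<Long>>> groupedCuis ) {
          if ( !groupedCuis.containsKey( wantedTypeId ) || !groupedCuis.containsKey( unwantedTypeId ) ) {
             return;
          }
          final Map<String, Set<Long>> unwanted = groupedCuis.get( unwantedTypeId );
          // Any span already claimed by the wanted type is dropped from the unwanted type.
          groupedCuis.get( wantedTypeId ).keySet().forEach( unwanted::remove );
       }

       public static void main( final String[] args ) {
          final int ANATOMICAL_SITE = 1;   // hypothetical ids, for this sketch only
          final int FINDING = 2;
          final Map<Integer, Map<String, Set<Long>>> grouped = new HashMap<>();
          grouped.put( ANATOMICAL_SITE, new HashMap<>( Map.of( "10-15", Set.of( 101L ) ) ) );
          grouped.put( FINDING, new HashMap<>( Map.of( "10-15", Set.of( 202L ),
                                                       "20-30", Set.of( 303L ) ) ) );
          removeUnwantedSpans( ANATOMICAL_SITE, FINDING, grouped );
          System.out.println( grouped.get( FINDING ).keySet() );   // prints [20-30]
       }
    }
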

Modified:
    ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java?rev=1737704&r1=1737703&r2=1737704&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java Mon Apr  4 16:35:28 2016
@@ -71,42 +71,26 @@ public class SemanticCleanupTermConsumer
          }
          groupedSemanticCuis.put( cTakesSemantic, semanticTerms );
       }
-      // Clean up sign/symptoms and disease/disorder spans that are also anatomical sites
-      if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_ANATOMICAL_SITE )
-           && groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_FINDING ) ) {
-         for ( TextSpan anatomicalSpan : groupedSemanticCuis.get( CONST.NE_TYPE_ID_ANATOMICAL_SITE ).keySet() ) {
-            groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( anatomicalSpan );
-         }
-         if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_DISORDER ) ) {
-            for ( TextSpan anatomicalSpan : groupedSemanticCuis.get( CONST.NE_TYPE_ID_ANATOMICAL_SITE ).keySet() ) {
-               groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( anatomicalSpan );
-            }
-         }
-      }
+      // Clean up sign/symptom and disease/disorder spans that are also anatomical sites
+      removeUnwantedSpans( CONST.NE_TYPE_ID_ANATOMICAL_SITE, CONST.NE_TYPE_ID_FINDING, groupedSemanticCuis );
+      removeUnwantedSpans( CONST.NE_TYPE_ID_ANATOMICAL_SITE, CONST.NE_TYPE_ID_DISORDER, groupedSemanticCuis );
       // Clean up sign/symptoms that are also within disease/disorder spans
       if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_FINDING )
            && groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_DISORDER ) ) {
-         final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms = new HashSetMap<>();
-         for ( Map.Entry<TextSpan, ? extends Collection<Long>> diseases : groupedSemanticCuis
-               .get( CONST.NE_TYPE_ID_DISORDER ) ) {
-            semanticTerms.addAllValues( diseases.getKey(), diseases.getValue() );
-            groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( diseases.getKey() );
-         }
-         for ( Map.Entry<TextSpan, ? extends Collection<Long>> findings : groupedSemanticCuis
-               .get( CONST.NE_TYPE_ID_FINDING ) ) {
-            semanticTerms.addAllValues( findings.getKey(), findings.getValue() );
-         }
+         removeUnwantedSpans( CONST.NE_TYPE_ID_DISORDER, CONST.NE_TYPE_ID_FINDING, groupedSemanticCuis );
+         final CollectionMap<TextSpan, Long, ? extends Collection<Long>> copiedTerms = new HashSetMap<>();
+         copyTerms( CONST.NE_TYPE_ID_DISORDER, groupedSemanticCuis, copiedTerms );
+         copyTerms( CONST.NE_TYPE_ID_FINDING, groupedSemanticCuis, copiedTerms );
          // We just created a collection with only the largest Textspans.
          // Any smaller Finding textspans are therefore within a larger d/d textspan and should be removed.
-         final CollectionMap<TextSpan, Long, ? extends Collection<Long>> preciseDiseaseTerms
-               = PrecisionTermConsumer.createPreciseTerms( semanticTerms );
-         final Iterable<TextSpan> findingSpans = new ArrayList<>( groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING )
-               .keySet() );
-         for ( TextSpan findingSpan : findingSpans ) {
-            if ( !preciseDiseaseTerms.containsKey( findingSpan ) ) {
-               groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( findingSpan );
-            }
-         }
+         final CollectionMap<TextSpan, Long, ? extends Collection<Long>> preciseTerms
+               = PrecisionTermConsumer.createPreciseTerms( copiedTerms );
+         final CollectionMap<TextSpan, Long, ? extends Collection<Long>> findingSpanCuis
+               = groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING );
+         final Collection<TextSpan> findingSpans = new ArrayList<>( findingSpanCuis.keySet() );
+         findingSpans.stream()
+               .filter( fs -> !preciseTerms.containsKey( fs ) )
+               .forEach( findingSpanCuis::remove );
       }
       for ( Map.Entry<Integer, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> group : groupedSemanticCuis
             .entrySet() ) {
@@ -115,6 +99,32 @@ public class SemanticCleanupTermConsumer
       }
    }
 
+   static private void removeUnwantedSpans( final int wantedTypeId, final int unwantedTypeId,
+                                            final Map<Integer,
+                                                  CollectionMap<TextSpan,
+                                                        Long, ? extends Collection<Long>>> groupedSemanticCuis ) {
+      if ( !groupedSemanticCuis.containsKey( wantedTypeId ) || !groupedSemanticCuis.containsKey( unwantedTypeId ) ) {
+         return;
+      }
+      final Iterable<TextSpan> wantedSpans = groupedSemanticCuis.get( wantedTypeId ).keySet();
+      final CollectionMap<TextSpan, Long, ? extends Collection<Long>> typeTextSpanCuis
+            = groupedSemanticCuis.get( unwantedTypeId );
+      for ( TextSpan wantedSpan : wantedSpans ) {
+         typeTextSpanCuis.remove( wantedSpan );
+      }
+   }
+
+   static private void copyTerms( final int typeId,
+                                  final Map<Integer, CollectionMap<TextSpan,
+                                        Long, ? extends Collection<Long>>> groupedSemanticCuis,
+                                  final CollectionMap<TextSpan, Long, ? extends Collection<Long>> copyTermsMap ) {
+      final CollectionMap<TextSpan, Long, ? extends Collection<Long>> spanCuis
+            = groupedSemanticCuis.get( typeId );
+      for ( Map.Entry<TextSpan, ? extends Collection<Long>> spanCui : spanCuis ) {
+         copyTermsMap.addAllValues( spanCui.getKey(), spanCui.getValue() );
+      }
+   }
+
    /**
     * {@inheritDoc}
     */
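
For context on the stream-based removal at the end of the first hunk: assuming PrecisionTermConsumer.createPreciseTerms keeps only the widest, non-contained spans (as the in-line comment indicates), the finding spans that get dropped are exactly those nested inside a larger disease/disorder span. A small stand-alone sketch of that filtering step, again using plain java.util collections and hypothetical spans rather than the cTakes types:

    import java.util.*;
    import java.util.stream.*;

    // Sketch only: int[]{begin, end} pairs stand in for TextSpan; containment
    // mimics what the in-line comment says createPreciseTerms achieves.
    public class PreciseSpanSketch {

       static boolean within( final int[] inner, final int[] outer ) {
          return outer[ 0 ] <= inner[ 0 ] && inner[ 1 ] <= outer[ 1 ] && !Arrays.equals( inner, outer );
       }

       public static void main( final String[] args ) {
          final List<int[]> disorderSpans = List.of( new int[]{ 0, 25 } );
          final List<int[]> findingSpans
                = new ArrayList<>( List.of( new int[]{ 5, 12 }, new int[]{ 30, 40 } ) );
          // Combine all spans, then keep only those not contained in a larger span ("precise" terms).
          final List<int[]> all = Stream.concat( disorderSpans.stream(), findingSpans.stream() )
                                        .collect( Collectors.toList() );
          final List<int[]> precise = all.stream()
                .filter( s -> all.stream().noneMatch( o -> within( s, o ) ) )
                .collect( Collectors.toList() );
          // Mirror of the new stream in the diff: drop finding spans that are not precise.
          findingSpans.removeIf( fs -> precise.stream().noneMatch( p -> Arrays.equals( p, fs ) ) );
          findingSpans.forEach( fs -> System.out.println( fs[ 0 ] + "-" + fs[ 1 ] ) );   // prints 30-40
       }
    }
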