You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2014/10/24 19:57:02 UTC

svn commit: r1634107 - in /ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer: AbstractTermConsumer.java MetaWsdTermConsumer.java PrecisionTermConsumer.java

Author: seanfinan
Date: Fri Oct 24 17:57:01 2014
New Revision: 1634107

URL: http://svn.apache.org/r1634107
Log:
Publicize methods in Abstract and Precision TermConsumers
Added MetaWsdTermConsumer.
    MetaWsdTermConsumer performs a "greatest span" like PrecisionTermConsumer, and it also culls sign/symptom mentions that are within disease/disorder mentions, preferring the disease/disorder as being more specific.

Added:
    ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java
Modified:
    ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java
    ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java?rev=1634107&r1=1634106&r2=1634107&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java Fri Oct 24 17:57:01 2014
@@ -90,8 +90,7 @@ abstract public class AbstractTermConsum
       return usedSemanticTypes;
    }
 
-   static private boolean hascTakesSemantic( final Integer cTakesSemantic,
-                                             final Collection<Concept> concepts ) {
+   static protected boolean hascTakesSemantic( final Integer cTakesSemantic, final Iterable<Concept> concepts ) {
       for ( Concept concept : concepts ) {
          if ( concept.getCtakesSemantics().contains( cTakesSemantic ) ) {
             return true;

Added: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java?rev=1634107&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java (added)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java Fri Oct 24 17:57:01 2014
@@ -0,0 +1,111 @@
+package org.apache.ctakes.dictionary.lookup2.consumer;
+
+import org.apache.ctakes.dictionary.lookup2.concept.Concept;
+import org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary;
+import org.apache.ctakes.dictionary.lookup2.textspan.TextSpan;
+import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
+import org.apache.ctakes.dictionary.lookup2.util.collection.HashSetMap;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+
+import java.util.*;
+import java.util.logging.Logger;
+
+/**
+ * Refine a collection of dictionary terms to only contain the most specific variations:
+ * "colon cancer" instead of "cancer", performed by span inclusion / complete containment, not overlap
+ * Also a start at wsd by trim of overlapping terms of conflicting but related semantic group.
+ * In this incarnation, any sign / symptom that is within a disease / disorder is assumed to be
+ * less specific than the disease disorder and is discarded.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/24/2014
+ */
+public class MetaWsdTermConsumer extends AbstractTermConsumer {
+
+   static private final Logger LOGGER = Logger.getLogger( "MetaWsdTermConsumer" ); // NOTE(review): not referenced in this class as shown
+
+   private final TermConsumer _idHitConsumer; // delegate used by consumeTypeIdHits
+
+   public MetaWsdTermConsumer( final UimaContext uimaContext, final Properties properties ) {
+      super( uimaContext, properties );
+      _idHitConsumer = new PrecisionTermConsumer( uimaContext, properties ); // built from the same context and properties
+   }
+
+
+   /**
+    * Groups hits by cTAKES semantic type and reduces each group with PrecisionTermConsumer.createPreciseTerms,
+    * then drops any finding (sign / symptom) span missing from the precise terms of disorders + findings combined.
+    * For instance:
+    * "54 year old woman with left breast cancer."
+    * in the above sentence, "breast" as part of "breast cancer" is an anatomical site and should not be a S/S
+    * "Breast:"
+    * "lump, cyst"
+    * in the above, breast is a list header, denoting findings on exam.
+    * {@inheritDoc}
+    */
+   @Override
+   public void consumeHits( final JCas jcas,
+                            final RareWordDictionary dictionary, // not read by this implementation
+                            final CollectionMap<TextSpan, Long, ? extends Collection<Long>> textSpanCuis,
+                            final CollectionMap<Long, Concept, ? extends Collection<Concept>> cuiConcepts )
+         throws AnalysisEngineProcessException {
+      final String codingScheme = getCodingScheme();
+      final Collection<Integer> usedcTakesSemantics = getUsedcTakesSemantics( cuiConcepts );
+      final Map<Integer, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> groupedSemanticCuis
+            = new HashMap<>(); // semantic type id -> precise span-to-cui terms for that type
+      // The dictionary may have more than one type, create a map of types to terms and use them all
+      final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms = new HashSetMap<>(); // scratch map, reused
+      for ( Integer cTakesSemantic : usedcTakesSemantics ) {
+         semanticTerms.clear(); // start each semantic type with an empty scratch map
+         for ( Map.Entry<TextSpan, ? extends Collection<Long>> spanCuis : textSpanCuis ) {
+            for ( Long cuiCode : spanCuis.getValue() ) {
+               final Collection<Concept> concepts = cuiConcepts.getCollection( cuiCode );
+               if ( hascTakesSemantic( cTakesSemantic, concepts ) ) { // keep only cuis having a concept of this type
+                  semanticTerms.placeValue( spanCuis.getKey(), cuiCode );
+               }
+            }
+         }
+         groupedSemanticCuis.put( cTakesSemantic, PrecisionTermConsumer.createPreciseTerms( semanticTerms ) );
+      }
+      // Clean up sign/symptoms that are also within disease/disorder spans: pool both groups, then re-reduce
+      semanticTerms.clear();
+      for ( Map.Entry<TextSpan, ? extends Collection<Long>> diseases : groupedSemanticCuis
+            .get( CONST.NE_TYPE_ID_DISORDER ) ) { // NOTE(review): get(..) is null if this type was never put above; iterating null throws NPE - TODO confirm
+         semanticTerms.addAllValues( diseases.getKey(), diseases.getValue() );
+      }
+      for ( Map.Entry<TextSpan, ? extends Collection<Long>> diseases : groupedSemanticCuis
+            .get( CONST.NE_TYPE_ID_FINDING ) ) { // entries here are findings despite the variable name; same null caveat as above
+         semanticTerms.addAllValues( diseases.getKey(), diseases.getValue() );
+      }
+      final CollectionMap<TextSpan, Long, ? extends Collection<Long>> preciseDiseaseTerms
+            = PrecisionTermConsumer.createPreciseTerms( semanticTerms ); // precise terms over disorders and findings together
+      final Iterable<TextSpan> findingSpans = new ArrayList<>( groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING )
+            .keySet() ); // copy of the keys; entries are removed from the underlying map inside the loop below
+      for ( TextSpan findingSpan : findingSpans ) {
+         if ( !preciseDiseaseTerms.containsKey( findingSpan ) ) { // span did not survive the combined reduction
+            groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( findingSpan );
+         }
+      }
+      for ( Map.Entry<Integer, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> group : groupedSemanticCuis
+            .entrySet() ) { // hand every semantic group, findings now trimmed, to consumeTypeIdHits
+         consumeTypeIdHits( jcas, codingScheme, group.getKey(), group.getValue(), cuiConcepts );
+      }
+   }
+
+   /**
+    * Delegates unchanged to the PrecisionTermConsumer created in the constructor. {@inheritDoc}
+    */
+   @Override
+   public void consumeTypeIdHits( final JCas jcas, final String codingScheme, final int cTakesSemantic,
+                                  final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms,
+                                  final CollectionMap<Long, Concept, ? extends Collection<Concept>> conceptMap )
+         throws AnalysisEngineProcessException {
+      _idHitConsumer.consumeTypeIdHits( jcas, codingScheme, cTakesSemantic, semanticTerms, conceptMap );
+   }
+
+
+}

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java?rev=1634107&r1=1634106&r2=1634107&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java Fri Oct 24 17:57:01 2014
@@ -69,7 +69,7 @@ final public class PrecisionTermConsumer
     * @param semanticTerms terms in the dictionary
     * @return terms with the longest spans
     */
-   static private CollectionMap<TextSpan, Long, ? extends Collection<Long>> createPreciseTerms(
+   static public CollectionMap<TextSpan, Long, ? extends Collection<Long>> createPreciseTerms(
          final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms ) {
       final Collection<TextSpan> discardSpans = new HashSet<>();
       final List<TextSpan> textSpans = new ArrayList<>( semanticTerms.keySet() );