You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2014/10/24 19:57:02 UTC
svn commit: r1634107 - in
/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer:
AbstractTermConsumer.java MetaWsdTermConsumer.java PrecisionTermConsumer.java
Author: seanfinan
Date: Fri Oct 24 17:57:01 2014
New Revision: 1634107
URL: http://svn.apache.org/r1634107
Log:
Publicize methods in Abstract and Precision TermConsumers
Added MetaWsdTermConsumer.
MetaWsdTermConsumer performs a "greatest span" refinement like PrecisionTermConsumer, and it also culls sign/symptom mentions that are within disease/disorder mentions, preferring the disease/disorder as being more specific.
Added:
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java
Modified:
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java?rev=1634107&r1=1634106&r2=1634107&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/AbstractTermConsumer.java Fri Oct 24 17:57:01 2014
@@ -90,8 +90,7 @@ abstract public class AbstractTermConsum
return usedSemanticTypes;
}
- static private boolean hascTakesSemantic( final Integer cTakesSemantic,
- final Collection<Concept> concepts ) {
+ static protected boolean hascTakesSemantic( final Integer cTakesSemantic, final Iterable<Concept> concepts ) {
for ( Concept concept : concepts ) {
if ( concept.getCtakesSemantics().contains( cTakesSemantic ) ) {
return true;
Added: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java?rev=1634107&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java (added)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/MetaWsdTermConsumer.java Fri Oct 24 17:57:01 2014
@@ -0,0 +1,111 @@
+package org.apache.ctakes.dictionary.lookup2.consumer;
+
+import org.apache.ctakes.dictionary.lookup2.concept.Concept;
+import org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary;
+import org.apache.ctakes.dictionary.lookup2.textspan.TextSpan;
+import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
+import org.apache.ctakes.dictionary.lookup2.util.collection.HashSetMap;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+
+import java.util.*;
+import java.util.logging.Logger;
+
+/**
+ * Refine a collection of dictionary terms to only contain the most specific variations:
+ * "colon cancer" instead of "cancer", performed by span inclusion / complete containment, not overlap.
+ * Also a start at wsd by trim of overlapping terms of conflicting but related semantic group.
+ * In this incarnation, any sign / symptom that is within a disease / disorder is assumed to be
+ * less specific than the disease / disorder and is discarded.
+ * The refined terms of each semantic group are handed to a wrapped PrecisionTermConsumer.
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/24/2014
+ */
+public class MetaWsdTermConsumer extends AbstractTermConsumer {
+
+   static private final Logger LOGGER = Logger.getLogger( "MetaWsdTermConsumer" );
+
+   private final TermConsumer _idHitConsumer;
+
+   public MetaWsdTermConsumer( final UimaContext uimaContext, final Properties properties ) {
+      super( uimaContext, properties );
+      _idHitConsumer = new PrecisionTermConsumer( uimaContext, properties );
+   }
+
+   /**
+    * Refines the terms of each used cTAKES semantic type by greatest span, then discards any sign / symptom
+    * span that does not survive a combined refinement with the disease / disorder spans.
+    * For instance:
+    * "54 year old woman with left breast cancer."
+    * in the above sentence, "breast" as part of "breast cancer" is an anatomical site and should not be a S/S
+    * "Breast: lump, cyst"
+    * in the above, breast is a list header, denoting findings on exam.
+    * {@inheritDoc}
+    */
+   @Override
+   public void consumeHits( final JCas jcas,
+                            final RareWordDictionary dictionary,
+                            final CollectionMap<TextSpan, Long, ? extends Collection<Long>> textSpanCuis,
+                            final CollectionMap<Long, Concept, ? extends Collection<Concept>> cuiConcepts )
+         throws AnalysisEngineProcessException {
+      final String codingScheme = getCodingScheme();
+      final Collection<Integer> usedcTakesSemantics = getUsedcTakesSemantics( cuiConcepts );
+      final Map<Integer, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> groupedSemanticCuis
+            = new HashMap<>();
+      // The dictionary may have more than one type, create a map of types to terms and use them all
+      final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms = new HashSetMap<>();
+      for ( Integer cTakesSemantic : usedcTakesSemantics ) {
+         semanticTerms.clear();
+         for ( Map.Entry<TextSpan, ? extends Collection<Long>> spanCuis : textSpanCuis ) {
+            for ( Long cuiCode : spanCuis.getValue() ) {
+               final Collection<Concept> concepts = cuiConcepts.getCollection( cuiCode );
+               if ( hascTakesSemantic( cTakesSemantic, concepts ) ) {
+                  semanticTerms.placeValue( spanCuis.getKey(), cuiCode );
+               }
+            }
+         }
+         groupedSemanticCuis.put( cTakesSemantic, PrecisionTermConsumer.createPreciseTerms( semanticTerms ) );
+      }
+      // Clean up sign/symptoms that are also within disease/disorder spans
+      final CollectionMap<TextSpan, Long, ? extends Collection<Long>> diseaseTerms
+            = groupedSemanticCuis.get( CONST.NE_TYPE_ID_DISORDER );
+      final CollectionMap<TextSpan, Long, ? extends Collection<Long>> findingTerms
+            = groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING );
+      // Only used types have a group, so either lookup may be null; without both there is nothing to cull
+      if ( diseaseTerms != null && findingTerms != null ) {
+         semanticTerms.clear();
+         for ( Map.Entry<TextSpan, ? extends Collection<Long>> disease : diseaseTerms ) {
+            semanticTerms.addAllValues( disease.getKey(), disease.getValue() );
+         }
+         for ( Map.Entry<TextSpan, ? extends Collection<Long>> finding : findingTerms ) {
+            semanticTerms.addAllValues( finding.getKey(), finding.getValue() );
+         }
+         final CollectionMap<TextSpan, Long, ? extends Collection<Long>> preciseDiseaseTerms
+               = PrecisionTermConsumer.createPreciseTerms( semanticTerms );
+         final Iterable<TextSpan> findingSpans = new ArrayList<>( findingTerms.keySet() );
+         for ( TextSpan findingSpan : findingSpans ) {
+            if ( !preciseDiseaseTerms.containsKey( findingSpan ) ) {
+               findingTerms.remove( findingSpan );
+            }
+         }
+      }
+      for ( Map.Entry<Integer, CollectionMap<TextSpan, Long, ? extends Collection<Long>>> group : groupedSemanticCuis
+            .entrySet() ) {
+         consumeTypeIdHits( jcas, codingScheme, group.getKey(), group.getValue(), cuiConcepts );
+      }
+   }
+
+   /**
+    * Delegates to the wrapped PrecisionTermConsumer. {@inheritDoc}
+    */
+   @Override
+   public void consumeTypeIdHits( final JCas jcas, final String codingScheme, final int cTakesSemantic,
+                                  final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms,
+                                  final CollectionMap<Long, Concept, ? extends Collection<Concept>> conceptMap )
+         throws AnalysisEngineProcessException {
+      _idHitConsumer.consumeTypeIdHits( jcas, codingScheme, cTakesSemantic, semanticTerms, conceptMap );
+   }
+}
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java?rev=1634107&r1=1634106&r2=1634107&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java Fri Oct 24 17:57:01 2014
@@ -69,7 +69,7 @@ final public class PrecisionTermConsumer
* @param semanticTerms terms in the dictionary
* @return terms with the longest spans
*/
- static private CollectionMap<TextSpan, Long, ? extends Collection<Long>> createPreciseTerms(
+ static public CollectionMap<TextSpan, Long, ? extends Collection<Long>> createPreciseTerms(
final CollectionMap<TextSpan, Long, ? extends Collection<Long>> semanticTerms ) {
final Collection<TextSpan> discardSpans = new HashSet<>();
final List<TextSpan> textSpans = new ArrayList<>( semanticTerms.keySet() );