You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2020/09/25 00:55:09 UTC

svn commit: r1881993 - in /ctakes/trunk: ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/ ctakes-core/src/main/java/org/apache/ctakes/core/util/ ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/ ctake...

Author: seanfinan
Date: Fri Sep 25 00:55:08 2020
New Revision: 1881993

URL: http://svn.apache.org/viewvc?rev=1881993&view=rev
Log:
Refactoring in ctakes-gui to use SemanticTui
Some refactoring in piper files to accommodate relocation of AEs

Removed:
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Tui.java
Modified:
    ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java
    ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java
    ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java
    ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java

Modified: ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper (original)
+++ ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper Fri Sep 25 00:55:08 2020
@@ -20,5 +20,5 @@ load AttributeCleartkSubPipe
 add pretty.html.HtmlTextWriter SubDirectory=HTML
 
 // Log run time stats and completion
-addLast util.FinishedLogger
+addLast util.log.FinishedLogger
 

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java Fri Sep 25 00:55:08 2020
@@ -1,6 +1,7 @@
 package org.apache.ctakes.core.util;
 
 import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
@@ -20,13 +21,16 @@ import org.apache.uima.resource.Resource
 )
 final public class FinishedLogger extends JCasAnnotator_ImplBase {
 
+   final org.apache.ctakes.core.util.log.FinishedLogger _delegate
+         = new org.apache.ctakes.core.util.log.FinishedLogger();
 
    /**
     * {@inheritDoc}
     */
    @Override
    public void initialize( final UimaContext context ) throws ResourceInitializationException {
-      throw new ResourceInitializationException( new Exception( "Deprecated use FinishedLogger in (sub) package log." ) );
+      Logger.getLogger( "FinishedLogger" ).warn( "Deprecated use FinishedLogger in (sub) package log." );
+      _delegate.initialize( context );
    }
 
    /**
@@ -34,7 +38,15 @@ final public class FinishedLogger extend
     */
    @Override
    public void process( final JCas jCas ) throws AnalysisEngineProcessException {
-      throw new AnalysisEngineProcessException( new Exception( "Deprecated use FinishedLogger in (sub) package log." ) );
+      _delegate.process( jCas );
    }
 
-}
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void collectionProcessComplete() throws AnalysisEngineProcessException {
+      _delegate.collectionProcessComplete();
+   }
+
+}
\ No newline at end of file

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java Fri Sep 25 00:55:08 2020
@@ -56,14 +56,26 @@ final public class IdentifiedAnnotationU
       return annotation.getHistoryOf() == CONST.NE_HISTORY_OF_PRESENT;
    }
 
-   static public SemanticGroup getSemanticGroup( final IdentifiedAnnotation annotation ) {
-      return SemanticGroup.getBestGroup( annotation );
+   /**
+    * @param annotation -
+    * @return Semantic Groups for all Umls Concepts of the annotation
+    */
+   static public Collection<SemanticGroup> getSemanticGroups( final IdentifiedAnnotation annotation ) {
+      return SemanticGroup.getGroups( annotation );
    }
 
-   static public Collection<SemanticTui> getSemanticTui( final IdentifiedAnnotation annotation ) {
+   /**
+    * @param annotation -
+    * @return Semantic Tuis for all Umls Concepts of the annotation
+    */
+   static public Collection<SemanticTui> getSemanticTuis( final IdentifiedAnnotation annotation ) {
       return SemanticTui.getTuis( annotation );
    }
 
+   /**
+    * @param annotation -
+    * @return cuis for all Umls Concepts of the annotation
+    */
    static public Collection<String> getCuis( final IdentifiedAnnotation annotation ) {
       return OntologyConceptUtil.getCuis( annotation );
    }
@@ -85,7 +97,11 @@ final public class IdentifiedAnnotationU
       return OntologyConceptUtil.getCodes( annotation, schemeName );
    }
 
-   static public Collection<String> getPreferredText( final IdentifiedAnnotation annotation ) {
+   /**
+    * @param annotation -
+    * @return preferred texts for all Umls Concepts of the annotation
+    */
+   static public Collection<String> getPreferredTexts( final IdentifiedAnnotation annotation ) {
       return OntologyConceptUtil.getUmlsConceptStream( annotation )
                                 .map( UmlsConcept::getPreferredText )
                                 .filter( Objects::nonNull )
@@ -93,5 +109,94 @@ final public class IdentifiedAnnotationU
                                 .collect( Collectors.toSet() );
    }
 
+   /**
+    * @param annotation -
+    * @return the confidence of the annotation
+    */
+   static public float getConfidence( final IdentifiedAnnotation annotation ) {
+      return annotation.getConfidence();
+   }
+
+   //
+   //    Some get methods that can utilize possible OntologyConcept wsd scores
+   //
+
+   /**
+    * @param annotation -
+    * @return the best wsd SemanticGroups
+    */
+   static public Collection<SemanticGroup> getBestSemanticGroups( final IdentifiedAnnotation annotation ) {
+      return getBestSemanticTuis( annotation )
+            .stream()
+            .map( SemanticTui::getGroup )
+            .collect( Collectors.toSet() );
+   }
+
+   /**
+    * @param annotation -
+    * @return the single best SemanticGroup, chosen from the groups of all Umls Concepts (not only the best wsd ones)
+    */
+   static public SemanticGroup getBestSemanticGroup( final IdentifiedAnnotation annotation ) {
+//      return SemanticGroup.getBestGroup( getBestSemanticGroups( annotation ) );
+      return SemanticGroup.getBestGroup( getSemanticGroups( annotation ) );
+   }
+
+   /**
+    * @param annotation -
+    * @return the best wsd SemanticTuis
+    */
+   static public Collection<SemanticTui> getBestSemanticTuis( final IdentifiedAnnotation annotation ) {
+      return OntologyConceptUtil.getBestUmlsConcepts( annotation )
+                                .stream()
+                                .map( SemanticTui::getTui )
+                                .collect( Collectors.toSet() );
+   }
+
+   /**
+    * @param annotation -
+    * @return the best wsd cuis
+    */
+   static public Collection<String> getBestCuis( final IdentifiedAnnotation annotation ) {
+      return OntologyConceptUtil.getBestUmlsConcepts( annotation )
+                                .stream()
+                                .map( UmlsConcept::getCui )
+                                .collect( Collectors.toSet() );
+   }
+
+   /**
+    * @param annotation -
+    * @return a collection of the best wsd schemes with codes for the given annotation.  e.g. snomed_us, rxnorm.
+    */
+   static public Collection<String> getBestCodeSchemes( final IdentifiedAnnotation annotation ) {
+      return OntologyConceptUtil.getBestUmlsConcepts( annotation )
+                                .stream()
+                                .map( UmlsConcept::getCodingScheme )
+                                .collect( Collectors.toSet() );
+   }
+
+   /**
+    * @param annotation -
+    * @param schemeName the name of a coding scheme.  e.g. snomed_us, rxnorm.
+    * @return the best wsd annotation codes for the given coding scheme.
+    */
+   static public Collection<String> getBestCodes( final IdentifiedAnnotation annotation, final String schemeName ) {
+      return OntologyConceptUtil.getBestUmlsConcepts( annotation )
+                                .stream()
+                                .filter( c -> schemeName.equals( c.getCodingScheme() ) )
+                                .map( UmlsConcept::getCode )
+                                .collect( Collectors.toSet() );
+   }
+
+   /**
+    * @param annotation -
+    * @return preferred texts for the best wsd Umls Concepts of the annotation
+    */
+   static public Collection<String> getBestPreferredTexts( final IdentifiedAnnotation annotation ) {
+      return OntologyConceptUtil.getBestUmlsConcepts( annotation )
+                                .stream()
+                                .map( UmlsConcept::getPreferredText )
+                                .collect( Collectors.toSet() );
+   }
+
 
 }

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java Fri Sep 25 00:55:08 2020
@@ -102,6 +102,56 @@ final public class OntologyConceptUtil {
    }
 
 
+   static private final Predicate<OntologyConcept> isDisambiguated = c -> {
+      try {
+         return c.getDisambiguated();
+      } catch ( Exception e ) {
+         return false;
+      }
+   };
+
+   static private double getWsdScore( final OntologyConcept concept ) {
+      try {
+         return concept.getScore();
+      } catch ( Exception e ) {
+         return 0.5;
+      }
+   }
+
+   ;
+
+
+   /**
+    * @param annotation -
+    * @return Umls Concepts of the annotation sharing the highest wsd score; disambiguated concepts are preferred
+    */
+   static public Collection<UmlsConcept> getBestUmlsConcepts( final IdentifiedAnnotation annotation ) {
+      List<UmlsConcept> wsdConcepts = getUmlsConceptStream( annotation )
+            .filter( isDisambiguated )
+            .collect( Collectors.toList() );
+      if ( wsdConcepts.size() == 1 ) {
+         return wsdConcepts;
+      }
+      if ( wsdConcepts.isEmpty() ) {
+         // A Stream is single-use: fetch a fresh one, reusing the filtered stream throws IllegalStateException.
+         wsdConcepts = getUmlsConceptStream( annotation )
+               .sorted( Comparator.comparingDouble( OntologyConceptUtil::getWsdScore ) )
+               .collect( Collectors.toList() );
+      } else {
+         wsdConcepts = wsdConcepts.stream()
+                                  .sorted( Comparator.comparingDouble( OntologyConceptUtil::getWsdScore ) )
+                                  .collect( Collectors.toList() );
+      }
+      if ( wsdConcepts.size() <= 1 ) {
+         return wsdConcepts;
+      }
+      final double max = getWsdScore( wsdConcepts.get( wsdConcepts.size() - 1 ) );
+      return wsdConcepts.stream()
+                        .filter( c -> getWsdScore( c ) == max )
+                        .collect( Collectors.toSet() );
+   }
+
+
    //
    //   Get cuis, tuis, or codes for a single IdentifiedAnnotation
    //

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java Fri Sep 25 00:55:08 2020
@@ -12,34 +12,37 @@ import java.util.stream.Collectors;
 import static org.apache.ctakes.typesystem.type.constants.CONST.*;
 
 public enum SemanticGroup {
-   DRUG( NE_TYPE_ID_DRUG, "Drug", MedicationMention.class, MedicationMention::new ),
-   DISORDER( NE_TYPE_ID_DISORDER, "Disorder", DiseaseDisorderMention.class, DiseaseDisorderMention::new ),
-   FINDING( NE_TYPE_ID_FINDING, "Finding", SignSymptomMention.class, SignSymptomMention::new ),
-   PROCEDURE( NE_TYPE_ID_PROCEDURE, "Procedure", ProcedureMention.class, ProcedureMention::new ),
-   ANATOMY( NE_TYPE_ID_ANATOMICAL_SITE, "Anatomy", AnatomicalSiteMention.class, AnatomicalSiteMention::new ),
-   CLINICAL_ATTRIBUTE( NE_TYPE_ID_CLINICAL_ATTRIBUTE, "Attribute", SignSymptomMention.class, SignSymptomMention::new ),
-   DEVICE( NE_TYPE_ID_DEVICE, "Device", EntityMention.class, EntityMention::new ),
-   LAB( NE_TYPE_ID_LAB, "Lab", LabMention.class, LabMention::new ),
-   PHENOMENON( NE_TYPE_ID_PHENOMENA, "Phenomenon", EventMention.class, EventMention::new ),
-   SUBJECT( NE_TYPE_ID_SUBJECT_MODIFIER, "Subject", SubjectModifier.class, SubjectModifier::new ),
-   TITLE( NE_TYPE_ID_PERSON_TITLE, "Title", PersonTitleAnnotation.class, PersonTitleAnnotation::new ),
-   EVENT( NE_TYPE_ID_GENERIC_EVENT, "Event", EventMention.class, EventMention::new ),
-   ENTITY( NE_TYPE_ID_GENERIC_ENTITY, "Entity", EntityMention.class, EntityMention::new ),
-   TIME( NE_TYPE_ID_TIME_MENTION, "Time", TimeMention.class, TimeAnnotation::new ),
-   MODIFIER( NE_TYPE_ID_GENERIC_MODIFIER, "Modifier", Modifier.class, Modifier::new ),
-   LAB_MODIFIER( NE_TYPE_ID_LAB_VALUE_MODIFIER, "LabModifier", LabValueModifier.class, LabValueModifier::new ),
-   UNKNOWN( NE_TYPE_ID_UNKNOWN, "Unknown", IdentifiedAnnotation.class, IdentifiedAnnotation::new );
+   DRUG( NE_TYPE_ID_DRUG, "Drug", "Medication", MedicationMention.class, MedicationMention::new ),
+   DISORDER( NE_TYPE_ID_DISORDER, "Disorder", "Disease/Disorder", DiseaseDisorderMention.class, DiseaseDisorderMention::new ),
+   FINDING( NE_TYPE_ID_FINDING, "Finding", "Sign/Symptom", SignSymptomMention.class, SignSymptomMention::new ),
+   PROCEDURE( NE_TYPE_ID_PROCEDURE, "Procedure", "Procedure", ProcedureMention.class, ProcedureMention::new ),
+   ANATOMY( NE_TYPE_ID_ANATOMICAL_SITE, "Anatomy", "Anatomical Site", AnatomicalSiteMention.class, AnatomicalSiteMention::new ),
+   CLINICAL_ATTRIBUTE( NE_TYPE_ID_CLINICAL_ATTRIBUTE, "Attribute", "Clinical Attribute", SignSymptomMention.class, SignSymptomMention::new ),
+   DEVICE( NE_TYPE_ID_DEVICE, "Device", "Device", EntityMention.class, EntityMention::new ),
+   LAB( NE_TYPE_ID_LAB, "Lab", "Lab", LabMention.class, LabMention::new ),
+   PHENOMENON( NE_TYPE_ID_PHENOMENA, "Phenomenon", "Phenomenon", EventMention.class, EventMention::new ),
+   SUBJECT( NE_TYPE_ID_SUBJECT_MODIFIER, "Subject", "Subject", SubjectModifier.class, SubjectModifier::new ),
+   TITLE( NE_TYPE_ID_PERSON_TITLE, "Title", "Person Title", PersonTitleAnnotation.class, PersonTitleAnnotation::new ),
+   EVENT( NE_TYPE_ID_GENERIC_EVENT, "Event", "Event", EventMention.class, EventMention::new ),
+   ENTITY( NE_TYPE_ID_GENERIC_ENTITY, "Entity", "Entity", EntityMention.class, EntityMention::new ),
+   TIME( NE_TYPE_ID_TIME_MENTION, "Time", "Timex3", TimeMention.class, TimeAnnotation::new ),
+   MODIFIER( NE_TYPE_ID_GENERIC_MODIFIER, "Modifier", "Modifier", Modifier.class, Modifier::new ),
+   LAB_MODIFIER( NE_TYPE_ID_LAB_VALUE_MODIFIER, "LabModifier", "Lab Modifier", LabValueModifier.class, LabValueModifier::new ),
+   UNKNOWN( NE_TYPE_ID_UNKNOWN, "Unknown", "Unknown Semantic Group", IdentifiedAnnotation.class, IdentifiedAnnotation::new );
 
    private final int _code;
    private final String _name;
+   private final String _longName;
    private final Class<? extends IdentifiedAnnotation> _clazz;
    private final Function<JCas, ? extends IdentifiedAnnotation> _creator;
 
    SemanticGroup( final int code, final String name,
+                  final String longName,
                   final Class<? extends IdentifiedAnnotation> clazz,
                   final Function<JCas, ? extends IdentifiedAnnotation> creator ) {
       _code = code;
       _name = name;
+      _longName = longName; // descriptive name, e.g. "Disease/Disorder"; must not be the short name
       _clazz = clazz;
       _creator = creator;
    }
@@ -52,6 +55,10 @@ public enum SemanticGroup {
       return _name;
    }
 
+   public String getLongName() {
+      return _longName;
+   }
+
    public Class<? extends IdentifiedAnnotation> getCtakesClass() {
       return _clazz;
    }
@@ -83,7 +90,7 @@ public enum SemanticGroup {
    }
 
    static private final class BestGrouper implements Comparator<SemanticGroup> {
-      static private BestGrouper INSTANCE = new BestGrouper();
+      static private final BestGrouper INSTANCE = new BestGrouper();
 
       public int compare( final SemanticGroup g1, final SemanticGroup g2 ) {
          if ( g1 == SemanticGroup.UNKNOWN ) {

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java Fri Sep 25 00:55:08 2020
@@ -156,7 +156,7 @@ public enum SemanticTui {
 
    private final int _code;
    private final String _name;
-   private final SemanticGroup _group;
+   private SemanticGroup _group;
 
    SemanticTui( final int code, final String name, final SemanticGroup group ) {
       _code = code;
@@ -172,6 +172,16 @@ public enum SemanticTui {
       return _name;
    }
 
+   /**
+    * Allows a user to override the semantic group associated with a tui.
+    * This is useful when differentiating things like chemicals and drugs.
+    *
+    * @param group -
+    */
+   public void setGroup( final SemanticGroup group ) {
+      _group = group;
+   }
+
    public SemanticGroup getGroup() {
       return _group;
    }
@@ -197,14 +207,16 @@ public enum SemanticTui {
    }
 
    static public SemanticTui getTui( final String semanticType ) {
+      // Attempt to match name ( e.g. "Cell" ).
       final String toMatch = getMatchable( semanticType );
       for ( SemanticTui tui : SemanticTui.values() ) {
          if ( tui.getMatchType()
-               .equals( toMatch ) ) {
+                 .equals( toMatch ) ) {
             return tui;
          }
       }
-      return UNKNOWN;
+      // Attempt to match code ( e.g. "T001" ).
+      return getTuiFromCode( toMatch );
    }
 
    static public SemanticTui getTui( final int code ) {
@@ -219,7 +231,7 @@ public enum SemanticTui {
    static public SemanticTui getTuiFromCode( final String tuiCode ) {
       for ( SemanticTui tui : SemanticTui.values() ) {
          if ( tui.name()
-               .equals( tuiCode ) ) {
+                 .equalsIgnoreCase( tuiCode ) ) {
             return tui;
          }
       }
@@ -248,9 +260,31 @@ public enum SemanticTui {
       return getTuiFromCode( umlsConcept.getTui() );
    }
 
+   /**
+    * Allows a user to override the semantic group associated with a tui.
+    * This is useful when differentiating things like chemicals and drugs.
+    *
+    * @param tui   -
+    * @param group -
+    */
+   static public void setGroup( final int tui, final String group ) {
+      getTui( tui ).setGroup( SemanticGroup.getGroup( group ) );
+   }
+
+   /**
+    * Allows a user to override the semantic group associated with a tui.
+    * This is useful when differentiating things like chemicals and drugs.
+    *
+    * @param type  -
+    * @param group -
+    */
+   static public void setGroup( final String type, final String group ) {
+      getTui( type ).setGroup( SemanticGroup.getGroup( group ) );
+   }
+
    static private String getMatchable( final String text ) {
       return text.toLowerCase()
-            .replaceAll( ",", "" );
+                 .replaceAll( ",", "" );
    }
 
 }

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java Fri Sep 25 00:55:08 2020
@@ -1,7 +1,7 @@
 package org.apache.ctakes.dictionary.lookup2.util;
 
 import java.util.Collection;
-import java.util.HashSet;
+import java.util.stream.Collectors;
 
 /**
  * Author: SPF
@@ -38,24 +38,16 @@ final public class TuiCodeUtil {
 
 
    static public Collection<String> getIntAsTuis( final Collection<Integer> tuiCodes ) {
-      final Collection<String> tuis = new HashSet<>( tuiCodes.size() );
-      for ( Integer tuiCode : tuiCodes ) {
-         tuis.add( getAsTui( tuiCode ) );
-      }
-      return tuis;
+      return tuiCodes.stream().map( TuiCodeUtil::getAsTui ).collect( Collectors.toSet() );
    }
 
    static public Collection<String> getStringAsTuis( final Collection<String> tuiNums ) {
-      final Collection<String> tuis = new HashSet<>( tuiNums.size() );
-      for ( String tuiNum : tuiNums ) {
-         tuis.add( getAsTui( tuiNum ) );
-      }
-      return tuis;
+      return tuiNums.stream().map( TuiCodeUtil::getAsTui ).collect( Collectors.toSet() );
    }
 
    static public Integer getTuiCode( final String tui ) {
       final String tuiText = getAsTui( tui );
-      final String tuiNum = tuiText.substring( 1, tuiText.length() );
+      final String tuiNum = tuiText.substring( 1 );
       try {
          return Integer.parseInt( tuiNum );
       } catch ( NumberFormatException nfE ) {

Modified: ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper (original)
+++ ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper Fri Sep 25 00:55:08 2020
@@ -11,4 +11,4 @@ add pretty.plaintext.PrettyTextWriterFit
 add property.plaintext.PropertyTextWriterFit
 
 // Announce completion
-addLast util.FinishedLogger
+addLast util.log.FinishedLogger

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java Fri Sep 25 00:55:08 2020
@@ -1,9 +1,14 @@
 package org.apache.ctakes.gui.dictionary;
 
 
+import org.apache.ctakes.core.util.annotation.SemanticGroup;
+import org.apache.ctakes.core.util.annotation.SemanticTui;
 import org.apache.ctakes.core.util.collection.CollectionMap;
 import org.apache.ctakes.core.util.collection.HashSetMap;
-import org.apache.ctakes.gui.dictionary.umls.*;
+import org.apache.ctakes.gui.dictionary.umls.Concept;
+import org.apache.ctakes.gui.dictionary.umls.ConceptMapFactory;
+import org.apache.ctakes.gui.dictionary.umls.MrconsoParser;
+import org.apache.ctakes.gui.dictionary.umls.UmlsTermUtil;
 import org.apache.ctakes.gui.dictionary.util.HsqlUtil;
 import org.apache.ctakes.gui.dictionary.util.JdbcUtil;
 import org.apache.ctakes.gui.dictionary.util.RareWordDbWriter;
@@ -45,7 +50,7 @@ final class DictionaryBuilder {
                                    final Collection<String> wantedLanguages,
                                    final Collection<String> wantedSources,
                                    final Collection<String> wantedTargets,
-                                   final Collection<Tui> wantedTuis ) {
+                                   final Collection<SemanticTui> wantedTuis ) {
       // Set up the term utility
       final UmlsTermUtil umlsTermUtil = new UmlsTermUtil( DEFAULT_DATA_DIR );
       final Map<Long, Concept> conceptMap
@@ -59,7 +64,7 @@ final class DictionaryBuilder {
                                                final Collection<String> wantedLanguages,
                                                final Collection<String> wantedSources,
                                                final Collection<String> wantedTargets,
-                                               final Collection<Tui> wantedTuis ) {
+                                               final Collection<SemanticTui> wantedTuis ) {
       LOGGER.info( "Parsing Concepts" );
       // Create a map of Cuis to empty Concepts for all wanted Tuis and source vocabularies
       final Map<Long, Concept> conceptMap
@@ -75,11 +80,14 @@ final class DictionaryBuilder {
       return conceptMap;
    }
 
-   static private void removeWsdRarities( final Map<Long, Concept> conceptMap, final Collection<Tui> wantedTuis,
+   static private void removeWsdRarities( final Map<Long, Concept> conceptMap, final Collection<SemanticTui> wantedTuis,
                                           final int wsdDivisor, final int anatMultiplier ) {
       LOGGER.info( "Performing Poor man's WSD ..." );
-      final Collection<Tui> wantedAnatTuis = new ArrayList<>( wantedTuis );
-      wantedAnatTuis.retainAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+      final EnumSet<SemanticTui> wantedAnatTuis = EnumSet.noneOf( SemanticTui.class );
+      Arrays.stream( SemanticTui.values() )
+            .filter( t -> t.getGroup() == SemanticGroup.ANATOMY )
+            .filter( wantedTuis::contains )
+            .forEach( wantedAnatTuis::add );
       final CollectionMap<String, Concept, Set<Concept>> synonymCodeMap = new HashSetMap<>( 500000 );
       for ( Concept concept : conceptMap.values() ) {
          concept.cullExtensions();
@@ -129,17 +137,23 @@ final class DictionaryBuilder {
    }
 
    static private void removeAnatTexts( final Map<Long, Concept> conceptMap,
-                                        final Collection<Tui> wantedTuis ) {
+                                        final Collection<SemanticTui> wantedTuis ) {
       LOGGER.info( "Removing Non-Anatomy synonyms that are also Anatomy synonyms ..." );
-      final Collection<Tui> wantedAnatTuis = new ArrayList<>( wantedTuis );
-      wantedAnatTuis.retainAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+      final EnumSet<SemanticTui> wantedAnatTuis = EnumSet.noneOf( SemanticTui.class );
+      Arrays.stream( SemanticTui.values() )
+            .filter( t -> t.getGroup() == SemanticGroup.ANATOMY )
+            .filter( wantedTuis::contains )
+            .forEach( wantedAnatTuis::add );
       final Collection<String> anatTexts = conceptMap.values().stream()
-            .filter( c -> wantedAnatTuis.containsAll( c.getTuis() ) )
-            .map( Concept::getTexts )
-            .flatMap( Collection::stream )
-            .collect( Collectors.toSet() );
-      final Collection<Tui> nonAnatTuis = new ArrayList<>( wantedTuis );
-      nonAnatTuis.removeAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+                                                     .filter( c -> wantedAnatTuis.containsAll( c.getTuis() ) )
+                                                     .map( Concept::getTexts )
+                                                     .flatMap( Collection::stream )
+                                                     .collect( Collectors.toSet() );
+      final EnumSet<SemanticTui> nonAnatTuis = EnumSet.noneOf( SemanticTui.class );
+      Arrays.stream( SemanticTui.values() )
+            .filter( t -> t.getGroup() != SemanticGroup.ANATOMY )
+            .filter( wantedTuis::contains )
+            .forEach( nonAnatTuis::add );
       final Collection<Long> empties = new ArrayList<>();
       int textCount = 0;
       for ( Map.Entry<Long, Concept> entry : conceptMap.entrySet() ) {
@@ -159,14 +173,21 @@ final class DictionaryBuilder {
 
 
    // TODO too much tui confusion in non-rxnorm drugs
-   static private void removeUnwantedDrugs( final Map<Long, Concept> conceptMap, Collection<Tui> wantedTuis ) {
+   static private void removeUnwantedDrugs( final Map<Long, Concept> conceptMap,
+                                            final Collection<SemanticTui> wantedTuis ) {
       LOGGER.info( "Removing Drug Concepts not in rxnorm ..." );
       // remove concepts that have only drug tuis but are not in rxnorm
-      final Collection<Tui> drugTuis = new ArrayList<>( wantedTuis );
-      drugTuis.retainAll( Arrays.asList( TuiTableModel.CTAKES_DRUG ) );
+      final EnumSet<SemanticTui> drugTuis = EnumSet.noneOf( SemanticTui.class );
+      Arrays.stream( SemanticTui.values() )
+            .filter( t -> t.getGroup() == SemanticGroup.DRUG )
+            .filter( wantedTuis::contains )
+            .forEach( drugTuis::add );
       // remove concepts that are in rxnorm but have non-drug tuis
-      final Collection<Tui> nonDrugTuis = new ArrayList<>( wantedTuis );
-      nonDrugTuis.removeAll( Arrays.asList( TuiTableModel.CTAKES_DRUG ) );
+      final EnumSet<SemanticTui> nonDrugTuis = EnumSet.noneOf( SemanticTui.class );
+      Arrays.stream( SemanticTui.values() )
+            .filter( t -> t.getGroup() != SemanticGroup.DRUG )
+            .filter( wantedTuis::contains )
+            .forEach( nonDrugTuis::add );
       // if concept has drug tuis but is not in rxnorm || concept is in rxnorm but does not have drug tuis
       final Collection<Long> empties = new ArrayList<>();
       int textCount = 0;

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java Fri Sep 25 00:55:08 2020
@@ -1,5 +1,6 @@
 package org.apache.ctakes.gui.dictionary;
 
+import org.apache.ctakes.core.util.annotation.SemanticTui;
 import org.apache.ctakes.gui.component.DisablerPane;
 import org.apache.ctakes.gui.component.FileChooserPanel;
 import org.apache.ctakes.gui.component.LoggerPanel;
@@ -18,9 +19,7 @@ import java.awt.event.ActionListener;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
+import java.util.*;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 
@@ -169,6 +168,36 @@ final class MainPanel extends JPanel {
          } catch ( IOException ioE ) {
             error( "Vocabulary Parse Error", ioE.getMessage() );
          }
+         final File mrSab = new File( __umlsDirPath + "/META", "MRSAB.RRF" );
+         final String mrSabPath = mrSab.getPath();
+         final Map<String, String> sourceNames = new HashMap<>();
+         final Map<String, String> sourceVersions = new HashMap<>();
+         final Map<String, String> sourceCuiCounts = new HashMap<>();
+         LOGGER.info( "Parsing vocabulary names from " + mrSabPath );
+         try ( final BufferedReader reader = FileUtil.createReader( mrSabPath ) ) {
+            int lineCount = 0;
+            java.util.List<String> tokens = FileUtil.readBsvTokens( reader, mrSabPath );
+            while ( tokens != null ) {
+               lineCount++;
+               if ( tokens.size() > MrsabIndex.CFR._index ) {
+                  final String sab = tokens.get( MrsabIndex.RSAB._index );
+                  if ( sources.contains( sab ) ) {
+                     sourceNames.put( sab, tokens.get( MrsabIndex.SON._index ) );
+                     sourceVersions.put( sab, tokens.get( MrsabIndex.SVER._index ) );
+                     sourceCuiCounts.put( sab, tokens.get( MrsabIndex.CFR._index ) );
+                  }
+               }
+               if ( lineCount % 100000 == 0 ) {
+                  LOGGER.info( "File Line " + lineCount + "\t Vocabularies " + sources.size() );
+               }
+               tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
+            }
+            LOGGER.info( "Parsed " + sources.size() + " vocabulary names" );
+            _sourceModel.setSourceInfo( sourceNames, sourceVersions, sourceCuiCounts );
+         } catch ( IOException ioE ) {
+            error( "Vocabulary Parse Error", ioE.getMessage() );
+         }
+
          DisablerPane.getInstance().setVisible( false );
          frame.setCursor( Cursor.getDefaultCursor() );
       }
@@ -193,13 +222,16 @@ final class MainPanel extends JPanel {
       private final String __dictionaryName;
       private final Collection<String> __wantedSources;
       private final Collection<String> __wantedTargets;
-      private final Collection<Tui> __wantedTuis;
+      private final Collection<SemanticTui> __wantedTuis;
       private final Collection<String> __wantedLanguages;
 
-      private DictionaryBuildRunner( final String umlsDirPath, final String ctakesDirPath, final String dictionaryName,
+      private DictionaryBuildRunner( final String umlsDirPath,
+                                     final String ctakesDirPath,
+                                     final String dictionaryName,
                                      final Collection<String> wantedSources,
                                      final Collection<String> wantedTargets,
-                                     final Collection<Tui> wantedTuis, final Collection<String> wantedLangauges ) {
+                                     final Collection<SemanticTui> wantedTuis,
+                                     final Collection<String> wantedLangauges ) {
          __umlsDirPath = umlsDirPath;
          __ctakesDirPath = ctakesDirPath;
          __dictionaryName = dictionaryName;

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java Fri Sep 25 00:55:08 2020
@@ -1,6 +1,7 @@
 package org.apache.ctakes.gui.dictionary.umls;
 
 
+import org.apache.ctakes.core.util.annotation.SemanticTui;
 import org.apache.ctakes.core.util.collection.HashSetMap;
 import org.apache.ctakes.gui.dictionary.util.TextTokenizer;
 
@@ -20,12 +21,12 @@ final public class Concept {
 
    final private Map<String, Integer> _textCounts;
    final private HashSetMap<String, String> _codes;
-   final private Collection<Tui> _tuis;
+   final private Collection<SemanticTui> _tuis;
 
    public Concept() {
       _textCounts = new HashMap<>( 1 );
       _codes = new HashSetMap<>( 0 );
-      _tuis = EnumSet.noneOf( Tui.class );
+      _tuis = EnumSet.noneOf( SemanticTui.class );
    }
 
    public int addTexts( final Collection<String> texts ) {
@@ -123,15 +124,15 @@ final public class Concept {
       return codes;
    }
 
-   public void addTui( final Tui tui ) {
+   public void addTui( final SemanticTui tui ) {
       _tuis.add( tui );
    }
 
-   public Collection<Tui> getTuis() {
+   public Collection<SemanticTui> getTuis() {
       return _tuis;
    }
 
-   public boolean hasTui( final Collection<Tui> tuis ) {
+   public boolean hasTui( final Collection<SemanticTui> tuis ) {
       return _tuis.stream().anyMatch( tuis::contains );
    }
 

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java Fri Sep 25 00:55:08 2020
@@ -1,6 +1,7 @@
 package org.apache.ctakes.gui.dictionary.umls;
 
 
+import org.apache.ctakes.core.util.annotation.SemanticTui;
 import org.apache.log4j.Logger;
 
 import java.util.Collection;
@@ -21,7 +22,7 @@ final public class ConceptMapFactory {
 
    static public Map<Long, Concept> createInitialConceptMap( final String umlsDirPath,
                                                              final Collection<String> wantedSources,
-                                                             final Collection<Tui> wantedTuis ) {
+                                                             final Collection<SemanticTui> wantedTuis ) {
       if ( wantedSources.isEmpty() ) {
          LOGGER.warn( "No source vocabularies specified" );
          return Collections.emptyMap();

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java Fri Sep 25 00:55:08 2020
@@ -152,7 +152,7 @@ final public class MrconsoParser {
                concept.setPreferredText( text );
             }
             // Get tokenized text
-            final String tokenizedText = TextTokenizer.getTokenizedText( text );
+            final String tokenizedText = TextTokenizer.getTokenizedText( text.toLowerCase() );
             if ( tokenizedText == null || tokenizedText.isEmpty()
                  || !umlsTermUtil.isTextValid( tokenizedText )
                  || DoseUtil.hasUnit( tokenizedText ) ) {

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java Fri Sep 25 00:55:08 2020
@@ -1,6 +1,7 @@
 package org.apache.ctakes.gui.dictionary.umls;
 
 
+import org.apache.ctakes.core.util.annotation.SemanticTui;
 import org.apache.ctakes.gui.dictionary.util.FileUtil;
 import org.apache.log4j.Logger;
 
@@ -28,14 +29,14 @@ final public class MrstyParser {
    }
 
    static public Map<Long, Concept> createConceptsForTuis( final String umlsPath,
-                                                           final Collection<Tui> wantedTuis ) {
+                                                           final Collection<SemanticTui> wantedTuis ) {
       final String mrstyPath = umlsPath + MRSTY_SUB_PATH;
       LOGGER.info( "Compiling list of Cuis with wanted Tuis using " + mrstyPath );
       long lineCount = 0;
       final Map<Long, Concept> wantedConcepts = new HashMap<>();
-      final Collection<Tui> usedTuis = EnumSet.noneOf( Tui.class );
-      final Map<Tui, Long> tuiCodeCount = new EnumMap<>( Tui.class );
-      for ( Tui tui : wantedTuis ) {
+      final Collection<SemanticTui> usedTuis = EnumSet.noneOf( SemanticTui.class );
+      final Map<SemanticTui, Long> tuiCodeCount = new EnumMap<>( SemanticTui.class );
+      for ( SemanticTui tui : wantedTuis ) {
          tuiCodeCount.put( tui, 0L );
       }
       try ( final BufferedReader reader = FileUtil.createReader( mrstyPath ) ) {
@@ -43,7 +44,7 @@ final public class MrstyParser {
          while ( tokens != null ) {
             lineCount++;
             if ( tokens.size() > TUI._index ) {
-               final Tui tuiEnum = Tui.valueOf( tokens.get( TUI._index ) );
+               final SemanticTui tuiEnum = SemanticTui.valueOf( tokens.get( TUI._index ) );
                if ( !wantedTuis.contains( tuiEnum ) ) {
                   tokens = FileUtil.readBsvTokens( reader, mrstyPath );
                   continue;
@@ -76,8 +77,8 @@ final public class MrstyParser {
       LOGGER.info( "File Lines " + lineCount + "\t Cuis: " + counts );
       if ( usedTuis.size() != wantedTuis.size() ) {
          wantedTuis.removeAll( usedTuis );
-         for ( Tui missingTui : wantedTuis ) {
-            LOGGER.warn( "Could not find Cuis for Tui " + missingTui + " " + missingTui.getDescription() );
+         for ( SemanticTui missingTui : wantedTuis ) {
+            LOGGER.warn( "Could not find Cuis for Tui " + missingTui + " " + missingTui.getSemanticType() );
          }
       }
       return wantedConcepts;

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java Fri Sep 25 00:55:08 2020
@@ -18,15 +18,21 @@ final public class SourceTableModel impl
 
    static private final Logger LOGGER = Logger.getLogger( "SourceTableModel" );
 
-   static private final String[] COLUMN_NAMES = { "Source", "Target", "Vocabulary" };
-   static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, Boolean.class, String.class };
+   static private final String[] COLUMN_NAMES = { "Read Synonyms", "Record Codes", "Code", "Vocabulary", "Version",
+                                                  "CUIs" };
+   static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, Boolean.class, String.class, String.class,
+                                                      String.class, String.class };
 
-   static private final String[] CTAKES_SOURCES = { "SNOMEDCT_US", "RXNORM" };
+   static private final String[] CTAKES_SOURCES = { "SNOMEDCT_US", "RXNORM", "MTH", "MSH", "LNC" };
+   static private final String[] CTAKES_TARGETS = { "SNOMEDCT_US", "RXNORM" };
 
    private final EventListenerList _listenerList = new EventListenerList();
    private final Collection<String> _wantedSources = new HashSet<>();
    private final Collection<String> _wantedTargets = new HashSet<>();
    private final List<String> _sources = new ArrayList<>();
+   private final Map<String, String> _sourceNames = new HashMap<>();
+   private final Map<String, String> _sourceVersions = new HashMap<>();
+   private final Map<String, String> _sourceCuiCounts = new HashMap<>();
 
 
    public void setSources( final Collection<String> sources ) {
@@ -36,7 +42,19 @@ final public class SourceTableModel impl
       _sources.addAll( sources );
       Collections.sort( _sources );
       _wantedSources.addAll( Arrays.asList( CTAKES_SOURCES ) );
-      _wantedTargets.addAll( Arrays.asList( CTAKES_SOURCES ) );
+      _wantedTargets.addAll( Arrays.asList( CTAKES_TARGETS ) );
+      fireTableChanged( new TableModelEvent( this ) );
+   }
+
+   public void setSourceInfo( final Map<String, String> sourceNames,
+                              final Map<String, String> sourceVersions,
+                              final Map<String, String> sourceCuiCounts ) {
+      _sourceNames.clear();
+      _sourceVersions.clear();
+      _sourceCuiCounts.clear();
+      _sourceNames.putAll( sourceNames );
+      _sourceVersions.putAll( sourceVersions );
+      _sourceCuiCounts.putAll( sourceCuiCounts );
       fireTableChanged( new TableModelEvent( this ) );
    }
 
@@ -61,7 +79,7 @@ final public class SourceTableModel impl
     */
    @Override
    public int getColumnCount() {
-      return 3;
+      return 6;
    }
 
    /**
@@ -101,6 +119,12 @@ final public class SourceTableModel impl
             return isTargetEnabled( source );
          case 2:
             return source;
+         case 3:
+            return _sourceNames.getOrDefault( source, "" );
+         case 4:
+            return _sourceVersions.getOrDefault( source, "" );
+         case 5:
+            return _sourceCuiCounts.getOrDefault( source, "" );
       }
       return "ERROR";
    }

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java Fri Sep 25 00:55:08 2020
@@ -1,5 +1,7 @@
 package org.apache.ctakes.gui.dictionary.umls;
 
+import org.apache.ctakes.core.util.annotation.SemanticGroup;
+import org.apache.ctakes.core.util.annotation.SemanticTui;
 import org.apache.log4j.Logger;
 
 import javax.swing.event.EventListenerList;
@@ -9,7 +11,8 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.EnumSet;
 
-import static org.apache.ctakes.gui.dictionary.umls.Tui.*;
+import static org.apache.ctakes.core.util.annotation.SemanticTui.*;
+
 
 /**
  * @author SPF , chip-nlp
@@ -20,28 +23,37 @@ final public class TuiTableModel impleme
 
    static private final Logger LOGGER = Logger.getLogger( "TuiTableModel" );
 
-   static public final Tui[] CTAKES_ANAT = { T021, T022, T023, T024, T025, T026, T029, T030 };
-   static private final Tui[] CTAKES_DISO = { T019, T020, T037, T047, T048, T049, T050, T190, T191 };
-   static private final Tui[] CTAKES_FIND = { T033, T034, T040, T041, T042, T043, T044, T045, T046, T056, T057, T184 };
-   static private final Tui[] CTAKES_PROC = { T059, T060, T061 };
-   static public final Tui[] CTAKES_DRUG = { T109, T110, T114, T115, T116, T118, T119, T121, T122, T123, T124,
-                                             T125, T126, T127, T129, T130, T131, T195, T196, T197, T200, T203 };
+//   static public final Tui[] CTAKES_ANAT = { T021, T022, T023, T024, T025, T026, T029, T030 };
+//   static public final Tui[] CTAKES_DRUG = { T109, T110, T114, T115, T116, T118, T119, T121, T122, T123, T124,
+//                                             T125, T126, T127, T129, T130, T131, T195, T196, T197, T200, T203 };
+
+   // Semantic Types that are in the normal ctakes semantic groups but are still not wanted.  e.g. "Cell"
+   static private final Collection<SemanticTui> UNWANTED_TUIS
+         = EnumSet.of( T116, T087, T123, T118, T026, T043, T025, T103, T120, T104, T077, T049, T088, T065, T196,
+         T050, T018, T126, T168, T045, T028, T125, T078, T129, T055, T197, T170, T130, T119, T063,
+         T066, T041, T073, T044, T085, T114, T124, T086, T115, T109, T040, T042, T046, T039,
+         T192, T062, T075, T054, UNKNOWN );
 
-   static private final String[] COLUMN_NAMES = { "Use", "TUI", "Semantic Type" };
-   static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, String.class, String.class };
+   static private final String[] COLUMN_NAMES = { "Use", "TUI", "Semantic Type", "Semantic Group" };
+   static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, String.class, String.class, String.class };
 
    private final EventListenerList _listenerList = new EventListenerList();
-   private final Collection<Tui> _wantedTuis = EnumSet.noneOf( Tui.class );
+   private final Collection<SemanticTui> _wantedTuis = EnumSet.noneOf( SemanticTui.class );
 
    public TuiTableModel() {
-      _wantedTuis.addAll( Arrays.asList( CTAKES_ANAT ) );
-      _wantedTuis.addAll( Arrays.asList( CTAKES_DISO ) );
-      _wantedTuis.addAll( Arrays.asList( CTAKES_FIND ) );
-      _wantedTuis.addAll( Arrays.asList( CTAKES_PROC ) );
-      _wantedTuis.addAll( Arrays.asList( CTAKES_DRUG ) );
+      final EnumSet<SemanticGroup> wantedGroups
+            = EnumSet.of( SemanticGroup.ANATOMY,
+            SemanticGroup.DISORDER,
+            SemanticGroup.FINDING,
+            SemanticGroup.PROCEDURE,
+            SemanticGroup.DRUG );
+      Arrays.stream( SemanticTui.values() )
+            .filter( t -> !UNWANTED_TUIS.contains( t ) )
+            .filter( t -> wantedGroups.contains( t.getGroup() ) )
+            .forEach( _wantedTuis::add );
    }
 
-   public Collection<Tui> getWantedTuis() {
+   public Collection<SemanticTui> getWantedTuis() {
       return _wantedTuis;
    }
 
@@ -50,7 +62,7 @@ final public class TuiTableModel impleme
     */
    @Override
    public int getRowCount() {
-      return Tui.values().length;
+      return SemanticTui.values().length;
    }
 
    /**
@@ -58,7 +70,7 @@ final public class TuiTableModel impleme
     */
    @Override
    public int getColumnCount() {
-      return 3;
+      return 4;
    }
 
    /**
@@ -90,19 +102,21 @@ final public class TuiTableModel impleme
     */
    @Override
    public Object getValueAt( final int rowIndex, final int columnIndex ) {
-      final Tui tui = Tui.values()[ rowIndex ];
+      final SemanticTui tui = SemanticTui.values()[ rowIndex ];
       switch ( columnIndex ) {
          case 0:
             return isTuiEnabled( tui );
          case 1:
             return tui.name();
          case 2:
-            return tui.getDescription();
+            return tui.getSemanticType();
+         case 3:
+            return tui.getGroupName();
       }
       return "ERROR";
    }
 
-   private boolean isTuiEnabled( final Tui tui ) {
+   private boolean isTuiEnabled( final SemanticTui tui ) {
       return _wantedTuis.contains( tui );
    }
 
@@ -112,7 +126,7 @@ final public class TuiTableModel impleme
    @Override
    public void setValueAt( final Object aValue, final int rowIndex, final int columnIndex ) {
       if ( aValue instanceof Boolean && columnIndex == 0 ) {
-         final Tui tui = Tui.values()[ rowIndex ];
+         final SemanticTui tui = SemanticTui.values()[ rowIndex ];
          if ( (Boolean)aValue ) {
             _wantedTuis.add( tui );
          } else {

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java Fri Sep 25 00:55:08 2020
@@ -2,10 +2,7 @@ package org.apache.ctakes.gui.dictionary
 
 import org.apache.log4j.Logger;
 
-import java.sql.Connection;
-import java.sql.Driver;
-import java.sql.DriverManager;
-import java.sql.SQLException;
+import java.sql.*;
 
 /**
  * Author: SPF
@@ -46,6 +43,7 @@ final public class JdbcUtil {
          LOGGER.error( sqlE.getMessage() );
          System.exit( 1 );
       }
+      registerShutdownHook( connection );
       return connection;
    }
 
@@ -82,4 +80,26 @@ final public class JdbcUtil {
       return sb.toString();
    }
 
+   /**
+    * register a shutdown hook that will shut down the database, removing temporary and lock files.
+    *
+    * @param connection -
+    */
+   static private void registerShutdownHook( final Connection connection ) {
+      // Registers a shutdown hook for the Hsql instance so that it
+      // shuts down nicely and any temporary or lock files are cleaned up.
+      Runtime.getRuntime().addShutdownHook( new Thread( () -> {
+         try {
+            final Statement shutdown = connection.createStatement();
+            shutdown.execute( "SHUTDOWN" );
+            shutdown.close();
+            // The db is read-only, so there should be no need to roll back any transactions.
+            connection.clearWarnings();
+            connection.close();
+         } catch ( SQLException sqlE ) {
+            // ignore
+         }
+      } ) );
+   }
+
 }

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java Fri Sep 25 00:55:08 2020
@@ -1,7 +1,7 @@
 package org.apache.ctakes.gui.dictionary.util;
 
+import org.apache.ctakes.core.util.annotation.SemanticTui;
 import org.apache.ctakes.gui.dictionary.umls.Concept;
-import org.apache.ctakes.gui.dictionary.umls.Tui;
 import org.apache.ctakes.gui.dictionary.umls.VocabularyStore;
 import org.apache.log4j.Logger;
 
@@ -92,9 +92,9 @@ final public class RareWordDbWriter {
                continue;
             }
             // write tui table
-            for ( Tui tui : concept.getTuis() ) {
+            for ( SemanticTui tui : concept.getTuis() ) {
                tuiStatement.setLong( CuiTermsField.CUI.__index, cui );
-               tuiStatement.setInt( 2, tui.getIntValue() );
+               tuiStatement.setInt( 2, tui.getCode() );
                tuiStatement.executeUpdate();
                tuiTableCount = incrementCount( "Tui", tuiTableCount );
             }

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java Fri Sep 25 00:55:08 2020
@@ -93,7 +93,7 @@ final public class RareWordUtil {
             break;
          }
       }
-      return hasLetter && !BAD_POS_TERM_SET.contains( token );
+      return hasLetter && !BAD_POS_TERM_SET.contains( token.toLowerCase() );
    }
 
 

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java Fri Sep 25 00:55:08 2020
@@ -94,7 +94,7 @@ final public class TextTokenizer {
 
    static private boolean isPrefix( final String word ) {
       final String prefixQ = word + "-";
-      return PREFIX_SET.contains( prefixQ );
+      return PREFIX_SET.contains( prefixQ.toLowerCase() );
    }
 
    static private boolean isSuffix( final String word, final int startIndex ) {
@@ -106,11 +106,11 @@ final public class TextTokenizer {
          return false;
       }
       final String suffixQ = "-" + nextCharTerm;
-      return SUFFIX_SET.contains( suffixQ );
+      return SUFFIX_SET.contains( suffixQ.toLowerCase() );
    }
 
    static private boolean isOwnerApostrophe( final CharSequence word, final int startIndex ) {
-      return word.length() == startIndex + 1 && word.charAt( startIndex ) == 's';
+      return word.length() == startIndex + 1 && (word.charAt( startIndex ) == 's' || word.charAt( startIndex ) == 'S');
    }
 
    static private boolean isNumberDecimal( final CharSequence word, final int startIndex ) {
@@ -179,7 +179,8 @@ final public class TextTokenizer {
       if ( text.isEmpty() ) {
          return text;
       }
-      final String[] splits = WHITESPACE.split( text.toLowerCase() );
+//      final String[] splits = WHITESPACE.split( text.toLowerCase() );
+      final String[] splits = WHITESPACE.split( text );
       if ( splits.length == 0 ) {
          return "";
       }
@@ -189,7 +190,7 @@ final public class TextTokenizer {
          splits[ splits.length - 1 ] = lastSplit.substring( 0, lastSplit.length() - 1 );
       }
       return Arrays.stream( splits )
-            .map( s -> getTokens( s, separateDigits ) )
+                   .map( s -> getTokens( s, separateDigits ) )
             .flatMap( Collection::stream )
             .collect( Collectors.joining( " " ) );
    }

Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java Fri Sep 25 00:55:08 2020
@@ -1,8 +1,9 @@
 package org.apache.ctakes.gui.dictionary.util;
 
-import java.util.ArrayList;
+import org.apache.ctakes.core.util.StringUtil;
+
+import java.util.Arrays;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.List;
 
 /**
@@ -28,23 +29,24 @@ final public class TokenUtil {
    }
 
    static private List<String> getSeparatedValueItems( final String line, final char separator ) {
-      if ( line == null || line.trim().isEmpty() ) {
-         return Collections.emptyList();
-      }
-      final List<String> tokens = new ArrayList<>();
-      int startIndex = 0;
-      int stopIndex = line.indexOf( separator );
-      while ( stopIndex > 0 && stopIndex < line.length() ) {
-         tokens.add( line.substring( startIndex, stopIndex ) );
-         startIndex = stopIndex + 1;
-         stopIndex = line.indexOf( separator, startIndex );
-      }
-      if ( startIndex < line.length() - 1 ) {
-         tokens.add( line.substring( startIndex ) );
-      } else {
-         tokens.add( "" );
-      }
-      return tokens;
+      return Arrays.asList( StringUtil.fastSplit( line, separator ) );
+//      if ( line == null || line.trim().isEmpty() ) {
+//         return Collections.emptyList();
+//      }
+//      final List<String> tokens = new ArrayList<>();
+//      int startIndex = 0;
+//      int stopIndex = line.indexOf( separator );
+//      while ( stopIndex > 0 && stopIndex < line.length() ) {
+//         tokens.add( line.substring( startIndex, stopIndex ) );
+//         startIndex = stopIndex + 1;
+//         stopIndex = line.indexOf( separator, startIndex );
+//      }
+//      if ( startIndex < line.length() - 1 ) {
+//         tokens.add( line.substring( startIndex ) );
+//      } else {
+//         tokens.add( "" );
+//      }
+//      return tokens;
    }