You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2017/03/16 21:55:01 UTC

svn commit: r1787257 [3/5] - in /ctakes/trunk: ./ ctakes-dictionary-gui/ ctakes-dictionary-gui/resources/ ctakes-dictionary-gui/resources/org/ ctakes-dictionary-gui/resources/org/apache/ ctakes-dictionary-gui/resources/org/apache/ctakes/ ctakes-diction...

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryBuilder.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryBuilder.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryBuilder.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,167 @@
+package org.apache.ctakes.dictionary.creator.gui.ctakes;
+
+
+import org.apache.ctakes.dictionary.creator.gui.umls.*;
+import org.apache.ctakes.dictionary.creator.util.HsqlUtil;
+import org.apache.ctakes.dictionary.creator.util.RareWordDbWriter;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.File;
+import java.util.*;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/11/2015
+ */
+final public class DictionaryBuilder {
+
+   static private final Logger LOGGER = LogManager.getLogger( "DictionaryBuilder" );
+
+   // TODO  static private final String DEFAULT_DATA_DIR = "resources/org/apache/ctakes/dictionary-gui/data/default";
+   static private final String DEFAULT_DATA_DIR = "resources/org/apache/ctakes/dictionary-gui/data/tiny";
+   static private final String CTAKES_APP_DB_PATH = "resources/org/apache/ctakes/dictionary/lookup/fast";
+   static private final String CTAKES_RES_MODULE = "ctakes-dictionary-lookup-fast-res";
+   static private final String CTAKES_RES_DB_PATH = CTAKES_RES_MODULE + "/src/main/" + CTAKES_APP_DB_PATH;
+   static private final String RXNORM = "RXNORM";
+   static private final int MIN_CHAR_LENGTH = 2;
+   static private final int MAX_CHAR_LENGTH = 50;
+   static private final int MAX_WORD_COUNT = 12;
+   static private final int MAX_SYM_COUNT = 7;
+
+   private DictionaryBuilder() {}
+
+   /**
+    * Builds a ctakes dictionary from a umls installation.
+    * Concepts are parsed and filtered by {@code parseAll}, two hard-coded nitric oxide special cases
+    * are applied, and the result is handed to {@code writeDatabase}.
+    *
+    * @param umlsDirPath     path to the umls installation
+    * @param ctakesDirPath   path to the ctakes installation
+    * @param dictionaryName  name of the dictionary to create
+    * @param wantedLanguages languages passed on to the MRCONSO parser
+    * @param wantedSources   source vocabularies used to create the initial concept map
+    * @param wantedTargets   target vocabularies passed on to the MRCONSO parser
+    * @param wantedTuis      semantic types (tuis) to keep
+    * @return the value returned by {@code writeDatabase}
+    */
+   static public boolean buildDictionary( final String umlsDirPath,
+                                          final String ctakesDirPath,
+                                          final String dictionaryName,
+                                          final Collection<String> wantedLanguages,
+                                          final Collection<String> wantedSources,
+                                          final Collection<String> wantedTargets,
+                                          final Collection<Tui> wantedTuis ) {
+      // Set up the term utility and parse everything that is wanted
+      final UmlsTermUtil termUtil = new UmlsTermUtil( DEFAULT_DATA_DIR );
+      final Map<Long,Concept> concepts
+            = parseAll( termUtil, umlsDirPath, wantedLanguages, wantedSources, wantedTargets, wantedTuis );
+
+      // special case for nitric oxide "no"
+      Optional.ofNullable( concepts.get( 28128L ) )
+              .ifPresent( c -> c.removeTexts( Collections.singletonList( "no" ) ) );
+      // special case for nitric oxide synthase "nos"
+      Optional.ofNullable( concepts.get( 132555L ) )
+              .ifPresent( c -> c.removeTexts( Arrays.asList( "nos", "synthase" ) ) );
+
+      return writeDatabase( ctakesDirPath, dictionaryName, concepts );
+   }
+
+
+
+
+   /**
+    * Creates the concept map for the wanted sources and tuis, fills it from MRCONSO and then
+    * prunes it.  The pruning steps run in a fixed order: unwanted concepts, unwanted drugs,
+    * anatomical texts on non-anatomical concepts, and finally text minimization per concept.
+    *
+    * @param umlsTermUtil    term utility handed to the MRCONSO parser
+    * @param umlsDirPath     path to the umls installation
+    * @param wantedLanguages languages handed to the MRCONSO parser
+    * @param wantedSources   source vocabularies used to create the initial concept map
+    * @param wantedTargets   target vocabularies handed to the MRCONSO parser
+    * @param wantedTuis      semantic types (tuis) to keep
+    * @return map of cui to concept that remains after all pruning steps
+    */
+   static private Map<Long,Concept> parseAll( final UmlsTermUtil umlsTermUtil,
+                                              final String umlsDirPath,
+                                              final Collection<String> wantedLanguages,
+                                              final Collection<String> wantedSources,
+                                              final Collection<String> wantedTargets,
+                                              final Collection<Tui> wantedTuis ) {
+      LOGGER.info( "Parsing Concepts" );
+      // Create a map of Cuis to empty Concepts for all wanted Tuis and source vocabularies
+      final Map<Long,Concept> conceptMap
+            = ConceptMapFactory.createInitialConceptMap( umlsDirPath, wantedSources, wantedTuis );
+      // Fill in information for all valid concepts
+      // NOTE(review): the meaning of the boolean argument is defined by MrconsoParser - confirm there
+      MrconsoParser.parseAllConcepts( umlsDirPath, conceptMap, wantedTargets, umlsTermUtil,
+            wantedLanguages, true, MIN_CHAR_LENGTH, MAX_CHAR_LENGTH, MAX_WORD_COUNT, MAX_SYM_COUNT );
+      // Drop concepts flagged as unwanted, then drug concepts that fail the rxnorm rules
+      removeUnwantedConcepts( conceptMap );
+      removeUnwantedDrugs( conceptMap, wantedTuis );
+      // Cull non-ANAT texts by ANAT texts as determined by ANAT tuis
+      removeAnatTexts( conceptMap.values(), wantedTuis );
+      // Let each remaining concept reduce its own set of texts
+      conceptMap.values().forEach( Concept::minimizeTexts );
+      LOGGER.info( "Done Parsing Concepts" );
+      return conceptMap;
+   }
+
+   /**
+    * Removes every entry whose concept reports itself as unwanted - for instance because it
+    * does not have any text from a desired vocabulary.
+    *
+    * @param conceptMap map of cui to concept; modified in place
+    */
+   static private void removeUnwantedConcepts( final Map<Long, Concept> conceptMap ) {
+      // The values view is backed by the map, so removing a value removes its entry
+      conceptMap.values().removeIf( Concept::isUnwanted );
+   }
+
+   /**
+    * Collects the texts of all concepts that have at least one wanted anatomical tui.
+    *
+    * @param concepts   concepts to inspect
+    * @param wantedTuis all wanted tuis; only those also in {@code TuiTableModel.CTAKES_ANAT} are used
+    * @return set of the texts of the matching concepts
+    */
+   static private Collection<String> getAnatTexts( final Collection<Concept> concepts, final Collection<Tui> wantedTuis ) {
+      final Collection<Tui> anatTuis = new ArrayList<>( wantedTuis );
+      anatTuis.retainAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+      final Collection<String> anatTexts = new HashSet<>();
+      for ( Concept concept : concepts ) {
+         if ( concept.hasTui( anatTuis ) ) {
+            anatTexts.addAll( concept.getTexts() );
+         }
+      }
+      return anatTexts;
+   }
+
+   /**
+    * Removes the given anatomical texts from every concept that has at least one wanted
+    * non-anatomical tui.
+    *
+    * @param concepts   concepts to clean; their texts are modified in place
+    * @param wantedTuis all wanted tuis; those in {@code TuiTableModel.CTAKES_ANAT} are ignored
+    * @param anatTexts  texts to remove
+    */
+   static private void removeAnatTexts( final Collection<Concept> concepts,
+                                        final Collection<Tui> wantedTuis,
+                                        final Collection<String> anatTexts ) {
+      final Collection<Tui> otherTuis = new ArrayList<>( wantedTuis );
+      otherTuis.removeAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+      for ( Concept concept : concepts ) {
+         if ( concept.hasTui( otherTuis ) ) {
+            concept.removeTexts( anatTexts );
+         }
+      }
+   }
+
+   /**
+    * Gathers the texts of the anatomical concepts and removes them from the non-anatomical concepts.
+    *
+    * @param concepts   concepts to inspect and clean
+    * @param wantedTuis all wanted tuis
+    */
+   static private void removeAnatTexts( final Collection<Concept> concepts,
+                                        final Collection<Tui> wantedTuis ) {
+      removeAnatTexts( concepts, wantedTuis, getAnatTexts( concepts, wantedTuis ) );
+   }
+
+
+   /**
+    * Removes concepts whose drug status and rxnorm membership disagree:
+    * concepts that have only drug tuis but are not in rxnorm, and
+    * concepts that are in rxnorm but have only non-drug tuis.
+    *
+    * @param conceptMap map of cui to concept; modified in place
+    * @param wantedTuis all wanted tuis, split here into drug and non-drug tuis
+    */
+   static private void removeUnwantedDrugs( final Map<Long,Concept> conceptMap, Collection<Tui> wantedTuis ) {
+      final Collection<Tui> allDrugTuis = Arrays.asList( TuiTableModel.CTAKES_DRUG );
+      // wanted tuis that are drug tuis
+      final Collection<Tui> drugTuis = new ArrayList<>( wantedTuis );
+      drugTuis.retainAll( allDrugTuis );
+      // wanted tuis that are not drug tuis
+      final Collection<Tui> nonDrugTuis = new ArrayList<>( wantedTuis );
+      nonDrugTuis.removeAll( allDrugTuis );
+
+      conceptMap.entrySet().removeIf( entry -> {
+         final Concept concept = entry.getValue();
+         // in rxnorm: unwanted when all tuis are non-drug; not in rxnorm: unwanted when all tuis are drug
+         return concept.getVocabularies().contains( RXNORM )
+                ? nonDrugTuis.containsAll( concept.getTuis() )
+                : drugTuis.containsAll( concept.getTuis() );
+      } );
+   }
+
+
+   static private boolean writeDatabase( final String ctakesDirPath,
+                                         final String dictionaryName,
+                                         final Map<Long,Concept> conceptMap ) {
+      final File ctakesRoot = new File( ctakesDirPath );
+      String databaseDirPath = ctakesDirPath + "/" + CTAKES_APP_DB_PATH;
+      if ( Arrays.asList( ctakesRoot.list() ).contains( CTAKES_RES_MODULE ) ) {
+         databaseDirPath = ctakesDirPath + "/" + CTAKES_RES_DB_PATH;
+      }
+      if ( !HsqlUtil.createDatabase( databaseDirPath, dictionaryName ) ) {
+         return false;
+      }
+      if ( !DictionaryXmlWriter.writeXmlFile( databaseDirPath, dictionaryName ) ) {
+         return false;
+      }
+      final String url = HsqlUtil.URL_PREFIX + databaseDirPath.replace( '\\', '/' ) + "/" + dictionaryName + "/" + dictionaryName;
+      return RareWordDbWriter.writeConcepts( conceptMap, url, "sa", "" );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,131 @@
+package org.apache.ctakes.dictionary.creator.gui.ctakes;
+
+
+import org.apache.ctakes.dictionary.creator.gui.umls.Vocabulary;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.*;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+public class DictionaryXmlWriter {
+
+   static private final Logger LOGGER = LogManager.getLogger( "DictionaryXmlWriter" );
+
+
+   /**
+    * Writes the dictionary lookup specification to the file {@code databaseName + ".xml"} in the
+    * given directory.  The file declares a dictionary, a concept factory, a pair of the two and
+    * a term consumer, all named after the database.
+    * The file is written as UTF-8 to match the encoding stated in its xml declaration.
+    *
+    * @param databaseDir  directory in which the xml file is created
+    * @param databaseName name of the database; used for the file name and inside the specification
+    * @return true if the file was written, false if an IOException occurred
+    */
+   static public boolean writeXmlFile( final String databaseDir, final String databaseName ) {
+      final File scriptFile = new File( databaseDir, databaseName + ".xml" );
+      // FileWriter would use the platform charset, which may not be the declared UTF-8
+      try ( final Writer writer = new BufferedWriter( new OutputStreamWriter(
+            new FileOutputStream( scriptFile ), java.nio.charset.StandardCharsets.UTF_8 ) ) ) {
+         writer.write( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" );
+         writer.write( "<!--\n" );
+         writer.write( "Licensed to the Apache Software Foundation (ASF) under one\n" );
+         writer.write( "or more contributor license agreements.  See the NOTICE file\n" );
+         writer.write( "distributed with this work for additional information\n" );
+         writer.write( "regarding copyright ownership.  The ASF licenses this file\n" );
+         writer.write( "to you under the Apache License, Version 2.0 (the\n" );
+         writer.write( "\"License\"); you may not use this file except in compliance\n" );
+         writer.write( "with the License.  You may obtain a copy of the License at\n" );
+         writer.write( "http://www.apache.org/licenses/LICENSE-2.0\n" );
+         writer.write( "Unless required by applicable law or agreed to in writing,\n" );
+         writer.write( "software distributed under the License is distributed on an\n" );
+         writer.write( "\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n" );
+         writer.write( "KIND, either express or implied.  See the License for the\n" );
+         writer.write( "specific language governing permissions and limitations\n" );
+         writer.write( "under the License.\n" );
+         writer.write( "-->\n\n" );
+         writer.write( "<!--    New format for the .xml lookup specification.  Uses table name and value type/class for Concept Factories.  -->\n" );
+         writer.write( "<lookupSpecification>\n" );
+         writer.write( "<dictionaries>\n" );
+         writer.write( "   <dictionary>\n" );
+         writer.write( "      <name>" + databaseName + "Terms</name>\n" );
+         writer.write( "      <implementationName>org.apache.ctakes.dictionary.lookup2.dictionary.JdbcRareWordDictionary</implementationName>\n" );
+         writer.write( "      <properties>\n" );
+         writer.write( "<!-- urls for hsqldb memory connections must be file types in hsql 1.8.\n" );
+         writer.write( "These file urls must be either absolute path or relative to current working directory.\n" );
+         writer.write( "They cannot be based upon the classpath.\n" );
+         writer.write( "Though JdbcConnectionFactory will attempt to \"find\" a db based upon the parent dir of the url\n" );
+         writer.write( "for the sake of ide ease-of-use, the user should be aware of these hsql limitations.\n" );
+         writer.write( "-->\n" );
+         writer.write( createProperty( "jdbcDriver", "org.hsqldb.jdbcDriver" ) );
+         writer.write( createProperty( "jdbcUrl",
+               "jdbc:hsqldb:file:resources/org/apache/ctakes/dictionary/lookup/fast/" + databaseName + "/" + databaseName ) );
+         writer.write( createProperty( "jdbcUser", "sa" ) );
+         writer.write( createProperty( "jdbcPass", "" ) );
+         writer.write( createProperty( "rareWordTable", "cui_terms" ) );
+         writer.write( createProperty( "umlsUrl", "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser" ) );
+         writer.write( createProperty( "umlsVendor", "NLM-6515182895" ) );
+         writer.write( createProperty( "umlsUser", "CHANGE_ME" ) );
+         writer.write( createProperty( "umlsPass", "CHANGE_ME" ) );
+         writer.write( "      </properties>\n" );
+         writer.write( "   </dictionary>\n" );
+         writer.write( "</dictionaries>\n" );
+         writer.write( "\n" );
+         writer.write( "<conceptFactories>\n" );
+         writer.write( "   <conceptFactory>\n" );
+         writer.write( "      <name>" + databaseName + "Concepts</name>\n" );
+         writer.write( "      <implementationName>org.apache.ctakes.dictionary.lookup2.concept.JdbcConceptFactory</implementationName>\n" );
+         writer.write( "      <properties>\n" );
+         writer.write( createProperty( "jdbcDriver", "org.hsqldb.jdbcDriver" ) );
+         writer.write( createProperty( "jdbcUrl",
+               "jdbc:hsqldb:file:resources/org/apache/ctakes/dictionary/lookup/fast/" + databaseName + "/" + databaseName ) );
+         writer.write( createProperty( "jdbcUser", "sa" ) );
+         writer.write( createProperty( "jdbcPass", "" ) );
+         writer.write( createProperty( "umlsUrl", "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser" ) );
+         writer.write( createProperty( "umlsVendor", "NLM-6515182895" ) );
+         writer.write( createProperty( "umlsUser", "CHANGE_ME" ) );
+         writer.write( createProperty( "umlsPass", "CHANGE_ME" ) );
+         writer.write( createProperty( "tuiTable", "tui" ) );
+         writer.write( createProperty( "prefTermTable", "prefTerm" ) );
+         writer.write( "<!-- Optional tables for optional term info.\n" );
+         writer.write( "Uncommenting these lines alone may not persist term information;\n" );
+         writer.write( "persistence depends upon the TermConsumer.  -->\n" );
+         for ( String vocabulary : Vocabulary.getInstance().getAllVocabularies() ) {
+            // Locale.ROOT keeps the generated property keys identical on every default locale
+            final String tableKey = vocabulary.toLowerCase( java.util.Locale.ROOT )
+                                              .replace( '.', '_' ).replace( '-', '_' ) + "Table";
+            writer.write( createProperty( tableKey, Vocabulary.getInstance().getCtakesClass( vocabulary ) ) );
+         }
+         writer.write( "      </properties>\n" );
+         writer.write( "   </conceptFactory>\n" );
+         writer.write( "</conceptFactories>\n" );
+         writer.write( "\n" );
+         writer.write( "<!--  Defines what terms and concepts will be used  -->\n" );
+         writer.write( "<dictionaryConceptPairs>\n" );
+         writer.write( "   <dictionaryConceptPair>\n" );
+         writer.write( "      <name>" + databaseName + "Pair</name>\n" );
+         writer.write( "      <dictionaryName>" + databaseName + "Terms</dictionaryName>\n" );
+         writer.write( "      <conceptFactoryName>" + databaseName + "Concepts</conceptFactoryName>\n" );
+         writer.write( "   </dictionaryConceptPair>\n" );
+         writer.write( "</dictionaryConceptPairs>\n" );
+         writer.write( "\n" );
+         writer.write( "<!-- DefaultTermConsumer will persist all spans.\n" );
+         writer.write( "PrecisionTermConsumer will only persist only the longest overlapping span of any semantic group.\n" );
+         writer.write( "SemanticCleanupTermConsumer works as Precision** but also removes signs/sympoms contained within disease/disorder,\n" );
+         writer.write( "and (just in case) removes any s/s and d/d that are also (exactly) anatomical sites. -->\n" );
+         writer.write( "<rareWordConsumer>\n" );
+         writer.write( "   <name>Term Consumer</name>\n" );
+         writer.write( "   <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>\n" );
+         writer.write( "   <!--<implementationName>org.apache.ctakes.dictionary.lookup2.consumer.PrecisionTermConsumer</implementationName>-->\n" );
+         writer.write( "   <!--<implementationName>org.apache.ctakes.dictionary.lookup2.consumer.SemanticCleanupTermConsumer</implementationName>-->\n" );
+         writer.write( "   <properties>\n" );
+         writer.write( "<!-- Depending upon the consumer, the value of codingScheme may or may not be used.  With the packaged consumers,\n" );
+         writer.write( "codingScheme is a default value used only for cuis that do not have secondary codes (snomed, rxnorm, etc.)  -->\n" );
+         writer.write( createProperty( "codingScheme", databaseName ) );
+         writer.write( "   </properties>\n" );
+         writer.write( "</rareWordConsumer>\n" );
+         writer.write( "\n" );
+         writer.write( "</lookupSpecification>\n" );
+      } catch ( IOException ioE ) {
+         // pass the exception itself so that the stack trace is not lost
+         LOGGER.error( ioE.getMessage(), ioE );
+         return false;
+      }
+      return true;
+   }
+
+   static private String createProperty( final String name, final String value ) {
+      return "         <property key=\"" + name + "\" value=\"" + value + "\"/>\n";
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/DirChooser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/DirChooser.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/DirChooser.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/DirChooser.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,120 @@
+package org.apache.ctakes.dictionary.creator.gui.main;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.*;
+import javax.swing.border.EmptyBorder;
+import javax.swing.text.JTextComponent;
+import java.awt.*;
+import java.awt.datatransfer.DataFlavor;
+import java.awt.datatransfer.UnsupportedFlavorException;
+import java.awt.dnd.DnDConstants;
+import java.awt.dnd.DropTarget;
+import java.awt.dnd.DropTargetDropEvent;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+public class DirChooser extends JPanel {
+
+   static private final Logger LOGGER = LogManager.getLogger( "DirChooser" );
+
+   /**
+    * Lays out a label, a read-only text field showing the directory and a button that opens a
+    * directory chooser.  The text field also accepts dropped directories.
+    *
+    * @param name              text of the label
+    * @param defaultDirectory  directory path initially shown in the text field
+    * @param dirChangeListener listener notified with the chosen path as action command
+    */
+   public DirChooser( final String name, final String defaultDirectory, final ActionListener dirChangeListener ) {
+      super( new BorderLayout( 10, 10 ) );
+      setBorder( new EmptyBorder( 2, 10, 2, 10 ) );
+
+      final JLabel nameLabel = new JLabel( name );
+      nameLabel.setPreferredSize( new Dimension( 100, 0 ) );
+      nameLabel.setHorizontalAlignment( SwingConstants.TRAILING );
+      add( nameLabel, BorderLayout.WEST );
+
+      final JTextField dirField = new JTextField( defaultDirectory );
+      dirField.setEditable( false );
+      add( dirField, BorderLayout.CENTER );
+
+      final JButton chooseButton = new JButton( new OpenDirAction( dirField, dirChangeListener ) );
+      add( chooseButton, BorderLayout.EAST );
+
+      dirField.setDropTarget( new DirDropTarget( dirField, dirChangeListener ) );
+      dirField.addActionListener( dirChangeListener );
+   }
+
+   /**
+    * Opens a directories-only JFileChooser, starting in the directory currently shown in the text
+    * component if it exists.  An approved selection is written to the text component and sent to
+    * the listener as the action command of an ActionEvent.
+    */
+   private class OpenDirAction extends AbstractAction {
+      private final JFileChooser __chooser;
+      private final JTextComponent __textComponent;
+      private final ActionListener __dirChangeListener;
+
+      private OpenDirAction( final JTextComponent textComponent, final ActionListener dirChangeListener ) {
+         super( "Select Directory" );
+         __textComponent = textComponent;
+         __dirChangeListener = dirChangeListener;
+         __chooser = new JFileChooser();
+         __chooser.setFileSelectionMode( JFileChooser.DIRECTORIES_ONLY );
+      }
+
+      /**
+       * Points the chooser at the directory named in the text component, if there is one and it exists.
+       */
+      private void initStartDirectory() {
+         final String currentPath = __textComponent.getText();
+         if ( currentPath == null || currentPath.isEmpty() ) {
+            return;
+         }
+         final File currentDir = new File( currentPath );
+         if ( currentDir.exists() ) {
+            __chooser.setCurrentDirectory( currentDir );
+         }
+      }
+
+      @Override
+      public void actionPerformed( final ActionEvent event ) {
+         initStartDirectory();
+         if ( __chooser.showOpenDialog( null ) == JFileChooser.APPROVE_OPTION ) {
+            final String chosenPath = __chooser.getSelectedFile().getAbsolutePath();
+            __textComponent.setText( chosenPath );
+            __dirChangeListener.actionPerformed( new ActionEvent( this, ActionEvent.ACTION_FIRST, chosenPath ) );
+         }
+      }
+   }
+
+
+   private class DirDropTarget extends DropTarget {
+      private final JTextComponent __textComponent;
+      private final ActionListener __dirChangeListener;
+      private DirDropTarget( final JTextComponent textComponent, final ActionListener dirChangeListener ) {
+         __textComponent = textComponent;
+         __dirChangeListener = dirChangeListener;
+      }
+      @Override
+      public synchronized void drop( final DropTargetDropEvent event ) {
+         event.acceptDrop( DnDConstants.ACTION_COPY );
+         try {
+            final Object values = event.getTransferable().getTransferData( DataFlavor.javaFileListFlavor );
+            if ( !(values instanceof Iterable) ) {
+               return;
+            }
+            for ( Object value : (Iterable)values ) {
+               if ( !(value instanceof File) ) {
+                  continue;
+               }
+               final File file = (File)value;
+               if ( !file.isDirectory() ) {
+                  continue;
+               }
+               __textComponent.setText( file.getAbsolutePath() );
+               final ActionEvent dirEvent
+                     = new ActionEvent( this, ActionEvent.ACTION_FIRST, file.getAbsolutePath() );
+               __dirChangeListener.actionPerformed( dirEvent );
+               return;
+            }
+         } catch ( UnsupportedFlavorException | IOException multE ) {
+            LOGGER.warn( multE.getMessage() );
+         }
+      }
+   }
+
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,233 @@
+package org.apache.ctakes.dictionary.creator.gui.main;
+
+import org.apache.ctakes.dictionary.creator.gui.ctakes.DictionaryBuilder;
+import org.apache.ctakes.dictionary.creator.gui.umls.MrconsoIndex;
+import org.apache.ctakes.dictionary.creator.gui.umls.SourceTableModel;
+import org.apache.ctakes.dictionary.creator.gui.umls.Tui;
+import org.apache.ctakes.dictionary.creator.gui.umls.TuiTableModel;
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.*;
+import javax.swing.border.EmptyBorder;
+import javax.swing.table.TableModel;
+import javax.swing.text.JTextComponent;
+import java.awt.*;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+final public class MainPanel extends JPanel {
+
+   static private final Logger LOGGER = LogManager.getLogger( "MainPanel" );
+
+   private String _umlsDirPath = System.getProperty( "user.dir" );
+   private String _ctakesPath = System.getProperty( "user.dir" );
+   final TuiTableModel _tuiModel = new TuiTableModel();
+   final SourceTableModel _sourceModel = new SourceTableModel();
+
+   /**
+    * Builds the panel: the two directory choosers at the top, the source and tui tables side by
+    * side in the center, and the dictionary name field with the build button at the bottom.
+    */
+   public MainPanel() {
+      super( new BorderLayout() );
+
+      final JComponent sourceDirPanel = new JPanel( new GridLayout( 2, 1 ) );
+      // each chooser starts with the path that its own listener maintains
+      sourceDirPanel.add( new DirChooser( "cTAKES Installation:", _ctakesPath, new CtakesDirListener() ) );
+      sourceDirPanel.add( new DirChooser( "UMLS Installation:", _umlsDirPath, new UmlsDirListener() ) );
+      add( sourceDirPanel, BorderLayout.NORTH );
+
+      final JComponent centerPanel = new JPanel( new GridLayout( 1, 2 ) );
+      centerPanel.add( createSourceTable( _sourceModel ) );
+      centerPanel.add( createTuiTable( _tuiModel ) );
+      add( centerPanel, BorderLayout.CENTER );
+      add( createGoPanel(), BorderLayout.SOUTH );
+   }
+
+   /**
+    * Creates a sortable table for the tui model, wrapped in a scroll pane.
+    * The first two columns are limited to a width of 50.
+    *
+    * @param tuiModel model with the tui rows
+    * @return scroll pane containing the table
+    */
+   private JComponent createTuiTable( final TableModel tuiModel ) {
+      final JTable table = new JTable( tuiModel );
+      table.setCellSelectionEnabled( false );
+      table.setShowVerticalLines( false );
+      table.setAutoCreateRowSorter( true );
+      table.setAutoResizeMode( JTable.AUTO_RESIZE_LAST_COLUMN );
+      for ( int column = 0; column < 2; column++ ) {
+         table.getColumnModel().getColumn( column ).setMaxWidth( 50 );
+      }
+      return new JScrollPane( table );
+   }
+
+   /**
+    * Creates a sortable table for the source vocabulary model, wrapped in a scroll pane.
+    * The first two columns are limited to a width of 50.
+    *
+    * @param sourceModel model with the source vocabulary rows
+    * @return scroll pane containing the table
+    */
+   private JComponent createSourceTable( final TableModel sourceModel ) {
+      final JTable sourceTable = new JTable( sourceModel );
+      sourceTable.setCellSelectionEnabled( false );
+      sourceTable.setShowVerticalLines( false );
+      sourceTable.setAutoCreateRowSorter( true );
+      sourceTable.setAutoResizeMode( JTable.AUTO_RESIZE_LAST_COLUMN );
+      for ( int column = 0; column < 2; column++ ) {
+         sourceTable.getColumnModel().getColumn( column ).setMaxWidth( 50 );
+      }
+      return new JScrollPane( sourceTable );
+   }
+
+   /**
+    * Creates the bottom panel: a label, a text field for the dictionary name (initially "custom")
+    * and the button that starts the build.
+    *
+    * @return the assembled panel
+    */
+   private JComponent createGoPanel() {
+      final JLabel nameLabel = new JLabel( "Dictionary Name:" );
+      nameLabel.setPreferredSize( new Dimension( 100, 0 ) );
+      nameLabel.setHorizontalAlignment( SwingConstants.TRAILING );
+
+      final JTextField nameField = new JTextField( "custom" );
+      final JButton goButton = new JButton( new BuildDictionaryAction( nameField ) );
+
+      final JPanel goPanel = new JPanel( new BorderLayout( 10, 10 ) );
+      goPanel.setBorder( new EmptyBorder( 2, 10, 2, 10 ) );
+      goPanel.add( nameLabel, BorderLayout.WEST );
+      goPanel.add( nameField, BorderLayout.CENTER );
+      goPanel.add( goButton, BorderLayout.EAST );
+      return goPanel;
+   }
+
+   /**
+    * Validates a selected directory and, if it is valid, stores the umls root directory.
+    * Two layouts are accepted: a directory with a META subdirectory that contains MRCONSO.RRF
+    * (the directory itself is stored), or a directory that directly contains MRCONSO.RRF
+    * (its parent is stored).  For any other directory an error dialog is shown and the stored
+    * path is left unchanged.
+    *
+    * @param umlsDirPath path of the selected directory
+    * @return the umls root directory path in effect after the call
+    */
+   private String setUmlsDirPath( final String umlsDirPath ) {
+      // Checked first so that this layout wins when both layouts match
+      final File metaDir = new File( umlsDirPath, "META" );
+      if ( new File( metaDir, "MRCONSO.RRF" ).isFile() ) {
+         _umlsDirPath = umlsDirPath;
+         return _umlsDirPath;
+      }
+      // The selected directory holds MRCONSO.RRF itself, so its parent is the root.
+      // Returning here keeps this valid selection from being reported as an error.
+      final String parentPath = new File( umlsDirPath ).getParent();
+      if ( parentPath != null && new File( umlsDirPath, "MRCONSO.RRF" ).isFile() ) {
+         _umlsDirPath = parentPath;
+         return _umlsDirPath;
+      }
+      error( "Invalid UMLS Installation", umlsDirPath + " is not a valid path to a UMLS installation" );
+      return _umlsDirPath;
+   }
+
+   private void loadSources() {
+      SwingUtilities.invokeLater( new SourceLoadRunner( _umlsDirPath ) );
+   }
+
+   /**
+    * Reads META/MRCONSO.RRF below the umls directory, collects the distinct values of the source
+    * column and hands them to the source table model.  A wait cursor is shown on the root
+    * component while the file is read and is always restored afterwards.
+    */
+   private class SourceLoadRunner implements Runnable {
+      private final String __umlsDirPath;
+      private SourceLoadRunner( final String umlsDirPath ) {
+         __umlsDirPath = umlsDirPath;
+      }
+
+      @Override
+      public void run() {
+         // getRoot returns null while the panel is not part of a displayed hierarchy
+         final Component root = SwingUtilities.getRoot( MainPanel.this );
+         if ( root != null ) {
+            root.setCursor( Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR ) );
+         }
+         final File mrConso = new File( __umlsDirPath + "/META", "MRCONSO.RRF" );
+         final String mrConsoPath = mrConso.getPath();
+         LOGGER.info( "Parsing vocabulary types from " + mrConsoPath );
+         final Collection<String> sources = new HashSet<>();
+         try ( final BufferedReader reader = FileUtil.createReader( mrConsoPath ) ) {
+            int lineCount = 0;
+            java.util.List<String> tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
+            while ( tokens != null ) {
+               lineCount++;
+               // lines with too few columns are skipped
+               if ( tokens.size() > MrconsoIndex.SOURCE._index ) {
+                  sources.add( tokens.get( MrconsoIndex.SOURCE._index ) );
+               }
+               if ( lineCount % 100000 == 0 ) {
+                  LOGGER.info( "File Line " + lineCount + "\t Vocabularies " + sources.size() );
+               }
+               tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
+            }
+            LOGGER.info( "Parsed " + sources.size() + " vocabulary types" );
+            _sourceModel.setSources( sources );
+         } catch ( IOException ioE ) {
+            error( "Vocabulary Parse Error", ioE.getMessage() );
+         } finally {
+            // restore the cursor even if parsing fails with an unchecked exception
+            if ( root != null ) {
+               root.setCursor( Cursor.getDefaultCursor() );
+            }
+         }
+      }
+   }
+
+   private void buildDictionary( final String dictionaryName ) {
+      SwingUtilities.invokeLater(
+            new DictionaryBuildRunner( _umlsDirPath, _ctakesPath, dictionaryName, _sourceModel.getWantedSources(),
+                  _sourceModel.getWantedTargets(), _tuiModel.getWantedTuis() ) );
+   }
+
+   /**
+    * Logs the message as an error and then shows it in an error dialog on top of this panel.
+    *
+    * @param title   title of the dialog
+    * @param message text to log and to display
+    */
+   private void error( final String title, final String message ) {
+      LOGGER.error( message );
+      JOptionPane.showMessageDialog( this, message, title, JOptionPane.ERROR_MESSAGE );
+   }
+
+
+
+   /**
+    * Builds the dictionary for language "ENG" with the selections captured at construction time
+    * and reports the outcome in a dialog.  A wait cursor is shown on the root component during
+    * the build and is always restored afterwards.
+    */
+   private class DictionaryBuildRunner implements Runnable {
+      private final String __umlsDirPath;
+      private final String __ctakesDirPath;
+      private final String __dictionaryName;
+      private final Collection<String> __wantedSources;
+      private final Collection<String> __wantedTargets;
+      private final Collection<Tui> __wantedTuis;
+      private DictionaryBuildRunner( final String umlsDirPath, final String ctakesDirPath, final String dictionaryName,
+                                     final Collection<String> wantedSources,
+                                     final Collection<String> wantedTargets,
+                                     final Collection<Tui> wantedTuis ) {
+         __umlsDirPath = umlsDirPath;
+         __ctakesDirPath = ctakesDirPath;
+         __dictionaryName = dictionaryName;
+         // all selections are copied, so later changes to the given collections do not affect this run
+         __wantedSources = new ArrayList<>( wantedSources );
+         __wantedTargets = new ArrayList<>( wantedTargets );
+         __wantedTuis = new ArrayList<>( wantedTuis );
+      }
+
+      @Override
+      public void run() {
+         // getRoot returns null while the panel is not part of a displayed hierarchy
+         final Component root = SwingUtilities.getRoot( MainPanel.this );
+         if ( root != null ) {
+            root.setCursor( Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR ) );
+         }
+         try {
+            if ( DictionaryBuilder.buildDictionary( __umlsDirPath, __ctakesDirPath, __dictionaryName,
+                  Collections.singletonList( "ENG" ),
+                  __wantedSources, __wantedTargets, __wantedTuis ) ) {
+               final String message = "Dictionary " + __dictionaryName + " successfully built in " + __ctakesDirPath;
+               LOGGER.info( message );
+               JOptionPane.showMessageDialog( MainPanel.this, message, "Dictionary Built", JOptionPane.INFORMATION_MESSAGE );
+            } else {
+               error( "Build Failure", "Dictionary " + __dictionaryName + " could not be built in " + __ctakesDirPath );
+            }
+         } finally {
+            // restore the cursor even if the build fails with an unchecked exception
+            if ( root != null ) {
+               root.setCursor( Cursor.getDefaultCursor() );
+            }
+         }
+      }
+   }
+
+
+
+   /**
+    * Applies a newly chosen umls directory, taken from the action command of the event, and
+    * reloads the source vocabularies if the stored umls directory changed as a result.
+    */
+   private class UmlsDirListener implements ActionListener {
+      @Override
+      public void actionPerformed( final ActionEvent event ) {
+         final String previousPath = _umlsDirPath;
+         final String currentPath = setUmlsDirPath( event.getActionCommand() );
+         if ( previousPath.equals( currentPath ) ) {
+            return;
+         }
+         loadSources();
+      }
+   }
+
+
+   /**
+    * Stores the action command of the event as the ctakes directory path, without validation.
+    */
+   private class CtakesDirListener implements ActionListener {
+      @Override
+      public void actionPerformed( final ActionEvent event ) {
+         final String chosenPath = event.getActionCommand();
+         _ctakesPath = chosenPath;
+      }
+   }
+
+
+   /**
+    * Starts a dictionary build with the name entered in the text component.
+    * The name is lower-cased before use; an empty name is reported as an error instead.
+    */
+   private class BuildDictionaryAction extends AbstractAction {
+      private final JTextComponent __textComponent;
+
+      private BuildDictionaryAction( final JTextComponent textComponent ) {
+         super( "Build Dictionary" );
+         __textComponent = textComponent;
+      }
+
+      @Override
+      public void actionPerformed( final ActionEvent event ) {
+         final String dictionaryName = __textComponent.getText();
+         if ( dictionaryName != null && !dictionaryName.isEmpty() ) {
+            // Locale.ROOT keeps the result independent of the default locale
+            buildDictionary( dictionaryName.toLowerCase( java.util.Locale.ROOT ) );
+         } else {
+            error( "Invalid Dictionary Name", "Please Specify a Dictionary Name" );
+         }
+      }
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,127 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.ctakes.dictionary.creator.util.collection.CollectionMap;
+import org.apache.ctakes.dictionary.creator.util.collection.HashSetMap;
+
+import java.util.*;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 11/20/13
+ */
+final public class Concept {
+
+   static public String PREFERRED_TERM_UNKNOWN = "Unknown Preferred Term";
+
+   private String _preferredText = null;
+   private boolean _hasDose = false;
+
+   final private Collection<String> _texts;
+   final private CollectionMap<String, String, ? extends Collection<String>> _codes;
+   final private Collection<Tui> _tuis;
+
+
+
+   public Concept() {
+      _codes = new HashSetMap<>( 0 );
+      _texts = new HashSet<>( 1 );
+      _tuis = EnumSet.noneOf( Tui.class );
+   }
+
+   public boolean addTexts( final Collection<String> texts ) {
+      return _texts.addAll( texts );
+   }
+
+   public void removeTexts( final Collection<String> texts ) {
+      _texts.removeAll( texts );
+   }
+
+   public Collection<String> getTexts() {
+      return _texts;
+   }
+
+   public void minimizeTexts() {
+      if ( _texts.size() < 2 ) {
+         return;
+      }
+      final List<String> textList = new ArrayList<>( _texts );
+      final Collection<String> extensionTexts = new HashSet<>();
+      for ( int i=0; i<textList.size()-1; i++ ) {
+         final String iText = textList.get( i );
+         for ( int j=i+1; j<textList.size(); j++ ) {
+            final String jText = textList.get( j );
+            if ( textContained( jText, iText ) ) {
+               extensionTexts.add( jText );
+            } else if ( textContained( iText, jText ) ) {
+               extensionTexts.add( iText );
+            }
+         }
+      }
+      _texts.removeAll( extensionTexts );
+   }
+
+   static private boolean textContained( final String containerText, final String containedText ) {
+      final int index = containerText.indexOf( containedText );
+      return index >= 0
+             && ( index == 0 || containerText.charAt( index-1 ) == ' ' )
+           && ( index+containedText.length() == containerText.length() || containerText.charAt( index + containedText.length() ) == ' ' );
+   }
+
+   public void setPreferredText( final String text ) {
+      _preferredText = text;
+   }
+
+   public String getPreferredText() {
+      if ( _preferredText != null ) {
+         return _preferredText;
+      }
+      return PREFERRED_TERM_UNKNOWN;
+   }
+
+   public void addCode( final String source, final String code ) {
+      _codes.placeValue( source, code );
+   }
+
+   public Collection<String> getVocabularies() {
+      return _codes.keySet();
+   }
+
+   public Collection<String> getCodes( final String source ) {
+      final Collection<String> codes = _codes.getCollection( source );
+      if ( codes == null ) {
+         return Collections.emptyList();
+      }
+      return codes;
+   }
+
+   public void addTui( final Tui tui ) {
+      _tuis.add( tui );
+   }
+
+   public Collection<Tui> getTuis() {
+      return _tuis;
+   }
+
+   public boolean hasTui( final Collection<Tui> tuis ) {
+      return _tuis.stream().anyMatch( tuis::contains );
+   }
+
+   public boolean isEmpty() {
+//      return _texts.isEmpty() || _codes.isEmpty();
+      return _texts.isEmpty();
+   }
+
+   public void setHasDose() {
+      _hasDose = true;
+   }
+
+   public boolean hasDose() {
+      return _hasDose;
+   }
+
+   public boolean isUnwanted() {
+      return hasDose() || isEmpty();
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,40 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+
+/**
+ * Builds the first map of cui code to Concept from the wanted Tuis and source vocabularies.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/11/2015
+ */
+public class ConceptMapFactory {
+
+   static private final Logger LOGGER = LogManager.getLogger( "ConceptMapFactory" );
+
+   /**
+    * Creates concepts for the wanted Tuis, then keeps only those whose cui is valid for the wanted sources.
+    *
+    * @param umlsDirPath   path handed to the MRSTY and MRCONSO parsers
+    * @param wantedSources names of the wanted source vocabularies
+    * @param wantedTuis    wanted Tuis
+    * @return map of cui code to concept; an empty map if no sources or no Tuis are given
+    */
+   static public Map<Long,Concept> createInitialConceptMap( final String umlsDirPath,
+                                                     final Collection<String> wantedSources,
+                                                     final Collection<Tui> wantedTuis ) {
+      if ( wantedSources.isEmpty() ) {
+         LOGGER.warn( "No source vocabularies specified" );
+         return Collections.emptyMap();
+      }
+      if ( wantedTuis.isEmpty() ) {
+         LOGGER.warn( "No TUIs specified" );
+         return Collections.emptyMap();
+      }
+      // First pass: one concept per cui that has a wanted Tui
+      final Map<Long, Concept> tuiConcepts = MrstyParser.createConceptsForTuis( umlsDirPath, wantedTuis );
+      // Second pass: cuis that appear in a wanted vocabulary; everything else is dropped from the map
+      final Collection<Long> vocabularyCuis = MrconsoParser.getValidVocabularyCuis( umlsDirPath, wantedSources );
+      final Collection<Long> conceptCuis = tuiConcepts.keySet();
+      conceptCuis.retainAll( vocabularyCuis );
+      final String summary = "Total Valid Cuis " + tuiConcepts.size() + "\t from wanted Tuis and Vocabularies";
+      LOGGER.info( summary );
+      return tuiConcepts;
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,117 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.logging.Logger;
+
+/**
+ * Singleton that converts between Cui strings such as "C0000001" and numeric Long codes.
+ * A Cui is split into a prefix and up to 7 trailing digits.  Each distinct prefix / digit count pair
+ * is registered in a list, and the code is {@code listIndex * PREFIX_MULTIPLIER + trailing number}.
+ * The standard "C" + 7 digits form is registered first, so its codes equal the trailing number.
+ *
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 9/5/2014
+ */
+public enum CuiCodeUtil {
+   INSTANCE;
+
+   /**
+    * @return the single instance
+    */
+   static public CuiCodeUtil getInstance() {
+      return INSTANCE;
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "CuiCodeUtil" );
+   // Size of the code range reserved for each registered prefix
+   static private final long PREFIX_MULTIPLIER = 100000000;
+
+   // Index in this list is the multiplier of the prefix.  Grows as new prefixes are encountered.
+   final private List<PrefixerPair> _prefixerPairList = new ArrayList<>();
+
+   CuiCodeUtil() {
+      // Add the standard C as the default encoding prefix
+      _prefixerPairList.add( new PrefixerPair( "C0000000" ) );
+   }
+
+   /**
+    * @param code numeric code as created by {@link #getCuiCode(String)}
+    * @return Cui string for the code.  If the code is negative or belongs to no registered prefix
+    * then the problem is logged and the code itself is returned as text.
+    */
+   public String getAsCui( final Long code ) {
+      final long multiplier = code / PREFIX_MULTIPLIER;
+      if ( code < 0 || multiplier < 0 || multiplier >= _prefixerPairList.size() ) {
+         LOGGER.severe( "Could not create Cui String for " + code );
+         return "" + code;
+      }
+      return _prefixerPairList.get( (int)multiplier ).getAsCui( code % PREFIX_MULTIPLIER );
+   }
+
+
+   /**
+    * Registers the prefix of the Cui if it has not been seen before.
+    *
+    * @param cui Cui string
+    * @return numeric code for the Cui, or -1 if its trailing characters cannot be parsed as a number
+    */
+   public Long getCuiCode( final String cui ) {
+      final PrefixerPair prefixerPair = new PrefixerPair( cui );
+      int prefixerIndex = _prefixerPairList.indexOf( prefixerPair );
+      if ( prefixerIndex < 0 ) {
+         prefixerIndex = _prefixerPairList.size();
+         _prefixerPairList.add( prefixerPair );
+      }
+      return prefixerPair.getCuiCode( cui, prefixerIndex );
+   }
+
+
+   /**
+    * Prefix of a Cui plus the number of digits that follow it.
+    * Two pairs are equal when both the prefix and the digit count are equal.
+    */
+   static private final class PrefixerPair {
+      final private int __digitCount;
+      final private char[] __prefix;
+      final private int __hashCode;
+
+      /**
+       * @param cui Cui string; at most its last 7 characters are taken as digits, the rest is the prefix
+       */
+      private PrefixerPair( final String cui ) {
+         final char[] chars = cui.toCharArray();
+         int digitCount = 0;
+         while ( digitCount < chars.length
+                 && digitCount < 7
+                 && Character.isDigit( chars[ chars.length - 1 - digitCount ] ) ) {
+            digitCount++;
+         }
+         __digitCount = digitCount;
+         __prefix = Arrays.copyOfRange( chars, 0, chars.length - digitCount );
+         __hashCode = digitCount + Arrays.hashCode( __prefix );
+      }
+
+      /**
+       * @param cui        Cui string with the prefix and digit count of this pair
+       * @param multiplier index of this pair in the list of registered pairs
+       * @return numeric code, or -1 if the trailing characters are not a parsable number
+       */
+      private Long getCuiCode( final String cui, final int multiplier ) {
+         final String cuiNum = cui.substring( cui.length() - __digitCount, cui.length() );
+         try {
+            return PREFIX_MULTIPLIER * multiplier + Long.parseLong( cuiNum );
+         } catch ( NumberFormatException nfE ) {
+            LOGGER.severe( "Could not create Cui Code for " + cui );
+         }
+         return -1L;
+      }
+
+      /**
+       * @param code numeric part of a code, without the prefix multiplier
+       * @return prefix followed by the code, left-padded with zeros to the digit count of this pair.
+       * A code with too many digits is logged and appended to the prefix without padding.
+       */
+      private String getAsCui( final Long code ) {
+         final char[] codeChars = String.valueOf( code ).toCharArray();
+         if ( codeChars.length > __digitCount ) {
+            // String.valueOf is required: concatenating the char[] itself would log the array identity
+            LOGGER.severe( "Invalid code " + code + " for prefix " + String.valueOf( __prefix )
+                          + " has more than " + __digitCount + " digits" );
+            return String.valueOf( __prefix ) + String.valueOf( codeChars );
+         }
+         final int cuiLength = __prefix.length + __digitCount;
+         final char[] cuiChars = new char[ cuiLength ];
+         System.arraycopy( __prefix, 0, cuiChars, 0, __prefix.length );
+         System.arraycopy( codeChars, 0, cuiChars, cuiLength - codeChars.length, codeChars.length );
+         for ( int i = __prefix.length; i < cuiLength - codeChars.length; i++ ) {
+            cuiChars[ i ] = '0';
+         }
+         return String.valueOf( cuiChars );
+      }
+
+      @Override
+      public int hashCode() {
+         return __hashCode;
+      }
+
+      @Override
+      public boolean equals( final Object other ) {
+         return other instanceof PrefixerPair
+                && __hashCode == ((PrefixerPair)other).__hashCode
+                && __digitCount == ((PrefixerPair)other).__digitCount
+                && Arrays.equals( __prefix, ((PrefixerPair)other).__prefix );
+      }
+   }
+
+   // todo
+   // todo switch to int: 32 bit signed, max = 2,147,483,647
+   // todo if we keep final 7 digits for the numerical then we have 213 possible prefixes
+   // todo
+   // todo can probably change the code and the db will be fine, change the db too
+   // todo
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/DoseUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/DoseUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/DoseUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/DoseUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,78 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.logging.Logger;
+
+/**
+ * Utility that decides whether a text appears to carry a dose, i.e. an amount with a unit.
+ *
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 7/14/14
+ */
+final public class DoseUtil {
+
+   private DoseUtil() {
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "DoseUtil" );
+
+
+   // some of these are not strictly units, e.g. "ud" : "ut dictum" or "as directed"
+   // but can be properly trimmed as they appear in the same place as would a unit
+
+   static private final String[] UNIT_ARRAY = {
+         "gr", "gm", "gram", "grams", "g",
+         "mg", "milligram", "milligrams", "kg",
+         "microgram", "micrograms", "mcg", "ug",
+         "millicurie", "mic", "oz",
+         "lf", "ml", "liter", "milliliter", "l",
+         "milliequivalent", "meq",
+         "hour", "hours", "hr", //"day", "days", "daily", //"24hr", "8hr", "12hr",
+         "week", "weeks", "weekly", "biweekly",
+         "usp", "titradose",
+         "unit", "units", "unt", "iu", "u", "mmu",
+         "mm", "cm",
+         "gauge", "intl","au", "bau", "mci", "ud",
+         "ww", "vv", "wv",
+         "%", "percent", "%ww", "%vv", "%wv",
+         "actuation", "actuat", "vial", "vil", "packet", "pkt" };
+   static private final Collection<String> UNITS = Arrays.asList( UNIT_ARRAY );
+
+
+   /**
+    * Looks at every whitespace-separated word after the first one, trying the units in their listed order.
+    * A word made of digits directly followed by a unit ("50mg") counts as a dose.
+    * A word that is exactly a unit counts as a dose, except when it is the second word
+    * and the first word is not made of digits only; in that case the answer is immediately false.
+    *
+    * @param text text split on whitespace
+    * @return true if a dose was recognized as described above
+    */
+   static public boolean hasUnit( final String text ) {
+      final String[] words = text.split( "\\s+" );
+      if ( words.length < 2 ) {
+         return false;
+      }
+      for ( int w = 1; w < words.length; w++ ) {
+         final String word = words[ w ];
+         for ( String unit : UNITS ) {
+            if ( word.endsWith( unit ) ) {
+               final int amountLength = word.length() - unit.length();
+               if ( amountLength > 0 ) {
+                  // something precedes the unit within the word; it must be a number
+                  if ( startsWithDigits( word, amountLength ) ) {
+                     return true;
+                  }
+               } else {
+                  // the word is exactly the unit
+                  return w != 1 || startsWithDigits( words[ 0 ], words[ 0 ].length() );
+               }
+            }
+         }
+      }
+      return false;
+   }
+
+   /**
+    * @param word  some word
+    * @param count number of leading characters to check
+    * @return true if the first count characters of the word are all digits; true for a count of zero
+    */
+   static private boolean startsWithDigits( final String word, final int count ) {
+      for ( int c = 0; c < count; c++ ) {
+         if ( !Character.isDigit( word.charAt( c ) ) ) {
+            return false;
+         }
+      }
+      return true;
+   }
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoIndex.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoIndex.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoIndex.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoIndex.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,16 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+/**
+ * Zero-based positions of the MRCONSO.RRF row tokens that are read by the parsers.
+ *
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/23/14
+ */
+public enum MrconsoIndex {
+   CUI( 0 ),
+   LANGUAGE( 1 ),
+   STATUS( 2 ),
+   FORM( 4 ),
+   SOURCE( 11 ),
+   TERM_TYPE( 12 ),
+   SOURCE_CODE( 13 ),
+   TEXT( 14 );
+
+   /** Position of the token within a row. */
+   final public int _index;
+
+   MrconsoIndex( final int index ) {
+      _index = index;
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,284 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+import org.apache.ctakes.dictionary.creator.util.TextTokenizer;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.*;
+
+import static org.apache.ctakes.dictionary.creator.gui.umls.MrconsoIndex.*;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/17/14
+ */
+final public class MrconsoParser {
+
+   static private final Logger LOGGER = LogManager.getLogger( "MrConsoParser" );
+
+   static private final String MR_CONSO_SUB_PATH = "/META/MRCONSO.RRF";
+
+   // TODO - put all exclusions in a data file, display for user, allow changes and save, etc.
+
+   //  https://www.nlm.nih.gov/research/umls/sourcereleasedocs
+   //  https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/SNOMEDCT_US/stats.html
+   //  https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/RXNORM/stats.html
+   static private final String[] DEFAULT_EXCLUSIONS = { "FN", "CCS", "CA2", "CA3", "PSN", "TMSY",
+                                                     "SBD", "SBDC", "SBDF", "SBDG",
+                                                     "SCD", "SCDC", "SCDF", "SCDG", "BPCK", "GPCK", "XM" };
+
+   static private final String[] SNOMED_OBSOLETES = { "OF", "MTH_OF", "OAP", "MTH_OAP", "OAF", "MTH_OAF",
+                                                     "IS", "MTH_IS", "OAS", "MTH_OAS",
+                                                     "OP", "MTH_OP" };
+   // Snomed OF  = Obsolete Fully Specified Name      MTH_OF
+   // Snomed OAP = Obsolete Active Preferred Term     MTH_OAP
+   // Snomed OAF = Obsolete Active Full Name          MTH_OAF
+   // Snomed IS  = Obsolete Synonym                   MTH_IS
+   // Snomed OAS = Obsolete Active Synonym            MTH_OAS
+   // Snomed OP  = Obsolete Preferred Name            MTH_OP
+   // Snomed PT  = Preferred Term , but we don't need that for valid cuis ...  or do we want only those with preferred terms?
+   // Snomed PTGB = British Preferred Term
+
+   // GO has same snomed obsoletes +
+   // GO EOT = Obsolete Entry Term
+   // HPO has same snomed obsoletes
+
+   // MTHSPL - DP is Drug Product  as is MTH_RXN_DP      MTHSPL SU is active substance
+   // VANDF AB  is abbreviation for drug  VANDF CD is Clinical Drug.  Both are dosed.
+   //  NDFRT AB?  Looks like ingredient.  NDFRT PT can be dosed
+
+   static private final String[] GO_OBSOLETES = { "EOT" };
+
+   static private final String[] LOINC_OBSOLETES = { "LO", "OLC", "MTH_LO", "OOSN" };
+
+   static private final String[] MEDRA_OBSOLETES = { "OL", "MTH_OL" };
+
+   static private final String[] MESH_EXCLUSIONS = { "N1", "EN", "PEN" };
+
+   static private final String[] RXNORM_EXCLUSIONS = { "SY" };   // What is IN ?  Ingredient?
+
+   static private final String[] NCI_EXCLUSIONS = { "CSN" };
+
+   // Related to, but not synonymous
+   static private final String[] UMDNS_EXCLUSIONS = { "RT" };
+
+   private MrconsoParser() {
+   }
+
+   static public String[] getDefaultExclusions() {
+      return DEFAULT_EXCLUSIONS;
+   }
+
+   static public String[] getSnomedExclusions() {
+      final String[] defaults = getDefaultExclusions();
+      final String[] exclusionTypes = Arrays.copyOf( defaults,
+            defaults.length + SNOMED_OBSOLETES.length );
+      System.arraycopy( SNOMED_OBSOLETES, 0, exclusionTypes, defaults.length, SNOMED_OBSOLETES.length );
+      return exclusionTypes;
+   }
+
+   static public String[] getNonRxnormExclusions() {
+      final String[] snomeds = getSnomedExclusions();
+      final String[] exclusionTypes = Arrays.copyOf( snomeds,
+            snomeds.length
+            + GO_OBSOLETES.length
+            + LOINC_OBSOLETES.length
+            + MEDRA_OBSOLETES.length
+            + MESH_EXCLUSIONS.length
+            + NCI_EXCLUSIONS.length
+            + UMDNS_EXCLUSIONS.length );
+      int start = snomeds.length;
+      System.arraycopy( GO_OBSOLETES, 0, exclusionTypes, start, GO_OBSOLETES.length );
+      start += GO_OBSOLETES.length;
+      System.arraycopy( LOINC_OBSOLETES, 0, exclusionTypes, start, LOINC_OBSOLETES.length );
+      start += LOINC_OBSOLETES.length;
+      System.arraycopy( MEDRA_OBSOLETES, 0, exclusionTypes, start, MEDRA_OBSOLETES.length );
+      start += MEDRA_OBSOLETES.length;
+      System.arraycopy( MESH_EXCLUSIONS, 0, exclusionTypes, start, MESH_EXCLUSIONS.length );
+      start += MESH_EXCLUSIONS.length;
+      System.arraycopy( NCI_EXCLUSIONS, 0, exclusionTypes, start, NCI_EXCLUSIONS.length );
+      start += NCI_EXCLUSIONS.length;
+      System.arraycopy( UMDNS_EXCLUSIONS, 0, exclusionTypes, start, UMDNS_EXCLUSIONS.length );
+      return exclusionTypes;
+   }
+
+
+
+   static public Map<Long, Concept> parseAllConcepts( final String umlsDirPath,
+                                                   final Map<Long, Concept> concepts,
+                                                   final Collection<String> wantedTargets,
+                                                   final UmlsTermUtil umlsTermUtil,
+                                                   final Collection<String> languages,
+                                                   final boolean extractAbbreviations,
+                                                   final int minCharLength,
+                                                      final int maxCharLength,
+                                                   final int maxWordCount,
+                                                      final int maxSymCount ) {
+      final String mrconsoPath = umlsDirPath + MR_CONSO_SUB_PATH;
+      final Collection<String> invalidTypeSet = new HashSet<>( Arrays.asList( getNonRxnormExclusions() ) );
+      LOGGER.info( "Compiling map of Concepts from " + mrconsoPath );
+      long lineCount = 0;
+      long textCount = 0;
+      try ( final BufferedReader reader = FileUtil.createReader( mrconsoPath ) ) {
+         List<String> tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+         while ( tokens != null ) {
+            lineCount++;
+            if ( lineCount % 100000 == 0 ) {
+               LOGGER.info( "File Line " + lineCount + "   Texts " + textCount );
+            }
+            if ( !isRowOk( tokens, languages, invalidTypeSet ) ) {
+               tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+               continue;
+            }
+            final Long cuiCode = CuiCodeUtil.getInstance().getCuiCode( getToken( tokens, CUI ) );
+            final Concept concept = concepts.get( cuiCode );
+            if ( concept == null ) {
+               // cui for current row is unwanted
+               tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+               continue;
+            }
+            final String text = getToken( tokens, TEXT );
+            if ( isPreferredTerm( tokens ) ) {
+               concept.setPreferredText( text );
+            }
+            final String source = getToken( tokens, SOURCE );
+            if ( wantedTargets.contains( source ) ) {
+               final String code = getToken( tokens, SOURCE_CODE );
+               if ( !code.equals( "NOCODE" ) ) {
+                  Vocabulary.getInstance().addVocabulary( source, code );
+                  concept.addCode( source, code );
+               }
+            }
+            final String tokenizedText = TextTokenizer.getTokenizedText( text );
+            if ( tokenizedText == null || tokenizedText.isEmpty()
+                 || !umlsTermUtil.isTextValid( tokenizedText ) ) {
+               // no tokenizable text or tokenized text is invalid for some reason
+//               LOGGER.warn( tokenizedText + " not valid" );
+               tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+               continue;
+            }
+            final String strippedText = umlsTermUtil.getStrippedText( tokenizedText );
+            if ( strippedText == null || strippedText.isEmpty()
+                 || UmlsTermUtil.isTextTooShort( strippedText, minCharLength )
+                 || UmlsTermUtil.isTextTooLong( strippedText, maxCharLength, maxWordCount, maxSymCount ) ) {
+               // after stripping unwanted prefixes and suffixes there is no valid text
+//               LOGGER.warn( tokenizedText + " stripped invalid" );
+               tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+               continue;
+            }
+            final Collection<String> formattedTexts
+                  = umlsTermUtil.getFormattedTexts( strippedText, extractAbbreviations, minCharLength, maxCharLength, maxWordCount, maxSymCount );
+            if ( formattedTexts != null && !formattedTexts.isEmpty() ) {
+               if ( DoseUtil.hasUnit( tokenizedText ) ) {
+                  concept.setHasDose();
+//                  LOGGER.warn( tokenizedText + " has dose" );
+                  tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+                  continue;
+               }
+               if ( concept.addTexts( formattedTexts ) ) {
+                  textCount += formattedTexts.size();
+               }
+            }
+            tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+      }
+      LOGGER.info( "File Lines " + lineCount + "   Texts " + textCount );
+      return concepts;
+   }
+
+
+   static private boolean isRowOk( final List<String> tokens,
+                                   final Collection<String> languages,
+                                   final Collection<String> invalidTypeSet ) {
+      if ( tokens.size() <= TEXT._index || !languages.contains( getToken( tokens, LANGUAGE ) ) ) {
+         return false;
+      }
+      final String type = getToken( tokens, TERM_TYPE );
+      if ( invalidTypeSet.contains( type ) ) {
+         return false;
+      }
+      // "Synonyms" are actually undesirable in the rxnorm vocabulary
+      final String source = getToken( tokens, SOURCE );
+      return !( source.equals( "RXNORM" ) && type.equals( "SY" ) );
+   }
+
+
+   static private boolean isPreferredTerm( final List<String> tokens ) {
+      return getToken( tokens, STATUS ).equals( "P" ) && getToken( tokens, FORM ).equals( "PF" );
+   }
+
+
+   /**
+    * Can cull the given collection of cuis
+    *
+    * @param umlsDirPath     path to the UMLS_ROOT Meta/MRCONSO.RRF file
+    * @param sourceVocabularies desired source type names as appear in rrf: RXNORM, SNOMEDCT, MSH, etc.
+    * @return Subset of cuis that exist in in the given sources
+    */
+   static public Collection<Long> getValidVocabularyCuis( final String umlsDirPath,
+                                                          final Collection<String> sourceVocabularies ) {
+//      return getValidVocabularyCuis( umlsDirPath, sourceVocabularies, getDefaultExclusions() );
+      return getValidVocabularyCuis( umlsDirPath, sourceVocabularies, getNonRxnormExclusions() );
+   }
+
+//   /**
+//    * Can cull the given collection of cuis
+//    *
+//    * @param umlsDirPath     path to the UMLS_ROOT Meta/MRCONSO.RRF file
+//    * @return Subset of cuis that exist in in the given sources
+//    */
+//   static public Collection<Long> getValidRxNormCuis( final String umlsDirPath ) {
+//      return getValidVocabularyCuis( umlsDirPath, Collections.singletonList( "RXNORM" ), getRxnormExclusions() );
+//   }
+
+   /**
+    * Can cull the given collection of cuis
+    *
+    * @param umlsDirPath     path to the UMLS_ROOT Meta/MRCONSO.RRF file
+    * @param sourceVocabularies desired source type names as appear in rrf: RXNORM, SNOMEDCT, MSH, etc.
+    * @param invalidTypes term type names as appear in rrf: FN, CCS, etc. that are not valid
+    * @return Subset of cuis that exist in in the given sources
+    */
+   static private Collection<Long> getValidVocabularyCuis( final String umlsDirPath,
+                                                           final Collection<String> sourceVocabularies,
+                                                           final String... invalidTypes ) {
+      final String mrconsoPath = umlsDirPath + MR_CONSO_SUB_PATH;
+      LOGGER.info( "Compiling list of Cuis with wanted Vocabularies using " + mrconsoPath );
+      final Collection<Long> validCuis = new HashSet<>();
+      long lineCount = 0;
+      try ( final BufferedReader reader = FileUtil.createReader( mrconsoPath ) ) {
+         List<String> tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+         while ( tokens != null ) {
+            lineCount++;
+            if ( lineCount % 100000 == 0 ) {
+               LOGGER.info( "File Line " + lineCount + "\t Valid Cuis " + validCuis.size() );
+            }
+            if ( tokens.size() > SOURCE._index
+                 && sourceVocabularies.stream().anyMatch( getToken( tokens, SOURCE )::equals )
+                 && Arrays.stream( invalidTypes ).noneMatch( getToken( tokens, TERM_TYPE )::equals ) ) {
+               final Long cuiCode = CuiCodeUtil.getInstance().getCuiCode( getToken( tokens, CUI ) );
+               validCuis.add( cuiCode );
+            }
+            tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+      }
+      LOGGER.info( "File Lines " + lineCount + "\t Valid Cuis " + validCuis.size() + "\t for wanted Vocabularies" );
+      return validCuis;
+   }
+
+
+   static private String getToken( final List<String> tokens, final MrconsoIndex mrconsoIndex ) {
+      return tokens.get( mrconsoIndex._index );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyIndex.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyIndex.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyIndex.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyIndex.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,16 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+/**
+ * Zero-based positions of the MRSTY.RRF row tokens that are read by the parser.
+ *
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/26/14
+ */
+public enum MrstyIndex {
+   CUI( 0 ),
+   TUI( 1 );
+
+   /** Position of the token within a row. */
+   final public int _index;
+
+   MrstyIndex( final int index ) {
+      _index = index;
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,74 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.*;
+
+import static org.apache.ctakes.dictionary.creator.gui.umls.MrstyIndex.CUI;
+import static org.apache.ctakes.dictionary.creator.gui.umls.MrstyIndex.TUI;
+
+
+/**
+ * Reads the MRSTY.RRF table below a umls directory and collects the concepts that carry a wanted Tui.
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/17/14
+ */
+final public class MrstyParser {
+
+   static private final Logger LOGGER = LogManager.getLogger( "MrStyParser" );
+
+   static private final String MRSTY_SUB_PATH = "/META/MRSTY.RRF";
+
+   // Lookup by constant name.  Tui.valueOf(..) throws an IllegalArgumentException for a name that is not
+   // declared in the enum, which would abort the whole parse on a single unexpected line.
+   static private final Map<String, Tui> TUIS_BY_NAME = new HashMap<>();
+
+   static {
+      for ( Tui tui : Tui.values() ) {
+         TUIS_BY_NAME.put( tui.name(), tui );
+      }
+   }
+
+   private MrstyParser() {
+   }
+
+   /**
+    * Builds one concept per cui that is listed with at least one of the wanted tuis.
+    * Lines that are too short to hold a tui column and lines whose tui is not declared in {@link Tui}
+    * are skipped; each unknown tui name is reported once as a warning.
+    * An IOException while reading is logged and whatever was collected up to that point is returned.
+    *
+    * @param umlsPath   path to the umls directory; {@code /META/MRSTY.RRF} is appended to it
+    * @param wantedTuis tuis for which concepts should be created.  This collection is not modified
+    * @return map of cui code to concept, each concept holding the wanted tuis found for that cui
+    */
+   static public Map<Long, Concept> createConceptsForTuis( final String umlsPath,
+                                                           final Collection<Tui> wantedTuis ) {
+      final String mrstyPath = umlsPath + MRSTY_SUB_PATH;
+      LOGGER.info( "Compiling list of Cuis with wanted Tuis using " + mrstyPath );
+      long lineCount = 0;
+      final Map<Long, Concept> wantedConcepts = new HashMap<>();
+      final Collection<Tui> usedTuis = EnumSet.noneOf( Tui.class );
+      final Collection<String> unknownTuis = new LinkedHashSet<>();
+      try ( final BufferedReader reader = FileUtil.createReader( mrstyPath ) ) {
+         // The next line is always fetched in the loop header, so no branch can forget to advance
+         // and the progress message below is reached for every line.
+         for ( List<String> tokens = FileUtil.readBsvTokens( reader, mrstyPath );
+               tokens != null;
+               tokens = FileUtil.readBsvTokens( reader, mrstyPath ) ) {
+            lineCount++;
+            if ( tokens.size() > TUI._index ) {
+               final String tuiText = tokens.get( TUI._index );
+               final Tui tuiEnum = TUIS_BY_NAME.get( tuiText );
+               if ( tuiEnum == null ) {
+                  unknownTuis.add( tuiText );
+               } else if ( wantedTuis.contains( tuiEnum ) ) {
+                  final Long cuiCode = CuiCodeUtil.getInstance().getCuiCode( tokens.get( CUI._index ) );
+                  wantedConcepts.computeIfAbsent( cuiCode, code -> new Concept() ).addTui( tuiEnum );
+                  usedTuis.add( tuiEnum );
+               }
+            }
+            if ( lineCount % 100000 == 0 ) {
+               LOGGER.info( "File Line " + lineCount + "\t Valid Cuis " + wantedConcepts.size() );
+            }
+         }
+      } catch ( IOException ioE ) {
+         // Pass the exception along so that the stack trace is not lost
+         LOGGER.error( ioE.getMessage(), ioE );
+      }
+      LOGGER.info( "File Lines " + lineCount + "\t Valid Cuis " + wantedConcepts.size() + "\t for wanted Tuis" );
+      for ( String unknownTui : unknownTuis ) {
+         LOGGER.warn( "Skipped lines with unknown Tui " + unknownTui );
+      }
+      // Work on a copy: the wanted tuis belong to the caller and must keep their content
+      final Collection<Tui> missingTuis = EnumSet.noneOf( Tui.class );
+      missingTuis.addAll( wantedTuis );
+      missingTuis.removeAll( usedTuis );
+      for ( Tui missingTui : missingTuis ) {
+         LOGGER.warn( "Could not find Cuis for Tui " + missingTui + " " + missingTui.getDescription() );
+      }
+      return wantedConcepts;
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,188 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.event.EventListenerList;
+import javax.swing.event.TableModelEvent;
+import javax.swing.event.TableModelListener;
+import javax.swing.table.TableModel;
+import java.util.*;
+
+/**
+ * Table model with one row per vocabulary.  Column 0 ("Source") and column 1 ("Target") are check boxes
+ * that select the vocabulary as a wanted source or wanted target, column 2 shows the vocabulary name.
+ * The target check box of a row can only be edited while the source of that row is selected.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+final public class SourceTableModel implements TableModel {
+
+   static private final Logger LOGGER = LogManager.getLogger( "SourceTableModel" );
+
+   static private final String[] COLUMN_NAMES = { "Source", "Target", "Vocabulary" };
+   static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, Boolean.class, String.class };
+
+   // Vocabularies that are selected as source and target whenever the sources are (re)set
+   static private final String[] CTAKES_SOURCES = { "SNOMEDCT", "SNOMEDCT_US", "RXNORM" };
+
+   private final EventListenerList _listenerList = new EventListenerList();
+   private final Collection<String> _wantedSources = new HashSet<>();
+   private final Collection<String> _wantedTargets = new HashSet<>();
+   private final List<String> _sources = new ArrayList<>();
+
+
+   /**
+    * Replaces the listed vocabularies, sorts them, resets both selections to the default ctakes
+    * vocabularies and notifies all listeners that the table changed.
+    * The defaults are selected whether or not they are part of the given vocabularies.
+    *
+    * @param sources names of the vocabularies to list
+    */
+   public void setSources( final Collection<String> sources ) {
+      _sources.clear();
+      _wantedSources.clear();
+      _wantedTargets.clear();
+      _sources.addAll( sources );
+      Collections.sort( _sources );
+      _wantedSources.addAll( Arrays.asList( CTAKES_SOURCES ) );
+      _wantedTargets.addAll( Arrays.asList( CTAKES_SOURCES ) );
+      fireTableChanged( new TableModelEvent( this ) );
+   }
+
+   /**
+    * @return the live collection of vocabularies selected as source; it reflects later changes to this model
+    */
+   public Collection<String> getWantedSources() {
+      return _wantedSources;
+   }
+
+   /**
+    * @return the live collection of vocabularies selected as target; it reflects later changes to this model
+    */
+   public Collection<String> getWantedTargets() {
+      return _wantedTargets;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getRowCount() {
+      return _sources.size();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getColumnCount() {
+      return COLUMN_NAMES.length;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getColumnName( final int columnIndex ) {
+      return COLUMN_NAMES[ columnIndex ];
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Class<?> getColumnClass( final int columnIndex ) {
+      return COLUMN_CLASSES[ columnIndex ];
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return true for the source column, and for the target column while the source of the row is selected
+    */
+   @Override
+   public boolean isCellEditable( final int rowIndex, final int columnIndex ) {
+      if ( columnIndex == 0 ) {
+         return true;
+      }
+      return columnIndex == 1 && isSourceEnabled( _sources.get( rowIndex ) );
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return Boolean selection for columns 0 and 1, the vocabulary name for column 2, "ERROR" otherwise
+    */
+   @Override
+   public Object getValueAt( final int rowIndex, final int columnIndex ) {
+      final String source = _sources.get( rowIndex );
+      switch ( columnIndex ) {
+         case 0 : return isSourceEnabled( source );
+         case 1 : return isTargetEnabled( source );
+         case 2 : return source;
+      }
+      return "ERROR";
+   }
+
+   private boolean isSourceEnabled( final String source ) {
+      return _wantedSources.contains( source );
+   }
+
+   private boolean isTargetEnabled( final String source ) {
+      return _wantedTargets.contains( source );
+   }
+
+   /**
+    * {@inheritDoc}
+    * Only Boolean values in the source and target columns are accepted, anything else is ignored.
+    * Listeners are notified with a row update when the selection actually changed.
+    */
+   @Override
+   public void setValueAt( final Object aValue, final int rowIndex, final int columnIndex ) {
+      if ( !(aValue instanceof Boolean) ) {
+         return;
+      }
+      final String source = _sources.get( rowIndex );
+      final boolean select = (Boolean)aValue;
+      boolean changed = false;
+      if ( columnIndex == 0 ) {
+         changed = select( _wantedSources, source, select );
+      } else if ( columnIndex == 1 ) {
+         changed = select( _wantedTargets, source, select );
+      }
+      if ( changed ) {
+         // Whole row: the editable state of the target cell depends on the source cell
+         fireTableChanged( new TableModelEvent( this, rowIndex ) );
+      }
+   }
+
+   /**
+    * Adds the vocabulary to or removes it from the given selection.
+    *
+    * @param wanted     selection to change
+    * @param vocabulary name of the vocabulary
+    * @param select     true to add the vocabulary, false to remove it
+    * @return true if the selection changed
+    */
+   static private boolean select( final Collection<String> wanted, final String vocabulary,
+                                  final boolean select ) {
+      return select ? wanted.add( vocabulary ) : wanted.remove( vocabulary );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void addTableModelListener( final TableModelListener listener ) {
+      _listenerList.add( TableModelListener.class, listener );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void removeTableModelListener( final TableModelListener listener ) {
+      _listenerList.remove( TableModelListener.class, listener );
+   }
+
+   /**
+    * Forwards the given notification event to all
+    * <code>TableModelListeners</code> that registered
+    * themselves as listeners for this table model.
+    *
+    * @param e  the event to be forwarded
+    *
+    * @see #addTableModelListener
+    * @see TableModelEvent
+    * @see EventListenerList
+    */
+   private void fireTableChanged( final TableModelEvent e ) {
+      // Guaranteed to return a non-null array of ( listener type, listener ) pairs
+      final Object[] listeners = _listenerList.getListenerList();
+      // Process the listeners last to first, notifying
+      // those that are interested in this event
+      for ( int i = listeners.length - 2; i >= 0; i -= 2 ) {
+         if ( listeners[ i ] == TableModelListener.class ) {
+            ((TableModelListener)listeners[ i + 1 ]).tableChanged( e );
+         }
+      }
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,185 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+/**
+ * Semantic types, one constant per tui, each with a short description.
+ * The constant name is the letter T followed by the number of the tui.
+ * Lookup by name is only available through the compiler-generated {@code valueOf(String)},
+ * which throws an IllegalArgumentException for names that are not declared here;
+ * an enum cannot declare its own static {@code valueOf(String)} to change that.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+public enum Tui {
+   // Activities & Behaviors
+   T052( "Activity" ),
+   T053( "Behavior" ),
+   T056( "Daily or Recreational Activity" ),
+   T051( "Event" ),
+   T064( "Governmental or Regulatory Activity" ),
+   T055( "Individual Behavior" ),
+   T066( "Machine Activity" ),
+   T057( "Occupational Activity" ),
+   T054( "Social Behavior" ),
+   // Anatomy
+   T017( "Anatomical Structure" ),
+   T029( "Body Location or Region" ),
+   T023( "Body Part, Organ, or Organ Component" ),
+   T030( "Body Space or Junction" ),
+   T031( "Body Substance" ),
+   T022( "Body System" ),
+   T025( "Cell" ),
+   T026( "Cell Component" ),
+   T018( "Embryonic Structure" ),
+   T021( "Fully Formed Anatomical Structure" ),
+   T024( "Tissue" ),
+   // Chemicals & Drugs
+   T116( "Amino Acid, Peptide, or Protein" ),
+   T195( "Antibiotic" ),
+   T123( "Biologically Active Substance" ),
+   T122( "Biomedical or Dental Material" ),
+   T118( "Carbohydrate" ),
+   T103( "Chemical" ),
+   T120( "Chemical Viewed Functionally" ),
+   T104( "Chemical Viewed Structurally" ),
+   T200( "Clinical Drug" ),
+   T111( "Eicosanoid" ),
+   T196( "Element, Ion, or Isotope" ),
+   T126( "Enzyme" ),
+   T131( "Hazardous or Poisonous Substance" ),
+   T125( "Hormone" ),
+   T129( "Immunologic Factor" ),
+   T130( "Indicator, Reagent, or Diagnostic Aid" ),
+   T197( "Inorganic Chemical" ),
+   T119( "Lipid" ),
+   T124( "Neuroreactive Substance or Biogenic Amine" ),
+   T114( "Nucleic Acid, Nucleoside, or Nucleotide" ),
+   T109( "Organic Chemical" ),
+   T115( "Organophosphorus Compound" ),
+   T121( "Pharmacologic Substance" ),
+   T192( "Receptor" ),
+   T110( "Steroid" ),
+   T127( "Vitamin" ),
+   // Concepts & Ideas
+   T185( "Classification" ),
+   T077( "Conceptual Entity" ),
+   T169( "Functional Concept" ),
+   T102( "Group Attribute" ),
+   T078( "Idea or Concept" ),
+   T170( "Intellectual Product" ),
+   T171( "Language" ),
+   T080( "Qualitative Concept" ),
+   T081( "Quantitative Concept" ),
+   T089( "Regulation or Law" ),
+   T082( "Spatial Concept" ),
+   T079( "Temporal Concept" ),
+   // Devices
+   T203( "Drug Delivery Device" ),
+   T074( "Medical Device" ),
+   T075( "Research Device" ),
+   // Disorders
+   T020( "Acquired Abnormality" ),
+   T190( "Anatomical Abnormality" ),
+   T049( "Cell or Molecular Dysfunction" ),
+   T019( "Congenital Abnormality" ),
+   T047( "Disease or Syndrome" ),
+   T050( "Experimental Model of Disease" ),
+   T033( "Finding" ),
+   T037( "Injury or Poisoning" ),
+   T048( "Mental or Behavioral Dysfunction" ),
+   T191( "Neoplastic Process" ),
+   T046( "Pathologic Function" ),
+   T184( "Sign or Symptom" ),
+   // Genes & Molecular Sequences
+   T087( "Amino Acid Sequence" ),
+   T088( "Carbohydrate Sequence" ),
+   T028( "Gene or Genome" ),
+   T085( "Molecular Sequence" ),
+   T086( "Nucleotide Sequence" ),
+   // Geographic Areas
+   T083( "Geographic Area" ),
+   // Living Beings
+   T100( "Age Group" ),
+   T011( "Amphibian" ),
+   T008( "Animal" ),
+   T194( "Archaeon" ),
+   T007( "Bacterium" ),
+   T012( "Bird" ),
+   T204( "Eukaryote" ),
+   T099( "Family Group" ),
+   T013( "Fish" ),
+   T004( "Fungus" ),
+   T096( "Group" ),
+   T016( "Human" ),
+   T015( "Mammal" ),
+   T001( "Organism" ),
+   T101( "Patient or Disabled Group" ),
+   T002( "Plant" ),
+   T098( "Population Group" ),
+   T097( "Professional or Occupational Group" ),
+   T014( "Reptile" ),
+   T010( "Vertebrate" ),
+   T005( "Virus" ),
+   // Objects
+   T071( "Entity" ),
+   T168( "Food" ),
+   T073( "Manufactured Object" ),
+   T072( "Physical Object" ),
+   T167( "Substance" ),
+   // Occupations
+   T091( "Biomedical Occupation or Discipline" ),
+   T090( "Occupation or Discipline" ),
+   // Organizations
+   T093( "Health Care Related Organization" ),
+   T092( "Organization" ),
+   T094( "Professional Society" ),
+   T095( "Self-help or Relief Organization" ),
+   // Phenomena
+   T038( "Biologic Function" ),
+   T069( "Environmental Effect of Humans" ),
+   T068( "Human-caused Phenomenon or Process" ),
+   T034( "Laboratory or Test Result" ),
+   T070( "Natural Phenomenon or Process" ),
+   T067( "Phenomenon or Process" ),
+   // Physiology
+   T043( "Cell Function" ),
+   T201( "Clinical Attribute" ),
+   T045( "Genetic Function" ),
+   T041( "Mental Process" ),
+   T044( "Molecular Function" ),
+   T032( "Organism Attribute" ),
+   T040( "Organism Function" ),
+   T042( "Organ or Tissue Function" ),
+   T039( "Physiologic Function" ),
+   // Procedures
+   T060( "Diagnostic Procedure" ),
+   T065( "Educational Activity" ),
+   T058( "Health Care Activity" ),
+   T059( "Laboratory Procedure" ),
+   T063( "Molecular Biology Research Technique" ),
+   T062( "Research Activity" ),
+   T061( "Therapeutic or Preventive Procedure" ),
+   // ERROR
+   T999( "Error" );
+
+   private final String _text;
+
+   Tui( final String description ) {
+      this._text = description;
+   }
+
+   /**
+    * @return the description given for this tui
+    */
+   public String getDescription() {
+      return _text;
+   }
+
+   /**
+    * @return the number that follows the leading letter of the constant name, e.g. 52 for T052
+    */
+   public int getIntValue() {
+      final String digits = name().substring( 1 );
+      return Integer.parseInt( digits );
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,40 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.*;
+import javax.swing.table.TableCellRenderer;
+import java.awt.*;
+
+
+/**
+ * Table cell renderer that wraps another renderer.
+ * At present every cell is rendered by the wrapped renderer without any change;
+ * the tui table model is kept but not consulted yet.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/11/2015
+ */
+final public class TuiCellRenderer implements TableCellRenderer {
+
+   static private final Logger LOGGER = LogManager.getLogger( "TuiCellRenderer" );
+
+   private final TuiTableModel _tuiModel;
+   private final TableCellRenderer _delegate;
+
+   /**
+    * @param tuiModel model of the table that is rendered; stored but not used for rendering yet
+    * @param delegate renderer that creates the component for each cell
+    */
+   public TuiCellRenderer( final TuiTableModel tuiModel, final TableCellRenderer delegate ) {
+      _tuiModel = tuiModel;
+      _delegate = delegate;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the component produced by the delegate renderer for the same arguments
+    */
+   @Override
+   public Component getTableCellRendererComponent( final JTable table, final Object value,
+                                                   final boolean isSelected, final boolean hasFocus,
+                                                   final int row, final int column ) {
+      return _delegate.getTableCellRendererComponent( table, value, isSelected, hasFocus, row, column );
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,139 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.event.EventListenerList;
+import javax.swing.event.TableModelEvent;
+import javax.swing.event.TableModelListener;
+import javax.swing.table.TableModel;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.EnumSet;
+
+import static org.apache.ctakes.dictionary.creator.gui.umls.Tui.*;
+
+/**
+ * Table model with one row per {@link Tui}, in the declaration order of the enum.
+ * Column 0 ("Use") is an editable check box that selects the tui as wanted,
+ * column 1 shows the tui name and column 2 its description.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+final public class TuiTableModel implements TableModel {
+
+   static private final Logger LOGGER = LogManager.getLogger( "TuiTableModel" );
+
+   static public final Tui[] CTAKES_ANAT = { T021, T022, T023, T024, T025, T026, T029, T030 };
+   static private final Tui[] CTAKES_DISO = { T019, T020, T037, T047, T048, T049, T050, T190, T191 };
+   static private final Tui[] CTAKES_FIND = { T033, T034, T040, T041, T042, T043, T044, T045, T046, T056, T057, T184 };
+   static private final Tui[] CTAKES_PROC = { T059, T060, T061 };
+   static public final Tui[] CTAKES_DRUG = { T109, T110, T114, T115, T116, T118, T119, T121, T122, T123, T124,
+                                              T125, T126, T127, T129, T130, T131, T195, T196, T197, T200, T203 };
+
+   static private final String[] COLUMN_NAMES = { "Use", "TUI", "Definition" };
+   static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, String.class, String.class };
+
+   // Tui.values() creates a new array on each call; the rows are looked up for every cell, so keep one copy
+   static private final Tui[] TUIS = Tui.values();
+
+   private final EventListenerList _listenerList = new EventListenerList();
+   private final Collection<Tui> _wantedTuis = EnumSet.noneOf( Tui.class );
+
+   /**
+    * Creates a model in which the ctakes anatomy, disorder, finding, procedure and drug tuis are selected.
+    */
+   public TuiTableModel() {
+      _wantedTuis.addAll( Arrays.asList( CTAKES_ANAT ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_DISO ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_FIND ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_PROC ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_DRUG ) );
+   }
+
+   /**
+    * @return the live collection of selected tuis; it reflects later changes to this model
+    */
+   public Collection<Tui> getWantedTuis() {
+      return _wantedTuis;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getRowCount() {
+      return TUIS.length;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getColumnCount() {
+      return COLUMN_NAMES.length;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getColumnName( final int columnIndex ) {
+      return COLUMN_NAMES[ columnIndex ];
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Class<?> getColumnClass( final int columnIndex ) {
+      return COLUMN_CLASSES[ columnIndex ];
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return true for the "Use" column only
+    */
+   @Override
+   public boolean isCellEditable( final int rowIndex, final int columnIndex ) {
+      return columnIndex == 0;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return Boolean selection for column 0, tui name for column 1, description for column 2, "ERROR" otherwise
+    */
+   @Override
+   public Object getValueAt( final int rowIndex, final int columnIndex ) {
+      final Tui tui = TUIS[ rowIndex ];
+      switch ( columnIndex ) {
+         case 0 : return isTuiEnabled( tui );
+         case 1 : return tui.name();
+         case 2 : return tui.getDescription();
+      }
+      return "ERROR";
+   }
+
+   private boolean isTuiEnabled( final Tui tui ) {
+      return _wantedTuis.contains( tui );
+   }
+
+   /**
+    * {@inheritDoc}
+    * Only Boolean values in the "Use" column are accepted, anything else is ignored.
+    * Listeners are notified with a row update when the selection actually changed.
+    */
+   @Override
+   public void setValueAt( final Object aValue, final int rowIndex, final int columnIndex ) {
+      if ( !(aValue instanceof Boolean) || columnIndex != 0 ) {
+         return;
+      }
+      final Tui tui = TUIS[ rowIndex ];
+      final boolean changed = (Boolean)aValue ? _wantedTuis.add( tui ) : _wantedTuis.remove( tui );
+      if ( changed ) {
+         fireTableChanged( new TableModelEvent( this, rowIndex ) );
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void addTableModelListener( final TableModelListener listener ) {
+      _listenerList.add( TableModelListener.class, listener );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void removeTableModelListener( final TableModelListener listener ) {
+      _listenerList.remove( TableModelListener.class, listener );
+   }
+
+   /**
+    * Forwards the given event to every registered table model listener.
+    *
+    * @param event the event to be forwarded
+    */
+   private void fireTableChanged( final TableModelEvent event ) {
+      for ( TableModelListener listener : _listenerList.getListeners( TableModelListener.class ) ) {
+         listener.tableChanged( event );
+      }
+   }
+
+}