You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2017/03/27 14:37:46 UTC
svn commit: r1788936 [5/7] - in /ctakes/trunk: ctakes-gui-res/ ctakes-gui-res/src/ ctakes-gui-res/src/main/ ctakes-gui-res/src/main/resources/ ctakes-gui-res/src/main/resources/org/ ctakes-gui-res/src/main/resources/org/apache/ ctakes-gui-res/src/main/...

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/UmlsTermUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/UmlsTermUtil.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/UmlsTermUtil.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/UmlsTermUtil.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,325 @@
+package org.apache.ctakes.gui.dictionary.umls;
+
+import org.apache.ctakes.gui.dictionary.util.FileUtil;
+import org.apache.ctakes.gui.dictionary.util.RareWordUtil;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.function.Consumer;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+
+/**
+ * Contains all the methods used to parse individual text definitions of umls terms
+ * <p/>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/16/14
+ */
+final public class UmlsTermUtil {
+
+
+   /**
+    * Names of the text files that hold the term filtering rules.
+    * Each constant carries the bare file name; {@link #getDataPath} joins it with a data directory.
+    */
+   private enum DATA_FILE {
+      REMOVAL_PREFIX_TRIGGERS( "RemovalPrefixTriggers.txt" ),
+      REMOVAL_SUFFIX_TRIGGERS( "RemovalSuffixTriggers.txt" ),
+      REMOVAL_FUNCTION_TRIGGERS( "RemovalFunctionTriggers.txt" ),
+      REMOVAL_COLON_TRIGGERS( "RemovalColonTriggers.txt" ),
+      UNWANTED_PREFIXES( "UnwantedPrefixes.txt" ),
+      UNWANTED_SUFFIXES( "UnwantedSuffixes.txt" ),
+      UNWANTED_TEXTS( "UnwantedTexts.txt" ),
+      MODIFIER_SUFFIXES( "ModifierSuffixes.txt" ),
+      RIGHT_ABBREVIATIONS( "RightAbbreviations.txt" ),
+      KEEP_PREFIX_TRIGGERS( "KeepPrefixTriggers.txt" );
+      // File name without any directory component
+      final private String __name;
+
+      /**
+       * @param name file name without any directory component
+       */
+      DATA_FILE( final String name ) {
+         __name = name;
+      }
+   }
+
+   static private final Pattern WHITESPACE = Pattern.compile( "\\s+" );
+   static private final Pattern AUTO_NOTE = Pattern.compile( "@" );
+
+   static private String getDataPath( final String dataDir, final DATA_FILE dataFile ) {
+      return dataDir + '/' + dataFile.__name;
+   }
+
+   final private Collection<String> _removalPrefixTriggers;
+   final private Collection<String> _removalSuffixTriggers;
+   final private Collection<String> _removalColonTriggers;
+   final private Collection<String> _removalFunctionTriggers;
+   final private Collection<String> _unwantedPrefixes;
+   final private Collection<String> _unwantedSuffixes;
+   final private Collection<String> _unwantedTexts;
+   final private Collection<String> _modifierSuffixes;
+   final private Collection<String> _abbreviations;
+   final private Collection<String> _unwantedPosTexts;
+   final private Collection<String> _keepPrefixTriggers;
+
+   /**
+    * Creates a term utility whose rule files all live in one directory under their default names.
+    *
+    * @param dataDir directory containing the files named by {@link DATA_FILE}
+    */
+   public UmlsTermUtil( final String dataDir ) {
+      this( getDataPath( dataDir, DATA_FILE.REMOVAL_PREFIX_TRIGGERS ),
+            getDataPath( dataDir, DATA_FILE.REMOVAL_SUFFIX_TRIGGERS ),
+            getDataPath( dataDir, DATA_FILE.REMOVAL_COLON_TRIGGERS ),
+            getDataPath( dataDir, DATA_FILE.REMOVAL_FUNCTION_TRIGGERS ),
+            getDataPath( dataDir, DATA_FILE.UNWANTED_PREFIXES ),
+            getDataPath( dataDir, DATA_FILE.UNWANTED_SUFFIXES ),
+            getDataPath( dataDir, DATA_FILE.UNWANTED_TEXTS ),
+            getDataPath( dataDir, DATA_FILE.MODIFIER_SUFFIXES ),
+            getDataPath( dataDir, DATA_FILE.RIGHT_ABBREVIATIONS ),
+            getDataPath( dataDir, DATA_FILE.KEEP_PREFIX_TRIGGERS ) );
+   }
+
+   /**
+    * Creates a term utility from individually specified rule files.
+    * Each file is read with {@link FileUtil#readOneColumn}, one rule per line.
+    *
+    * @param removalPrefixTriggersPath   file of prefixes that invalidate a term
+    * @param removalSuffixTriggersPath   file of suffixes that invalidate a term
+    * @param removalColonTriggersPath    file of texts that invalidate a term when contained anywhere in it
+    * @param removalFunctionTriggersPath file of texts that invalidate a term when contained anywhere in it
+    * @param unwantedPrefixesPath        file of prefixes that are stripped from a term
+    * @param unwantedSuffixesPath        file of suffixes that are stripped from a term
+    * @param unwantedTextsPath           file of complete texts that are invalid as terms
+    * @param modifierSuffixesPath        file of trailing modifiers that are moved in front of the main text
+    * @param abbreviationsPath           file of trailing abbreviations that are split off as terms of their own
+    * @param keepPrefixTriggersPath      file of prefixes that always make a term valid
+    */
+   public UmlsTermUtil( final String removalPrefixTriggersPath, final String removalSuffixTriggersPath,
+                        final String removalColonTriggersPath, final String removalFunctionTriggersPath,
+                        final String unwantedPrefixesPath, final String unwantedSuffixesPath,
+                        final String unwantedTextsPath,
+                        final String modifierSuffixesPath, final String abbreviationsPath,
+                        final String keepPrefixTriggersPath ) {
+      _removalPrefixTriggers = FileUtil.readOneColumn( removalPrefixTriggersPath, "term removal Prefix Triggers" );
+      _removalSuffixTriggers = FileUtil.readOneColumn( removalSuffixTriggersPath, "term removal Suffix Triggers" );
+      _removalColonTriggers = FileUtil.readOneColumn( removalColonTriggersPath, "term removal Colon Triggers" );
+      _removalFunctionTriggers = FileUtil
+            .readOneColumn( removalFunctionTriggersPath, "term removal Function Triggers" );
+      _unwantedPrefixes = FileUtil.readOneColumn( unwantedPrefixesPath, "unwanted Prefixes" );
+      _unwantedSuffixes = FileUtil.readOneColumn( unwantedSuffixesPath, "unwanted Suffixes" );
+      _unwantedTexts = FileUtil.readOneColumn( unwantedTextsPath, "unwanted Texts" );
+      _modifierSuffixes = FileUtil.readOneColumn( modifierSuffixesPath, "modifier Suffixes" );
+      _abbreviations = FileUtil.readOneColumn( abbreviationsPath, "Abbreviations to expand" );
+      _keepPrefixTriggers = FileUtil.readOneColumn( keepPrefixTriggersPath, "term keep Prefix Triggers" );
+      // Not file based: supplied by RareWordUtil
+      _unwantedPosTexts = RareWordUtil.getUnwantedPosTexts();
+   }
+
+   /**
+    * Decides whether a text is acceptable as a dictionary term.
+    * Texts starting with a keep-prefix or one of the family history prefixes are accepted before any
+    * rejection rule is consulted.
+    *
+    * @param text candidate term text
+    * @return true if the text passes all rules
+    */
+   public boolean isTextValid( final String text ) {
+      // Whitelisted beginnings win over every rejection rule below
+      for ( String keeper : _keepPrefixTriggers ) {
+         if ( text.startsWith( keeper ) ) {
+            return true;
+         }
+      }
+      final boolean familyHistory = text.startsWith( "fh " ) || text.startsWith( "no fh " )
+                                    || text.startsWith( "family " ) || text.startsWith( "history " );
+      if ( familyHistory ) {
+         return true;
+      }
+      // Only characters from space through tilde are legal, and at least one must be a letter
+      final int length = text.length();
+      boolean letterSeen = false;
+      for ( int i = 0; i < length; i++ ) {
+         final char c = text.charAt( i );
+         if ( c < ' ' || c > '~' ) {
+            return false;
+         }
+         letterSeen = letterSeen || Character.isAlphabetic( c );
+      }
+      if ( !letterSeen ) {
+         return false;
+      }
+      // Three characters beginning with an open parenthesis
+      if ( length == 3 && text.charAt( 0 ) == '(' ) {
+         return false;
+      }
+      // Auto-created note form: splitting on '@' yields more than two pieces
+      if ( AUTO_NOTE.split( text ).length > 2 ) {
+         return false;
+      }
+      if ( _unwantedTexts.contains( text ) || _unwantedPosTexts.contains( text ) ) {
+         return false;
+      }
+      final boolean removalTriggered = _removalPrefixTriggers.stream().anyMatch( text::startsWith )
+                                       || _removalSuffixTriggers.stream().anyMatch( text::endsWith )
+                                       || _removalColonTriggers.stream().anyMatch( text::contains )
+                                       || _removalFunctionTriggers.stream().anyMatch( text::contains );
+      return !removalTriggered;
+   }
+
+   /**
+    * @param text          text to measure
+    * @param minCharLength smallest acceptable number of characters
+    * @return true if the text has fewer characters than {@code minCharLength}
+    */
+   static public boolean isTextTooShort( final String text, final int minCharLength ) {
+      final int charCount = text.length();
+      return minCharLength > charCount;
+   }
+
+
+   /**
+    * Checks a text against per-token and token-count limits.
+    * The text is split on whitespace; tokens longer than two characters count as words, all others as symbols.
+    *
+    * @param text          text to check
+    * @param maxCharLength largest acceptable length of any single token
+    * @param maxWordCount  largest acceptable number of word tokens
+    * @param maxSymCount   largest acceptable number of symbol tokens
+    * @return true if any token is longer than {@code maxCharLength} or either count exceeds its limit
+    */
+   static public boolean isTextTooLong( final String text, final int maxCharLength,
+                                        final int maxWordCount, final int maxSymCount ) {
+      int words = 0;
+      int symbols = 0;
+      for ( final String token : WHITESPACE.split( text ) ) {
+         final int tokenLength = token.length();
+         if ( tokenLength > maxCharLength ) {
+            return true;
+         }
+         if ( tokenLength <= 2 ) {
+            symbols++;
+         } else {
+            words++;
+         }
+      }
+      return !( words <= maxWordCount && symbols <= maxSymCount );
+   }
+
+
+   /**
+    * Produces the final term variants for an already stripped text.
+    * Abbreviation extraction (if requested) is tried first, then modifier extraction; the text itself is
+    * always part of the candidates. Candidates are stripped, plural-expanded and filtered by size.
+    *
+    * @param strippedText         text to expand
+    * @param extractAbbreviations true to split trailing abbreviations into terms of their own
+    * @param minCharLength        smallest acceptable number of characters in a term
+    * @param maxCharLength        largest acceptable length of any single token
+    * @param maxWordCount         largest acceptable number of word tokens
+    * @param maxSymCount          largest acceptable number of symbol tokens
+    * @return the terms that survived stripping and size filtering
+    */
+   public Collection<String> getFormattedTexts( final String strippedText, final boolean extractAbbreviations,
+                                                final int minCharLength, final int maxCharLength,
+                                                final int maxWordCount, final int maxSymCount ) {
+      Collection<String> candidates = extractAbbreviations
+                                      ? extractAbbreviations( strippedText )
+                                      : Collections.<String>emptySet();
+      if ( candidates.isEmpty() ) {
+         candidates = extractModifiers( strippedText );
+      }
+      if ( candidates.isEmpty() ) {
+         // The extractors return immutable empties, so a fresh set is needed to hold the text itself
+         candidates = new HashSet<>( 1 );
+      }
+      candidates.add( strippedText );
+      final Collection<String> expanded = getPluralTerms( getStrippedTexts( candidates ) );
+      return getFormattedTexts( expanded, minCharLength, maxCharLength, maxWordCount, maxSymCount );
+   }
+
+
+   /**
+    * Keeps only the terms that are neither too short nor too long.
+    *
+    * @param extractedTerms candidate terms
+    * @param minCharLength  smallest acceptable number of characters in a term
+    * @param maxCharLength  largest acceptable length of any single token
+    * @param maxWordCount   largest acceptable number of word tokens
+    * @param maxSymCount    largest acceptable number of symbol tokens
+    * @return list of the acceptable terms
+    */
+   static private Collection<String> getFormattedTexts( final Collection<String> extractedTerms,
+                                                        final int minCharLength, final int maxCharLength,
+                                                        final int maxWordCount, final int maxSymCount ) {
+      return extractedTerms.stream()
+            .filter( term -> !( isTextTooShort( term, minCharLength )
+                                || isTextTooLong( term, maxCharLength, maxWordCount, maxSymCount ) ) )
+            .collect( Collectors.toList() );
+   }
+
+   /**
+    * Expands texts that end with the optional-plural marker "( s )" into a singular and a plural form.
+    * The marker and the whitespace in front of it are removed, so "finger ( s )" yields "finger" and "fingers".
+    * A text that consists of nothing but the marker is dropped.
+    * The given collection is never modified, so unmodifiable collections are accepted.
+    *
+    * @param texts term texts, some of which may end with "( s )"
+    * @return {@code texts} itself if no text carries the marker, otherwise a new set with the expanded texts
+    */
+   static private Collection<String> getPluralTerms( final Collection<String> texts ) {
+      final String pluralMarker = "( s )";
+      if ( texts.stream().noneMatch( t -> t.endsWith( pluralMarker ) ) ) {
+         return texts;
+      }
+      final Collection<String> expanded = new HashSet<>( texts.size() + 2 );
+      for ( String text : texts ) {
+         if ( !text.endsWith( pluralMarker ) ) {
+            expanded.add( text );
+            continue;
+         }
+         // trim() drops the token separator that precedes the marker; without it the plural would be "finger s"
+         final String singular = text.substring( 0, text.length() - pluralMarker.length() ).trim();
+         if ( singular.isEmpty() ) {
+            continue;
+         }
+         expanded.add( singular );
+         expanded.add( singular + "s" );
+      }
+      return expanded;
+   }
+
+   /**
+    * Strips every given text and discards those that end up empty.
+    * The result is collected into a {@link HashSet} rather than with {@code Collectors.toSet()}, because
+    * the latter makes no promise that the returned set can be modified.
+    *
+    * @param texts texts to strip
+    * @return new, modifiable set of the non-empty stripped texts
+    */
+   private Collection<String> getStrippedTexts( final Collection<String> texts ) {
+      return texts.stream()
+            .map( this::getStrippedText )
+            .filter( t -> !t.isEmpty() )
+            .collect( Collectors.toCollection( HashSet::new ) );
+   }
+
+   /**
+    * Removes unwanted prefixes and suffixes from a text.
+    * Text in umls can carry several prefixes and/or suffixes, so stripping is repeated until a pass no
+    * longer changes the length of the text.
+    *
+    * @param text raw term text
+    * @return the stripped text, or an empty string if the text is invalid after any pass or contains both
+    * a '(' and a '['
+    */
+   public String getStrippedText( final String text ) {
+      String stripped = text.trim();
+      int previousLength;
+      do {
+         previousLength = stripped.length();
+         for ( String prefix : _unwantedPrefixes ) {
+            if ( stripped.startsWith( prefix ) ) {
+               stripped = stripped.substring( prefix.length() ).trim();
+            }
+         }
+         for ( String suffix : _unwantedSuffixes ) {
+            if ( stripped.endsWith( suffix ) ) {
+               final int end = stripped.length() - suffix.length();
+               stripped = stripped.substring( 0, end ).trim();
+            }
+         }
+         // Validity is checked after every pass, not just the last one
+         if ( !isTextValid( stripped ) ) {
+            return "";
+         }
+      } while ( previousLength != stripped.length() );
+      final boolean mixesBrackets = stripped.contains( "(" ) && stripped.contains( "[" );
+      return mixesBrackets ? "" : stripped;
+   }
+
+
+   private Collection<String> extractAbbreviations( final String tokenizedText ) {
+      for ( String abbreviation : _abbreviations ) {
+         if ( tokenizedText.endsWith( abbreviation )
+              && !tokenizedText.contains( ":" ) && !tokenizedText.contains( " of " )
+              && !tokenizedText.contains( " for " ) ) {
+            final String noAbbrTerm
+                  = tokenizedText.substring( 0, tokenizedText.length() - abbreviation.length() ).trim();
+            final String abbrTerm
+                  = abbreviation.replace( ":", "" ).replace( "(", "" ).replace( ")", "" ).replace( "-", "" )
+                  .replace( "[", "" ).replace( "]", "" ).replace( "&", "" ).trim();
+            final Collection<String> extractedAbbreviations = new HashSet<>( 2 );
+            extractedAbbreviations.add( noAbbrTerm );
+            extractedAbbreviations.add( abbrTerm );
+            return extractedAbbreviations;
+         }
+      }
+      return Collections.emptyList();
+   }
+
+   /**
+    * Creates a variant of a text in which a trailing modifier is moved in front of the main text.
+    *
+    * @param tokenizedText text that may end with a known modifier suffix
+    * @return a set holding the original text and the reordered variant, or an immutable empty list if
+    * the text ends with no known modifier
+    */
+   private Collection<String> extractModifiers( final String tokenizedText ) {
+      for ( String modifier : _modifierSuffixes ) {
+         if ( !tokenizedText.endsWith( modifier ) ) {
+            continue;
+         }
+         final int cut = tokenizedText.length() - modifier.length();
+         final String mainText = tokenizedText.substring( 0, cut ).trim();
+         String bareModifier = modifier;
+         for ( String symbol : new String[]{ "(", ")", "-", "," } ) {
+            bareModifier = bareModifier.replace( symbol, "" );
+         }
+         final Collection<String> variants = new HashSet<>( 2 );
+         variants.add( tokenizedText );
+         variants.add( bareModifier.trim() + " " + mainText );
+         return variants;
+      }
+      return Collections.emptyList();
+   }
+
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/VocabularyStore.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/VocabularyStore.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/VocabularyStore.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/VocabularyStore.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,105 @@
+package org.apache.ctakes.gui.dictionary.umls;
+
+import org.apache.log4j.Logger;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+public enum VocabularyStore {
+   INSTANCE;
+
+   /**
+    * @return the single instance of this store
+    */
+   static public VocabularyStore getInstance() {
+      return INSTANCE;
+   }
+
+   private final Logger LOGGER = Logger.getLogger( "Vocabulary" );
+
+   private final Map<String, Class<?>> _vocabularyClasses = new HashMap<>();
+
+   /**
+    * @return names of all vocabularies added so far; this is the key set of the internal map, not a copy
+    */
+   public Collection<String> getAllVocabularies() {
+      return _vocabularyClasses.keySet();
+   }
+
+   /**
+    * @param vocabulary name of a vocabulary
+    * @return the java class registered for codes of the vocabulary, or null if the vocabulary was never added
+    */
+   public Class<?> getVocabularyClass( final String vocabulary ) {
+      return _vocabularyClasses.get( vocabulary );
+   }
+
+   /**
+    * Registers the class that best fits a code of a vocabulary.
+    * Once a vocabulary is registered as {@code String} it stays that way; otherwise the class derived
+    * from the given code replaces whatever was registered before.
+    *
+    * @param vocabulary name of the vocabulary
+    * @param code       one code of that vocabulary
+    */
+   public void addVocabulary( final String vocabulary, final String code ) {
+      final boolean alreadyText = String.class.equals( _vocabularyClasses.get( vocabulary ) );
+      if ( !alreadyText ) {
+         final Class<?> bestClass = getBestClass( code );
+         _vocabularyClasses.put( vocabulary, bestClass );
+      }
+   }
+
+   /**
+    * Gets the sql column type to use for the codes of a vocabulary.
+    * A vocabulary that was never added, or whose class has no known mapping, is logged as an error
+    * and stored as text.
+    *
+    * @param vocabulary name of a vocabulary
+    * @return "VARCHAR(48)", "FLOAT", "BIGINT" or "INTEGER"
+    */
+   public String getJdbcClass( final String vocabulary ) {
+      final Class<?> vocabularyClass = _vocabularyClasses.get( vocabulary );
+      if ( vocabularyClass == null ) {
+         // Unknown vocabulary: asking null for its name would throw instead of falling back
+         LOGGER.error( "Could not derive database class for unknown vocabulary " + vocabulary );
+         return "VARCHAR(48)";
+      }
+      if ( String.class.equals( vocabularyClass ) ) {
+         return "VARCHAR(48)";
+      } else if ( Double.class.equals( vocabularyClass ) ) {
+         return "FLOAT";
+      } else if ( Long.class.equals( vocabularyClass ) ) {
+         return "BIGINT";
+      } else if ( Integer.class.equals( vocabularyClass ) ) {
+         return "INTEGER";
+      }
+      LOGGER.error( "Could not derive database class for " + vocabularyClass.getName() );
+      return "VARCHAR(48)";
+   }
+
+   public String getCtakesClass( final String vocabulary ) {
+      final Class<?> vocabularyClass = _vocabularyClasses.get( vocabulary );
+      if ( String.class.equals( vocabularyClass ) ) {
+         return "text";
+      } else if ( Double.class.equals( vocabularyClass ) ) {
+         return "double";
+      } else if ( Long.class.equals( vocabularyClass ) ) {
+         return "long";
+      } else if ( Integer.class.equals( vocabularyClass ) ) {
+         return "int";
+      } else {
+         LOGGER.error( "Could not derive database class for " + vocabularyClass.getName() );
+      }
+      return "text";
+   }
+
+   /**
+    * Picks the narrowest class able to hold a code, never narrowing below the class already in use.
+    * A code is numeric if it consists of digits with at most one '.', and contains at least one digit.
+    *
+    * @param code         code to classify
+    * @param currentClass class currently used for the vocabulary of the code, may be null
+    * @return {@code String} for non-numeric codes; otherwise {@code Double} if the code has a '.' or
+    * Double is current, else {@code Long} if the code has more than 9 characters or Long is current,
+    * else {@code Integer}
+    */
+   static private Class<?> getBestClassFuture( final String code, final Class<?> currentClass ) {
+      boolean haveDot = false;
+      boolean haveDigit = false;
+      for ( char c : code.toCharArray() ) {
+         if ( Character.isDigit( c ) ) {
+            haveDigit = true;
+            continue;
+         }
+         // The only legal non-digit is a single decimal point
+         if ( c != '.' || haveDot ) {
+            return String.class;
+         }
+         haveDot = true;
+      }
+      if ( !haveDigit ) {
+         // Empty code or a lone "." is not a number
+         return String.class;
+      }
+      if ( haveDot || Double.class.equals( currentClass ) ) {
+         return Double.class;
+      }
+      if ( code.length() > 9 || Long.class.equals( currentClass ) ) {
+         return Long.class;
+      }
+      return Integer.class;
+   }
+
+   // TODO replace with getBestClassFuture when ctakes is upgraded to accept double and int
+   static private Class<?> getBestClass( final String code ) {
+      for ( char c : code.toCharArray() ) {
+         if ( !Character.isDigit( c ) ) {
+            return String.class;
+         }
+      }
+      return Long.class;
+   }
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/FileUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/FileUtil.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/FileUtil.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/FileUtil.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,247 @@
+package org.apache.ctakes.gui.dictionary.util;
+
+
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.log4j.Logger;
+
+import javax.swing.filechooser.FileSystemView;
+import java.io.*;
+import java.util.*;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/15/14
+ */
+final public class FileUtil {
+
+   private FileUtil() {
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "FileUtil" );
+
+   /**
+    * Expands the shorthand at the start of a directory path.
+    * <ul>
+    * <li>null, empty or "." becomes the "user.dir" system property, or the default directory of the
+    * file system view if that property is not set</li>
+    * <li>a leading "~" is replaced by the "user.home" system property; later tildes are left alone</li>
+    * <li>each leading "../" moves one directory up from the working directory; whatever follows the
+    * last "../" is appended to the resulting directory</li>
+    * </ul>
+    * Any other path is returned unchanged.  The jvm is exited with status 1 if "../" would move above
+    * the file system root or the working directory is not a directory.
+    *
+    * @param dirPath path that may start with ".", "~" or ".."
+    * @return the expanded path
+    */
+   static public String parseDirText( final String dirPath ) {
+      if ( dirPath == null || dirPath.isEmpty() || dirPath.equals( "." ) ) {
+         final String userDir = System.getProperty( "user.dir" );
+         if ( userDir == null || userDir.isEmpty() ) {
+            return FileSystemView.getFileSystemView().getDefaultDirectory().getPath();
+         }
+         return userDir;
+      }
+      if ( dirPath.startsWith( "~" ) ) {
+         final String userHome = System.getProperty( "user.home" );
+         if ( userHome == null || userHome.isEmpty() || userHome.startsWith( "~" ) ) {
+            // Nothing sensible to expand to
+            return dirPath;
+         }
+         // Plain concatenation: a regex replacement would misread '\' and '$' in windows home paths
+         return parseDirText( userHome + dirPath.substring( 1 ) );
+      }
+      final boolean startsWithParent = dirPath.equals( ".." )
+                                       || dirPath.startsWith( "../" ) || dirPath.startsWith( "..\\" );
+      if ( !startsWithParent ) {
+         return dirPath;
+      }
+      File cwd = new File( parseDirText( "." ) );
+      String remainder = dirPath;
+      while ( remainder.equals( ".." ) || remainder.startsWith( "../" ) || remainder.startsWith( "..\\" ) ) {
+         if ( !cwd.isDirectory() || cwd.getParentFile() == null ) {
+            LOGGER.error( "Invalid directory " + dirPath );
+            System.exit( 1 );
+         }
+         cwd = cwd.getParentFile();
+         // Skip the two dots and their separator
+         remainder = remainder.length() > 3 ? remainder.substring( 3 ) : "";
+      }
+      return remainder.isEmpty() ? cwd.getPath() : new File( cwd, remainder ).getPath();
+   }
+
+
+   static public BufferedReader createReader( final String filePath ) {
+//      final String formattedPath = parseDirText( filePath );
+//      final File file = new File( formattedPath );
+      try {
+         final File file = FileLocator.locateFile( filePath );
+         if ( !file.canRead() ) {
+            LOGGER.error( "Cannot read file " + filePath );
+            System.exit( 1 );
+         }
+         return new BufferedReader( new FileReader( file ) );
+      } catch ( IOException ioE ) {
+         LOGGER.error( "Cannot create Reader for " + filePath );
+         LOGGER.error( ioE.getMessage() );
+         System.exit( 1 );
+      }
+      return null;
+   }
+
+   /**
+    * Opens a buffered writer that appends to a file found with {@code FileLocator.locateFile}.
+    * A missing parent directory is created.  The jvm is exited with status 1 on any io failure.
+    *
+    * @param filePath path handed to the file locator
+    * @return appending writer on the file
+    */
+   static private BufferedWriter createWriter( final String filePath ) {
+      BufferedWriter writer = null;
+      try {
+         final File target = FileLocator.locateFile( filePath );
+         final File parent = target.getParentFile();
+         if ( parent != null && !parent.isDirectory() ) {
+            parent.mkdirs();
+         }
+         // second argument true: append instead of overwrite
+         writer = new BufferedWriter( new FileWriter( target, true ) );
+      } catch ( IOException ioE ) {
+         LOGGER.error( "Cannot create Writer for " + filePath );
+         LOGGER.error( ioE.getMessage() );
+         System.exit( 1 );
+      }
+      return writer;
+   }
+
+   static private String readLine( final BufferedReader reader, final String filePath ) {
+      try {
+         String line = reader.readLine();
+         while ( line != null ) {
+            if ( !line.trim().isEmpty() && !line.trim().startsWith( "//" ) ) {
+               return line;
+            }
+            line = reader.readLine();
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( "Error reading from file " + filePath );
+      }
+      return null;
+   }
+
+   /**
+    * Reads the next content line and splits it with {@code TokenUtil.getBsvItems}.
+    *
+    * @param reader   reader to pull the line from
+    * @param filePath path of the file, used only for error messages
+    * @return items of the line, or null if no content line is left
+    */
+   static public List<String> readBsvTokens( final BufferedReader reader, final String filePath ) {
+      final String bsvLine = readLine( reader, filePath );
+      return bsvLine == null ? null : TokenUtil.getBsvItems( bsvLine );
+   }
+
+   /**
+    * Reads the next content line and splits it with {@code TokenUtil.getCsvItems}.
+    *
+    * @param reader   reader to pull the line from
+    * @param filePath path of the file, used only for error messages
+    * @return items of the line, or null if no content line is left
+    */
+   static public List<String> readCsvTokens( final BufferedReader reader, final String filePath ) {
+      final String csvLine = readLine( reader, filePath );
+      return csvLine == null ? null : TokenUtil.getCsvItems( csvLine );
+   }
+
+   /**
+    * Reads the next content line and splits it with {@code TokenUtil.getTildeItems}.
+    *
+    * @param reader   reader to pull the line from
+    * @param filePath path of the file, used only for error messages
+    * @return items of the line, or null if no content line is left
+    */
+   static public List<String> readTildeTokens( final BufferedReader reader, final String filePath ) {
+      final String tildeLine = readLine( reader, filePath );
+      return tildeLine == null ? null : TokenUtil.getTildeItems( tildeLine );
+   }
+
+   /**
+    * Writes each item on a line of its own, using the writer from {@code createWriter}.
+    * Progress is logged every 100000 lines.  A write failure is logged and ends the writing.
+    *
+    * @param filePath    path of the file to write
+    * @param description name of the content, used only for log messages
+    * @param list        items to write
+    */
+   static public void writeOneColumn( final String filePath, final String description,
+                                      final Collection<String> list ) {
+      LOGGER.info( "Writing " + description + " to " + filePath );
+      long written = 0;
+      try ( BufferedWriter out = createWriter( filePath ) ) {
+         for ( final String entry : list ) {
+            // Counted before the write so that the error message names the failing line
+            written++;
+            out.write( entry );
+            out.newLine();
+            final boolean milestone = written % 100000 == 0;
+            if ( milestone ) {
+               LOGGER.info( "File Line " + written );
+            }
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( "Error writing " + description + " on line " + written + " in file " + filePath );
+      }
+      LOGGER.info( "Wrote " + written + " " + description + " to " + filePath );
+   }
+
+
+   /**
+    * Reads all content lines of a file into a set; blank lines and "//" comments are skipped.
+    * Progress is logged every 100000 lines.
+    *
+    * @param listFilePath path of the file to read
+    * @param description  name of the content, used only for log messages
+    * @return set of the distinct content lines
+    */
+   static public Collection<String> readOneColumn( final String listFilePath, final String description ) {
+      LOGGER.info( "Reading " + description + " from " + listFilePath );
+      final Collection<String> items = new HashSet<>();
+      long linesRead = 0;
+      try ( BufferedReader in = createReader( listFilePath ) ) {
+         for ( String line = readLine( in, listFilePath ); line != null; line = readLine( in, listFilePath ) ) {
+            linesRead++;
+            items.add( line );
+            if ( linesRead % 100000 == 0 ) {
+               LOGGER.info( "File Line " + linesRead );
+            }
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+      }
+      LOGGER.info( "File Lines " + linesRead + "\t " + description + " " + items.size() );
+      return items;
+   }
+
+//   static public void writeNamedSets( final String filePath, final String description,
+//                                      final HashSetMap<String, String> namedSets ) {
+//      LOGGER.info( "Writing " + description + " to " + filePath );
+//      long lineCount = 0;
+//      try {
+//         final BufferedWriter writer = createWriter( filePath );
+//         for ( Map.Entry<String, Set<String>> namedSet : namedSets.entrySet() ) {
+//            lineCount++;
+//            writer.write( TokenUtil.createBsvLine( namedSet.getKey(),
+//                                                   TokenUtil.createCsvLine( namedSet.getParameterValue() ) ) );
+//            writer.newLine();
+//            if ( lineCount % 100000 == 0 ) {
+//               LOGGER.info( "File Line " + lineCount );
+//            }
+//         }
+//         writer.close();
+//      } catch ( IOException ioE ) {
+//         LOGGER.error( "Error writing " + description + " on line " + lineCount + " in file " + filePath );
+//      }
+//      LOGGER.info( "Wrote " + lineCount + " " + description + " to " + filePath );
+//   }
+
+   /**
+    * Writes one line per map entry: the key and the comma separated values of the entry, combined by
+    * {@code TokenUtil.createBsvLine}.  Progress is logged every 100000 lines.  A write failure is
+    * logged and ends the writing.
+    *
+    * @param filePath    path of the file to write
+    * @param description name of the content, used only for log messages
+    * @param namedSets   names and the items that belong to each of them
+    * @deprecated
+    */
+   @Deprecated
+   static public void writeNamedSets( final String filePath, final String description,
+                                      final Map<String, Collection<String>> namedSets ) {
+      LOGGER.info( "Writing " + description + " to " + filePath );
+      long lineCount = 0;
+      try ( BufferedWriter writer = createWriter( filePath ) ) {
+         for ( Map.Entry<String, Collection<String>> namedSet : namedSets.entrySet() ) {
+            // Counted before the write so that the error message names the failing line
+            lineCount++;
+            writer.write( TokenUtil.createBsvLine( namedSet.getKey(),
+                  TokenUtil.createCsvLine( namedSet.getValue() ) ) );
+            writer.newLine();
+            if ( lineCount % 100000 == 0 ) {
+               LOGGER.info( "File Line " + lineCount );
+            }
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( "Error writing " + description + " on line " + lineCount + " in file " + filePath );
+      }
+      LOGGER.info( "Wrote " + lineCount + " " + description + " to " + filePath );
+   }
+
+   /**
+    * @deprecated
+    */
+   @Deprecated
+   static public Map<String, Collection<String>> readNamedSetsOld( final String filePath, final String description ) {
+      final Collection<String> lines = readOneColumn( filePath, description );
+      final Map<String, Collection<String>> namedSets = new HashMap<>( lines.size() );
+      for ( String line : lines ) {
+         final List<String> nameAndList = TokenUtil.getBsvItems( line );
+         if ( nameAndList == null || nameAndList.size() != 2 ) {
+            LOGGER.error( "Bad line " + line );
+            continue;
+         }
+         namedSets.put( nameAndList.get( 0 ), TokenUtil.getCsvItems( nameAndList.get( 1 ) ) );
+      }
+      return namedSets;
+   }
+
+//   static public HashSetMap<String, String> readNamedSets( final String filePath, final String description ) {
+//      final Collection<String> lines = readOneColumn( filePath, description );
+//      final HashSetMap<String, String> namedSets = new HashSetMap<>( lines.size() );
+//      for ( String line : lines ) {
+//         final List<String> nameAndList = TokenUtil.getBsvItems( line );
+//         if ( nameAndList == null || nameAndList.size() != 2 ) {
+//            LOGGER.error( "Bad line " + line );
+//            continue;
+//         }
+//         namedSets.addAll( nameAndList.get( 0 ), TokenUtil.getCsvItems( nameAndList.get( 1 ) ) );
+//      }
+//      return namedSets;
+//   }
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/HsqlUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/HsqlUtil.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/HsqlUtil.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/HsqlUtil.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,110 @@
+package org.apache.ctakes.gui.dictionary.util;
+
+
+import org.apache.ctakes.gui.dictionary.umls.VocabularyStore;
+import org.apache.log4j.Logger;
+
+import java.io.*;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+final public class HsqlUtil {
+
+   static private final Logger LOGGER = Logger.getLogger( "HsqlUtil" );
+
+   static public final String URL_PREFIX = "jdbc:hsqldb:file:";
+
+   private HsqlUtil() {
+   }
+
+
+   static public boolean createDatabase( final String databasePath, final String databaseName ) {
+      final File databaseDir = new File( databasePath, databaseName );
+      if ( databaseDir.isFile() ) {
+         LOGGER.error( databaseDir.getPath() + " exists as a file.  Hsqldb requires that path to be a directory" );
+         return false;
+      }
+      databaseDir.mkdirs();
+      return writePropertiesFile( databaseDir, databaseName )
+             && writeScriptFile( databaseDir, databaseName )
+             && writeRcFile( databaseDir, databaseName );
+   }
+
+   static private boolean writePropertiesFile( final File databaseDir, final String databaseName ) {
+      final File propertiesFile = new File( databaseDir, databaseName + ".properties" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( propertiesFile ) ) ) {
+         writer.write( "#HSQL Database Engine 1.8.0.10\n" );
+         writer.write( "#Thu Sep 04 09:49:09 EDT 2014\n" );
+         writer.write( "hsqldb.script_format=0\n" );
+         writer.write( "runtime.gc_interval=0\n" );
+         writer.write( "sql.enforce_strict_size=false\n" );
+         writer.write( "hsqldb.cache_size_scale=8\n" );
+         writer.write( "readonly=false\n" );
+         writer.write( "hsqldb.nio_data_file=true\n" );
+         writer.write( "hsqldb.cache_scale=14\n" );
+         writer.write( "version=1.8.0\n" );
+         writer.write( "hsqldb.default_table_type=memory\n" );
+         writer.write( "hsqldb.cache_file_scale=1\n" );
+         writer.write( "hsqldb.log_size=200\n" );
+         writer.write( "modified=no\n" );
+         writer.write( "hsqldb.cache_version=1.7.0\n" );
+         writer.write( "hsqldb.original_version=1.8.0\n" );
+         writer.write( "hsqldb.compatible_version=1.8.0\n\n" );
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+         return false;
+      }
+      return true;
+   }
+
+   static private boolean writeScriptFile( final File databaseDir, final String databaseName ) {
+      final File scriptFile = new File( databaseDir, databaseName + ".script" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( scriptFile ) ) ) {
+         writer.write( "CREATE SCHEMA PUBLIC AUTHORIZATION DBA\n" );
+         // main table
+         writer.write( "CREATE MEMORY TABLE CUI_TERMS(CUI BIGINT,RINDEX INTEGER,TCOUNT INTEGER,TEXT VARCHAR(255),RWORD VARCHAR(48))\n" );
+         writer.write( "CREATE INDEX IDX_CUI_TERMS ON CUI_TERMS(RWORD)\n" );
+         // tui table
+         writer.write( "CREATE MEMORY TABLE TUI(CUI BIGINT,TUI INTEGER)\n" );
+         writer.write( "CREATE INDEX IDX_TUI ON TUI(CUI)\n" );
+         // preferred term table
+         writer.write( "CREATE MEMORY TABLE PREFTERM(CUI BIGINT,PREFTERM VARCHAR(255))\n" );
+         writer.write( "CREATE INDEX IDX_PREFTERM ON PREFTERM(CUI)\n" );
+         // vocabulary tables
+         for ( String vocabulary : VocabularyStore.getInstance().getAllVocabularies() ) {
+            final String jdbcClass = VocabularyStore.getInstance().getJdbcClass( vocabulary );
+            final String tableName = vocabulary.replace( '.', '_' ).replace( '-', '_' );
+            writer.write( "CREATE MEMORY TABLE " + tableName + "(CUI BIGINT," + tableName + " " + jdbcClass + ")\n" );
+            writer.write( "CREATE INDEX IDX_" + tableName + " ON " + tableName + "(CUI)\n" );
+         }
+         writer.write( "CREATE USER SA PASSWORD \"\"\n" );
+         writer.write( "GRANT DBA TO SA\n" );
+         writer.write( "SET WRITE_DELAY 10\n" );
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+         return false;
+      }
+      return true;
+   }
+
+   static private boolean writeRcFile( final File databaseDir, final String databaseName ) {
+      final File scriptFile = new File( databaseDir, databaseName + ".rc" );
+      final String url = HsqlUtil.URL_PREFIX + databaseDir.getPath().replace( '\\', '/' )
+                         + "/" + databaseName;
+      try ( final Writer writer = new BufferedWriter( new FileWriter( scriptFile ) ) ) {
+         writer.write( "urlid " + databaseName + "\n" );
+         writer.write( "url " + url + ";shutdown=true\n" );
+         writer.write( "username sa\n" );
+         writer.write( "password\n" );
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+         return false;
+      }
+      return true;
+   }
+
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,85 @@
+package org.apache.ctakes.gui.dictionary.util;
+
+import org.apache.log4j.Logger;
+
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/21/14
+ */
+final public class JdbcUtil {
+
+   static private final Logger LOGGER = Logger.getLogger( "JdbcUtil" );
+
+   private JdbcUtil() {
+   }
+
+   static private final String JDBC_DRIVER = "org.hsqldb.jdbcDriver";
+
+
+   static public void registerDriver() {
+      try {
+         Driver driver = (Driver)Class.forName( JDBC_DRIVER ).newInstance();
+         DriverManager.registerDriver( driver );
+      } catch ( Exception e ) {
+         // TODO At least four different exceptions are thrown here, and should be caught and handled individually
+         LOGGER.error( "Could not register Driver " + JDBC_DRIVER );
+         LOGGER.error( e.getMessage() );
+         System.exit( 1 );
+      }
+   }
+
+   static public Connection createDatabaseConnection( final String url, final String user, final String pass ) {
+      registerDriver();
+      LOGGER.info( "Connecting to " + url + " as " + user );
+      Connection connection = null;
+      try {
+         connection = DriverManager.getConnection( url, user, pass );
+      } catch ( SQLException sqlE ) {
+         // thrown by Connection.prepareStatement(..) and getTotalRowCount(..)
+         LOGGER.error( "Could not establish connection to " + url + " as " + user );
+         LOGGER.error( sqlE.getMessage() );
+         System.exit( 1 );
+      }
+      return connection;
+   }
+
+   //   static public String createRowInsertSql( final String tableName, final int valueCount ) {
+   static public String createRowInsertSql( final String tableName, final Enum... fields ) {
+      final String[] fieldNames = new String[ fields.length ];
+      int i = 0;
+      for ( Enum field : fields ) {
+         fieldNames[ i ] = field.name();
+         i++;
+      }
+      return createRowInsertSql( tableName, fieldNames );
+   }
+
+   static public String createCodeInsertSql( final String vocabulary ) {
+      return createRowInsertSql( vocabulary.toLowerCase().replace( '.', '_' ).replace( '-', '_' ), "CUI", vocabulary );
+   }
+
+   static public String createRowInsertSql( final String tableName, final String... fieldNames ) {
+      final StringBuilder sb = new StringBuilder( "insert into" );
+      sb.append( " " ).append( tableName );
+      sb.append( " (" );
+      for ( String fieldName : fieldNames ) {
+         sb.append( fieldName ).append( ',' );
+      }
+      // remove last comma
+      sb.setLength( sb.length() - 1 );
+      sb.append( ") " );
+      sb.append( " values (" );
+      for ( int i = 0; i < fieldNames.length - 1; i++ ) {
+         sb.append( "?," );
+      }
+      sb.append( "?)" );
+      return sb.toString();
+   }
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,201 @@
+package org.apache.ctakes.gui.dictionary.util;
+
+import org.apache.ctakes.gui.dictionary.umls.Concept;
+import org.apache.ctakes.gui.dictionary.umls.Tui;
+import org.apache.ctakes.gui.dictionary.umls.VocabularyStore;
+import org.apache.log4j.Logger;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/15/14
+ */
+final public class RareWordDbWriter {
+
+   static private final Logger LOGGER = Logger.getLogger( "RareWordDbWriter" );
+
+
+   private RareWordDbWriter() {
+   }
+
+   private enum CuiTermsField {
+      CUI( 1, Long.class ), RINDEX( 2, Integer.class ), TCOUNT( 3, Integer.class ),
+      TEXT( 4, String.class ), RWORD( 5, String.class );
+      final private int __index;
+      final private Class __classType;
+
+      CuiTermsField( final int index, final Class classType ) {
+         __index = index;
+         __classType = classType;
+      }
+   }
+
+
+   static public boolean writeConcepts( final Map<Long, Concept> concepts,
+                                        final String url, final String user, final String pass ) {
+      // Get Count of appearance in dictionary per term token
+      final Map<String, Long> tokenCounts = RareWordUtil.getTokenCounts( concepts.values() );
+      // Create insert sql statements
+      final String mainTableSql = JdbcUtil.createRowInsertSql( "CUI_TERMS", CuiTermsField.values() );
+      final String tuiTableSql = JdbcUtil.createCodeInsertSql( "tui" );
+      final String preftermTableSql = JdbcUtil.createCodeInsertSql( "prefterm" );
+      final Map<String, String> insertCodeSqls = createCodeInsertSqls();
+
+      long mainTableCount = 0;
+      long tuiTableCount = 0;
+      long preftermTableCount = 0;
+      final Map<String, Long> codeTableCounts = createCodeCounts();
+      final Connection connection = JdbcUtil.createDatabaseConnection( url, user, pass );
+      try {
+         // Create PreparedStatements from insert sql statements
+         final PreparedStatement mainTableStatement = connection.prepareStatement( mainTableSql );
+         final PreparedStatement tuiStatement = connection.prepareStatement( tuiTableSql );
+         final PreparedStatement preftermStatement = connection.prepareStatement( preftermTableSql );
+         final Map<String, PreparedStatement> codeStatements = createCodeStatements( connection, insertCodeSqls );
+
+         for ( Map.Entry<Long, Concept> conceptEntry : concepts.entrySet() ) {
+            final long cui = conceptEntry.getKey();
+            final Concept concept = conceptEntry.getValue();
+            // write main term table
+            boolean conceptOk = false;
+            for ( String text : conceptEntry.getValue().getTexts() ) {
+               final RareWordUtil.IndexedRareWord indexedRareWord = RareWordUtil.getIndexedRareWord( text,
+                     tokenCounts );
+               if ( RareWordUtil.NULL_RARE_WORD.equals( indexedRareWord ) ) {
+                  continue;
+               }
+               conceptOk = true;
+               mainTableStatement.setLong( CuiTermsField.CUI.__index, cui );
+               mainTableStatement.setInt( CuiTermsField.RINDEX.__index, indexedRareWord.__index );
+               mainTableStatement.setInt( CuiTermsField.TCOUNT.__index, indexedRareWord.__tokenCount );
+               mainTableStatement.setString( CuiTermsField.TEXT.__index, text );
+               mainTableStatement.setString( CuiTermsField.RWORD.__index, indexedRareWord.__word );
+               mainTableStatement.executeUpdate();
+               mainTableCount = incrementCount( "Main", mainTableCount );
+            }
+            if ( !conceptOk ) {
+               continue;
+            }
+            // write tui table
+            for ( Tui tui : concept.getTuis() ) {
+               tuiStatement.setLong( CuiTermsField.CUI.__index, cui );
+               tuiStatement.setInt( 2, tui.getIntValue() );
+               tuiStatement.executeUpdate();
+               tuiTableCount = incrementCount( "Tui", tuiTableCount );
+            }
+            // write preferred term table
+            final String preferredText = concept.getPreferredText();
+            if ( preferredText != null
+                 && !preferredText.isEmpty()
+                 && !preferredText.equals( Concept.PREFERRED_TERM_UNKNOWN ) ) {
+               preftermStatement.setLong( CuiTermsField.CUI.__index, cui );
+               preftermStatement.setString( 2, preferredText );
+               preftermStatement.executeUpdate();
+               preftermTableCount = incrementCount( "Preferred Term", preftermTableCount );
+            }
+            // write extra vocabulary code tables
+            final Collection<String> vocabularies = concept.getVocabularies();
+            for ( String vocabulary : vocabularies ) {
+               final PreparedStatement statement = codeStatements.get( vocabulary );
+               statement.setLong( CuiTermsField.CUI.__index, cui );
+               for ( String code : concept.getCodes( vocabulary ) ) {
+                  setCodeAppropriately( statement, code, VocabularyStore.getInstance()
+                        .getVocabularyClass( vocabulary ) );
+                  statement.executeUpdate();
+                  codeTableCounts.put( vocabulary, incrementCount( vocabulary, codeTableCounts.get( vocabulary ) ) );
+               }
+            }
+         }
+         connection.commit();
+         mainTableStatement.close();
+         tuiStatement.close();
+         preftermStatement.close();
+         preftermStatement.close();
+         for ( PreparedStatement codeStatement : codeStatements.values() ) {
+            codeStatement.close();
+         }
+         final Statement writeDelayStatement = connection.createStatement();
+         writeDelayStatement.execute( "SET WRITE_DELAY FALSE" );
+         writeDelayStatement.close();
+         final Statement setBinaryStatement = connection.createStatement();
+         setBinaryStatement.execute( "SET SCRIPTFORMAT BINARY" );
+         setBinaryStatement.close();
+         final Statement readOnlyStatement = connection.createStatement();
+         readOnlyStatement.execute( "SET READONLY TRUE" );
+         readOnlyStatement.close();
+         final Statement shutdownStatement = connection.createStatement();
+         shutdownStatement.execute( "SHUTDOWN" );
+         shutdownStatement.close();
+         connection.commit();
+         connection.close();
+      } catch ( SQLException sqlE ) {
+         LOGGER.error( sqlE.getMessage() );
+         return false;
+      }
+      LOGGER.info( "Main Table Rows " + mainTableCount );
+      LOGGER.info( "Tui Table Rows " + tuiTableCount );
+      LOGGER.info( "Preferred Term Table Rows " + preftermTableCount );
+      final Function<String, String> vocabCount = v -> v + " Table Rows " + codeTableCounts.get( v );
+      VocabularyStore.getInstance().getAllVocabularies().stream()
+            .map( vocabCount )
+            .forEach( LOGGER::info );
+      return true;
+   }
+
+
+   static private Map<String, String> createCodeInsertSqls() {
+      return VocabularyStore.getInstance().getAllVocabularies().stream()
+            .collect( Collectors.toMap( Function.identity(), JdbcUtil::createCodeInsertSql ) );
+   }
+
+   static private Map<String, PreparedStatement> createCodeStatements( final Connection connection,
+                                                                       final Map<String, String> insertCodeSqls )
+         throws SQLException {
+      final Map<String, PreparedStatement> codeStatements = new HashMap<>( insertCodeSqls.size() );
+      for ( Map.Entry<String, String> codeSql : insertCodeSqls.entrySet() ) {
+         codeStatements.put( codeSql.getKey(), connection.prepareStatement( codeSql.getValue() ) );
+      }
+      return codeStatements;
+   }
+
+   static private Map<String, Long> createCodeCounts() {
+      return VocabularyStore.getInstance().getAllVocabularies().stream()
+            .collect( Collectors.toMap( Function.identity(), v -> 0L ) );
+   }
+
+   static private void setCodeAppropriately( final PreparedStatement statement, final String code,
+                                             final Class<?> type ) throws SQLException {
+      if ( String.class.equals( type ) ) {
+         statement.setString( 2, code );
+      } else if ( Double.class.equals( type ) ) {
+         statement.setDouble( 2, Double.valueOf( code ) );
+      } else if ( Long.class.equals( type ) ) {
+         statement.setLong( 2, Long.valueOf( code ) );
+      } else if ( Integer.class.equals( type ) ) {
+         statement.setInt( 2, Integer.valueOf( code ) );
+      } else {
+         LOGGER.error( "Could not set code for " + type.getName() );
+         statement.setString( 2, code );
+      }
+   }
+
+   static private long incrementCount( final String name, long count ) {
+      count++;
+      if ( count % 100000 == 0 ) {
+         LOGGER.info( name + " Table Rows " + count );
+      }
+      return count;
+   }
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,178 @@
+package org.apache.ctakes.gui.dictionary.util;
+
+import org.apache.ctakes.gui.dictionary.umls.Concept;
+
+import java.util.*;
+import java.util.function.Function;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+
+/**
+ * Finds the "rare word" of a term: the token of the term that appears least often
+ * among all the terms of the dictionary.
+ * <p/>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/17/14
+ */
+final public class RareWordUtil {
+
+   private RareWordUtil() {
+   }
+
+   // LookupDesc for the standard excluded pos tags are
+   //   VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,LS,MD,PDT,POS,PP,PP$,PRP,PRP$,RP,TO,WDT,WP,WPS,WRB
+   // Listing every verb in the language seems a pain, but listing the others is possible.
+   // Verbs should be rare in the dictionaries, excepting perhaps the activity and concept dictionaries
+   // CD, CC, DT, EX, MD, PDT, PP, PP$, PRP, PRP$, RP, TO, WDT, WP, WPS, WRB
+   // why not WP$ (possessive wh- pronoun "whose")
+   // PP$ is a Brown POS tag, not Penn Treebank (as are the rest)
+
+   static private final Set<String> BAD_POS_TERM_SET;
+
+   static {
+      final String[] BAD_POS_TERMS = {
+            // VB  verb
+            "be", "has", "have", "had", "do", "does", "did", "is", "isn", "am", "are", "was", "were",
+            // CD  cardinal number
+            "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
+            // CC  coordinating conjunction
+            "and", "or", "but", "for", "nor", "so", "yet", "while", "because",
+            // DT  determiner
+            "this", "that", "these", "those", "the", "an", "a",
+            // EX  existential there
+            "there",
+            // MD  modal
+            "can", "should", "will", "may", "shall", "might", "must", "could", "would",
+            // PDT  predeterminer
+            "some", "many", "any", "each", "all", "few", "most", "both", "half", "none", "twice",
+            // PP  prepositional phrase (preposition)
+            "at", "before", "after", "behind", "beneath", "beside", "between", "into", "through", "across", "of",
+            "concerning", "like", "unlike", "except", "with", "within", "without", "toward", "to", "past", "against",
+            "during", "until", "throughout", "below", "besides", "beyond", "from", "inside", "near", "outside", "since",
+            "upon",
+            // PP$  possessive personal pronoun - Brown POS tag, not Penn TreeBank
+            "my", "our", "your", "her", "their", "whose",
+            // PRP  personal pronoun, plurals added
+            "i", "you", "he", "she", "it", "them", "they", "we", "us",
+            // PRP$  possessive pronoun
+            "mine", "yours", "his", "hers", "its", "ours", "theirs",
+            // RP  particle  - this contains some prepositions
+            "about", "off", "up", "along", "away", "back", "by", "down", "forward", "in", "on", "out",
+            "over", "around", "under",
+            // TO  to  - also a preposition
+            "to",
+            // WDT  wh- determiner
+            "what", "whatever", "which", "whichever",
+            // WP, WPS  wh- pronoun, nominative wh- pronoun
+            "who", "whom", "which", "that", "whoever", "whomever",
+            // WRB
+            "how", "where", "when", "however", "wherever", "whenever",
+            // Mine ... some correlative conjunctions, etc.
+            "no", "not", "oh", "mr", "mrs", "miss", "dr", "as", "only", "also", "either", "neither", "whether",
+            // additional numbers
+            "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
+            "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
+            "hundred", "thousand", "million", "billion", "trillion",
+            };
+      // the list repeats a few words that belong to several categories; the set removes the duplicates
+      BAD_POS_TERM_SET = Collections.unmodifiableSet( new HashSet<>( Arrays.asList( BAD_POS_TERMS ) ) );
+   }
+
+   static private final Pattern SPACE_PATTERN = Pattern.compile( "\\s+" );
+
+   /**
+    * @return unmodifiable collection of the words that can never be a rare word
+    */
+   static public Collection<String> getUnwantedPosTexts() {
+      return Collections.unmodifiableCollection( BAD_POS_TERM_SET );
+   }
+
+   /**
+    * @param token a single token
+    * @return true if the token is longer than one character, contains at least one letter
+    * and is not one of the unwanted part of speech words.  The comparison is case-sensitive.
+    */
+   static public boolean isRarableToken( final String token ) {
+      if ( token.length() <= 1 ) {
+         return false;
+      }
+      return token.chars().anyMatch( Character::isLetter ) && !BAD_POS_TERM_SET.contains( token );
+   }
+
+
+   /**
+    * Splits every text of every concept on whitespace and counts the occurrences of each rarable token.
+    *
+    * @param concepts concepts whose texts should be counted
+    * @return map of rarable token to the number of times it occurs in the texts
+    */
+   static public Map<String, Long> getTokenCounts( final Collection<Concept> concepts ) {
+      return concepts.stream()
+            .map( Concept::getTexts )
+            .flatMap( Collection::stream )
+            .map( SPACE_PATTERN::split )
+            .flatMap( Arrays::stream )
+            .filter( RareWordUtil::isRarableToken )
+            .collect( Collectors.groupingBy( Function.identity(), Collectors.counting() ) );
+   }
+
+
+   /**
+    * The rare word of a text, its index among the whitespace-separated tokens of the text
+    * and the total number of those tokens.
+    */
+   static public final class IndexedRareWord {
+      final public String __word;
+      final public int __index;
+      final public int __tokenCount;
+
+      private IndexedRareWord( final String word, final int index, final int tokenCount ) {
+         __word = word;
+         __index = index;
+         __tokenCount = tokenCount;
+      }
+   }
+
+   /**
+    * Returned by {@link #getIndexedRareWord(String, Map)} when no token of the text has a count.
+    */
+   static public final IndexedRareWord NULL_RARE_WORD = new IndexedRareWord( null, -1, -1 );
+
+   /**
+    * Finds the token of the text with the lowest count; on a tie the first such token is used.
+    * Tokens without a count are ignored.
+    *
+    * @param text        text to split on whitespace
+    * @param tokenCounts map of token to the number of times it occurs in the dictionary
+    * @return the rare word with its index and the token count of the text,
+    * or {@link #NULL_RARE_WORD} if no token of the text has a count
+    */
+   static public IndexedRareWord getIndexedRareWord( final String text,
+                                                     final Map<String, Long> tokenCounts ) {
+      final String[] tokens = SPACE_PATTERN.split( text );
+      int bestIndex = 0;
+      long bestCount = Long.MAX_VALUE;
+      for ( int i = 0; i < tokens.length; i++ ) {
+         Long count = tokenCounts.get( tokens[ i ] );
+         if ( count != null && count < bestCount ) {
+            bestIndex = i;
+            bestCount = count;
+         }
+      }
+      if ( bestCount == Long.MAX_VALUE ) {
+         return NULL_RARE_WORD;
+      }
+      return new IndexedRareWord( tokens[ bestIndex ], bestIndex, tokens.length );
+   }
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,198 @@
+package org.apache.ctakes.gui.dictionary.util;
+
+import java.util.*;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * Splits text into tokens: runs of letters and digits stay together, most other characters become
+ * tokens of their own, and a dash is kept within a word when it follows a known prefix
+ * or precedes a known suffix.
+ * <p/>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/16/14
+ */
+final public class TextTokenizer {
+
+   private TextTokenizer() {
+   }
+
+   static private final String[] PREFIXES = {
+         "e-",
+         "a-",
+         "u-",
+         "x-",
+         "agro-",
+         "ante-",
+         "anti-",
+         "arch-",
+         "be-",
+         "bi-",
+         "bio-",
+         "co-",
+         "counter-",
+         "cross-",
+         "cyber-",
+         "de-",
+         "eco-",
+         "ex-",
+         "extra-",
+         "inter-",
+         "intra-",
+         "macro-",
+         "mega-",
+         "micro-",
+         "mid-",
+         "mini-",
+         "multi-",
+         "neo-",
+         "non-",
+         "over-",
+         "pan-",
+         "para-",
+         "peri-",
+         "post-",
+         "pre-",
+         "pro-",
+         "pseudo-",
+         "quasi-",
+         "re-",
+         "semi-",
+         "sub-",
+         "super-",
+         "tri-",
+         "ultra-",
+         "un-",
+         "uni-",
+         "vice-",
+         // From email from Colin Warner <co...@ldc.upenn.edu> on 7/25/2010
+         "electro-",
+         "gasto-",
+         "homo-",
+         "hetero-",
+         "ortho-",
+         "phospho-",
+         };
+
+   static private final String[] SUFFIXES = { "-esque", "-ette", "-fest", "-fold", "-gate", "-itis", "-less", "-most",
+                                              "-o-torium", "-rama", "-wise" };
+
+   static private final Set<String> PREFIX_SET = new HashSet<>( Arrays.asList( PREFIXES ) );
+   static private final Set<String> SUFFIX_SET = new HashSet<>( Arrays.asList( SUFFIXES ) );
+
+   static private final Pattern WHITESPACE = Pattern.compile( "\\s+" );
+
+   /**
+    * @param word some text
+    * @return the leading run of letters and digits of the word; empty if the word starts with anything else
+    */
+   static private String getNextCharTerm( final String word ) {
+      final StringBuilder sb = new StringBuilder();
+      final int count = word.length();
+      for ( int i = 0; i < count; i++ ) {
+         final char c = word.charAt( i );
+         if ( !Character.isLetterOrDigit( c ) ) {
+            return sb.toString();
+         }
+         sb.append( c );
+      }
+      return sb.toString();
+   }
+
+   /**
+    * @param word text that precedes a dash
+    * @return true if the word followed by a dash is a known prefix
+    */
+   static private boolean isPrefix( final String word ) {
+      final String prefixQ = word + "-";
+      return PREFIX_SET.contains( prefixQ );
+   }
+
+   /**
+    * @param word       full word
+    * @param startIndex index of the first character after a dash
+    * @return true if the letters and digits starting at the index, preceded by a dash, are a known suffix
+    */
+   static private boolean isSuffix( final String word, final int startIndex ) {
+      if ( word.length() <= startIndex ) {
+         return false;
+      }
+      final String nextCharTerm = getNextCharTerm( word.substring( startIndex ) );
+      if ( nextCharTerm.isEmpty() ) {
+         return false;
+      }
+      final String suffixQ = "-" + nextCharTerm;
+      return SUFFIX_SET.contains( suffixQ );
+   }
+
+   /**
+    * @param word       full word
+    * @param startIndex index of the first character after an apostrophe
+    * @return true if the only thing after the apostrophe is a single 's'
+    */
+   static private boolean isOwnerApostrophe( final CharSequence word, final int startIndex ) {
+      return word.length() == startIndex + 1 && word.charAt( startIndex ) == 's';
+   }
+
+   /**
+    * @param word       full word
+    * @param startIndex index of the first character after a period
+    * @return true if the only thing after the period is a single digit
+    */
+   static private boolean isNumberDecimal( final CharSequence word, final int startIndex ) {
+      // Bizarre scenario in which ctakes tokenizes ".2" as a fraction, but not ".22"
+      return word.length() == startIndex + 1 && Character.isDigit( word.charAt( startIndex ) );
+   }
+
+   /**
+    * @param word a single word, without whitespace
+    * @return tokens of the word, without separating letters from preceding digits
+    */
+   static public List<String> getTokens( final String word ) {
+      return getTokens( word, false );
+   }
+
+   /**
+    * @param word           a single word, without whitespace
+    * @param separateDigits true if a letter that directly follows a digit should start a new token
+    * @return tokens of the word in order of appearance
+    */
+   static public List<String> getTokens( final String word, final boolean separateDigits ) {
+      final List<String> tokens = new ArrayList<>();
+      final StringBuilder sb = new StringBuilder();
+      final int count = word.length();
+      boolean wasDigit = false;
+      for ( int i = 0; i < count; i++ ) {
+         final char c = word.charAt( i );
+         if ( Character.isLetterOrDigit( c ) ) {
+            if ( sb.length() != 0 && separateDigits && (wasDigit && !Character.isDigit( c )) ) {
+               // separating characters from digits, add the current word
+               tokens.add( sb.toString() );
+               sb.setLength( 0 );
+            }
+            wasDigit = Character.isDigit( c );
+            // Appending character to current word
+            sb.append( c );
+            continue;
+         }
+         wasDigit = false;
+         if ( c == '-' && (isPrefix( sb.toString() ) || isSuffix( word, i + 1 )) ) {
+            // what precedes is a prefix or what follows is a suffix so append the dash to the current word and move on
+            sb.append( c );
+            continue;
+         }
+         if ( (c == '\'' && isOwnerApostrophe( word, i + 1 ))
+              || (c == '.' && isNumberDecimal( word, i + 1 )) ) {
+            // what follows is an 's or .# so add the preceding and move on
+            if ( sb.length() != 0 ) {
+               tokens.add( sb.toString() );
+               sb.setLength( 0 );
+            }
+            sb.append( c );
+            continue;
+         }
+         // Wasn't a special symbol for consideration, so add the previous and symbol separately
+         if ( sb.length() != 0 ) {
+            tokens.add( sb.toString() );
+            sb.setLength( 0 );
+         }
+         tokens.add( "" + c );
+      }
+      if ( sb.length() != 0 ) {
+         // add the final word
+         tokens.add( sb.toString() );
+      }
+      return tokens;
+   }
+
+   /**
+    * @param text text to tokenize
+    * @return lowercased tokens of the text joined by single spaces, without separating letters from digits
+    */
+   static public String getTokenizedText( final String text ) {
+      return getTokenizedText( text, false );
+   }
+
+
+   /**
+    * Lowercases the text, splits it on whitespace, strips one trailing comma, semicolon or period
+    * from the last word and tokenizes each word.
+    *
+    * @param text           text to tokenize
+    * @param separateDigits true if a letter that directly follows a digit should start a new token
+    * @return tokens of the text joined by single spaces
+    */
+   static public String getTokenizedText( final String text, final boolean separateDigits ) {
+      if ( text.isEmpty() ) {
+         return text;
+      }
+      // Locale.ROOT keeps the result independent of the default locale (e.g. Turkish dotless i)
+      final String[] splits = WHITESPACE.split( text.toLowerCase( Locale.ROOT ) );
+      if ( splits.length == 0 ) {
+         return "";
+      }
+      final String lastSplit = splits[ splits.length - 1 ];
+      if ( lastSplit.endsWith( "," ) || lastSplit.endsWith( ";" ) || lastSplit.endsWith( "." ) ) {
+         // get rid of last comma or semicolon or period
+         splits[ splits.length - 1 ] = lastSplit.substring( 0, lastSplit.length() - 1 );
+      }
+      return Arrays.stream( splits )
+            .map( s -> getTokens( s, separateDigits ) )
+            .flatMap( Collection::stream )
+            .collect( Collectors.joining( " " ) );
+   }
+
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,89 @@
+package org.apache.ctakes.gui.dictionary.util;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Utility methods that split a separated-value line into its items and join items into such a line.
+ * Supported separators are bar '|' ("bsv"), tilde '~' and comma ','.  No quoting or escaping is performed.
+ * <p/>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/15/14
+ */
+final public class TokenUtil {
+
+   private TokenUtil() {
+   }
+
+   /**
+    * @param line text with items separated by bar characters '|'
+    * @return the items in order; an empty list if the line is null or blank
+    */
+   static public List<String> getBsvItems( final String line ) {
+      return getSeparatedValueItems( line, '|' );
+   }
+
+   /**
+    * @param line text with items separated by tilde characters '~'
+    * @return the items in order; an empty list if the line is null or blank
+    */
+   static public List<String> getTildeItems( final String line ) {
+      return getSeparatedValueItems( line, '~' );
+   }
+
+   /**
+    * @param line text with items separated by commas ','
+    * @return the items in order; an empty list if the line is null or blank
+    */
+   static public List<String> getCsvItems( final String line ) {
+      return getSeparatedValueItems( line, ',' );
+   }
+
+   /**
+    * Splits the line at every occurrence of the separator.  Empty items are kept, so a leading, trailing or
+    * doubled separator produces an empty string at that position.
+    *
+    * @param line      text to split
+    * @param separator character that separates items
+    * @return the items in order; an empty list if the line is null or blank
+    */
+   static private List<String> getSeparatedValueItems( final String line, final char separator ) {
+      if ( line == null || line.trim().isEmpty() ) {
+         return Collections.emptyList();
+      }
+      final List<String> tokens = new ArrayList<>();
+      int startIndex = 0;
+      int stopIndex = line.indexOf( separator );
+      // >= 0 so that a separator at the very start of the line is also honored
+      while ( stopIndex >= 0 ) {
+         tokens.add( line.substring( startIndex, stopIndex ) );
+         startIndex = stopIndex + 1;
+         stopIndex = line.indexOf( separator, startIndex );
+      }
+      // Remainder after the last separator; this is "" when the line ends with the separator
+      tokens.add( line.substring( startIndex ) );
+      return tokens;
+   }
+
+
+   /**
+    * @param values items to join
+    * @return the items joined by bar characters '|'; an empty string if values is null or empty
+    */
+   static public String createBsvLine( final Collection<String> values ) {
+      if ( values == null ) {
+         return "";
+      }
+      return String.join( "|", values );
+   }
+
+   /**
+    * @param values items to join
+    * @return the items joined by bar characters '|'; an empty string if values is null or empty
+    */
+   static public String createBsvLine( final String... values ) {
+      if ( values == null ) {
+         return "";
+      }
+      return String.join( "|", values );
+   }
+
+   /**
+    * @param values items to join
+    * @return the items joined by commas ','; an empty string if values is null or empty
+    */
+   static public String createCsvLine( final Collection<String> values ) {
+      if ( values == null ) {
+         return "";
+      }
+      return String.join( ",", values );
+   }
+
+   /**
+    * @param values items to join
+    * @return the items joined by commas ','; an empty string if values is null or empty
+    */
+   static public String createCsvLine( final String... values ) {
+      if ( values == null ) {
+         return "";
+      }
+      return String.join( ",", values );
+   }
+
+}

Added: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/pipeline/MainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/pipeline/MainPanel.java?rev=1788936&view=auto
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/pipeline/MainPanel.java (added)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/pipeline/MainPanel.java Mon Mar 27 14:37:44 2017
@@ -0,0 +1,316 @@
+package org.apache.ctakes.gui.pipeline;
+
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.gui.component.DisablerPane;
+import org.apache.ctakes.gui.component.LoggerPanel;
+import org.apache.ctakes.gui.component.PositionedSplitPane;
+import org.apache.ctakes.gui.component.SmoothTipList;
+import org.apache.ctakes.gui.pipeline.bit.BitCellRenderer;
+import org.apache.ctakes.gui.pipeline.bit.PipeBitFinder;
+import org.apache.ctakes.gui.pipeline.bit.available.AvailablesListModel;
+import org.apache.ctakes.gui.pipeline.bit.available.AvailablesRenderer;
+import org.apache.ctakes.gui.pipeline.bit.info.PipeBitInfoPanel;
+import org.apache.ctakes.gui.pipeline.bit.user.*;
+import org.apache.log4j.Logger;
+
+import javax.swing.*;
+import java.awt.*;
+import java.awt.event.ActionEvent;
+import java.awt.event.MouseAdapter;
+import java.awt.event.MouseEvent;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/**
+ * Main panel of the pipeline gui.  The top of a vertical split holds the list of available pipe bits (west)
+ * and the user pipeline (east), each with an information panel; the bottom holds the logger panel.
+ * <p>
+ * TODO this interface is completely graphical and needs a lot of attention to be done well
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/20/2016
+ */
+final class MainPanel extends JPanel {
+
+   static private final Logger LOGGER = Logger.getLogger( "MainPanel" );
+
+
+   private final AvailablesListModel _availablesListModel = new AvailablesListModel();
+   private JList<PipeBitInfo> _availablesList;
+   private JList<UserBit> _userBitsList;
+
+   MainPanel() {
+      super( new BorderLayout( 10, 10 ) );
+
+      final JSplitPane logSplit = new PositionedSplitPane( JSplitPane.VERTICAL_SPLIT );
+      logSplit.setTopComponent( createMainPanel() );
+      logSplit.setBottomComponent( LoggerPanel.createLoggerPanel() );
+      logSplit.setDividerLocation( 0.6d );
+
+      add( logSplit, BorderLayout.CENTER );
+   }
+
+
+   /**
+    * Creates the list of available pipe bits next to an information panel for the selected bit.
+    * Assigns {@link #_availablesList}, which {@link #createEastPanel()} requires.
+    *
+    * @return split pane with the availables list on the left and the bit information on the right
+    */
+   private JComponent createWestPanel() {
+      final JLabel header = new JLabel( "Available Pipe Bits" );
+      header.setPreferredSize( new Dimension( 100, 30 ) );
+      header.setHorizontalAlignment( SwingConstants.CENTER );
+      _availablesList = createPipeBitList( _availablesListModel );
+
+      final ListCellRenderer<Object> availableRenderer = new AvailablesRenderer();
+      _availablesList.setCellRenderer( availableRenderer );
+      final JScrollPane scroll = new JScrollPane( _availablesList );
+      scroll.setColumnHeaderView( header );
+      scroll.setHorizontalScrollBarPolicy( ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER );
+
+      final JSplitPane split = new PositionedSplitPane();
+      split.setLeftComponent( scroll );
+      split.setRightComponent( createBitInfoPanel( _availablesList ) );
+      split.setDividerLocation( 0.3d );
+      return split;
+   }
+
+
+   /**
+    * Creates the user pipeline list next to an information panel for the selected user bit, and registers the
+    * mouse listeners of both lists.  Must be called after {@link #createWestPanel()} because the availables
+    * listener is attached to {@link #_availablesList}.
+    *
+    * @return split pane with the user pipeline list on the left and the user bit information on the right
+    */
+   private JComponent createEastPanel() {
+      final JLabel header = new JLabel( "User Pipeline" );
+      header.setPreferredSize( new Dimension( 100, 30 ) );
+      header.setHorizontalAlignment( SwingConstants.CENTER );
+      final UserBitListModel userBitListModel = new UserBitListModel();
+      _userBitsList = createUserBitList( userBitListModel );
+      final ListCellRenderer<Object> usersRenderer = new UserBitRenderer();
+      _userBitsList.setCellRenderer( usersRenderer );
+      final JScrollPane scroll = new JScrollPane( _userBitsList );
+      scroll.setColumnHeaderView( header );
+
+      // Listener for mouse clicks and float-over in availables list
+      final AvailablesMouseListener availablesMouse = new AvailablesMouseListener( _availablesList, userBitListModel );
+      _availablesList.addMouseListener( availablesMouse );
+      _availablesList.addMouseMotionListener( availablesMouse );
+
+      // Listener for mouse clicks and float-over in users list
+      final UsersMouseListener usersMouse = new UsersMouseListener( _userBitsList, userBitListModel );
+      _userBitsList.addMouseListener( usersMouse );
+      _userBitsList.addMouseMotionListener( usersMouse );
+
+      final JSplitPane split = new PositionedSplitPane();
+      split.setLeftComponent( scroll );
+      split.setRightComponent( createUserBitPanel( _userBitsList ) );
+      split.setDividerLocation( 0.3d );
+      return split;
+   }
+
+
+   /**
+    * @return horizontal split pane with the west panel on the left and the east panel on the right
+    */
+   private JComponent createMainPanel() {
+      // The west panel must be created first, the east panel uses the list that it creates
+      final JComponent westPanel = createWestPanel();
+      final JComponent eastPanel = createEastPanel();
+      return new JSplitPane( JSplitPane.HORIZONTAL_SPLIT, westPanel, eastPanel );
+   }
+
+
+   // Disabled layout code.  It references helpers (createPipeBitPanel, addPipeBitListListener)
+   // that are not defined in this class.
+//   private JComponent createCenterPanel() {
+//      final JPanel panel = new JPanel( new BorderLayout() );
+//      final JList<PipeBitInfo> pipeBitList = createPipeBitList( _availablesListModel );
+//      final JSplitPane centerSplit = new PositionedSplitPane();
+//      centerSplit.setLeftComponent( new JScrollPane( pipeBitList ) );
+//      final PipeBitInfoPanel pipeBitInfoPanel = createPipeBitPanel();
+//      centerSplit.setRightComponent( pipeBitInfoPanel );
+//      centerSplit.setDividerLocation( 0.25d );
+//      panel.add( centerSplit, BorderLayout.CENTER );
+//      panel.add( createGoPanel(), BorderLayout.SOUTH );
+//      pipeBitInfoPanel.addPipeBitListListener( pipeBitList );
+//      return panel;
+//   }
+
+   /**
+    * @param model model with the pipe bits to display
+    * @return list with a fixed cell height of 20 that renders with a {@link BitCellRenderer}
+    */
+   static private JList<PipeBitInfo> createPipeBitList( final ListModel<PipeBitInfo> model ) {
+      final JList<PipeBitInfo> bitList = new SmoothTipList<>( model );
+      bitList.setCellRenderer( new BitCellRenderer() );
+      bitList.setFixedCellHeight( 20 );
+      return bitList;
+   }
+
+   /**
+    * @param model model with the user bits to display
+    * @return list with a fixed cell height of 20
+    */
+   static private JList<UserBit> createUserBitList( final ListModel<UserBit> model ) {
+      final JList<UserBit> bitList = new SmoothTipList<>( model );
+      bitList.setFixedCellHeight( 20 );
+      return bitList;
+   }
+
+   /**
+    * @param list list of pipe bits that is handed to the information panel
+    * @return information panel attached to the given list
+    */
+   static private PipeBitInfoPanel createBitInfoPanel( final JList<PipeBitInfo> list ) {
+      final PipeBitInfoPanel pipeBitInfoPanel = new PipeBitInfoPanel();
+      pipeBitInfoPanel.setPipeBitInfoList( list );
+      return pipeBitInfoPanel;
+   }
+
+   /**
+    * @param list list of user bits that is handed to the information panel
+    * @return information panel attached to the given list
+    */
+   static private UserBitInfoPanel createUserBitPanel( final JList<UserBit> list ) {
+      final UserBitInfoPanel userBitPanelPanel = new UserBitInfoPanel();
+      userBitPanelPanel.setUserBitList( list );
+      return userBitPanelPanel;
+   }
+
+
+   /**
+    * @return button that starts a scan for pipe bits; only referenced by the disabled layout code above
+    */
+   private JComponent createGoPanel() {
+      return new JButton( new FindPipeBitsAction() );
+   }
+
+
+   /**
+    * Scans for pipe bits on a background thread and then shows them in the availables list.
+    * Returns immediately; the scan is still running when this method returns.
+    */
+   public void findPipeBits() {
+      final ExecutorService executor = Executors.newSingleThreadExecutor();
+      executor.execute( new PiperBitParser() );
+      // Nothing else is submitted.  The submitted scan still runs; shutdown lets the worker thread end afterward
+      executor.shutdown();
+   }
+
+   /**
+    * Shows or removes the wait cursor and the disabler pane.  Must be called on the event dispatch thread.
+    *
+    * @param busy true to block the gui during a scan, false to release it
+    */
+   private void setBusy( final boolean busy ) {
+      // The root is null while this panel is not part of a displayed component hierarchy
+      final Component root = SwingUtilities.getRoot( this );
+      if ( root != null ) {
+         root.setCursor( busy ? Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR ) : Cursor.getDefaultCursor() );
+      }
+      DisablerPane.getInstance().setVisible( busy );
+   }
+
+   /**
+    * Background task that scans for pipe bits.  The scan itself runs on the calling (worker) thread,
+    * all changes to the gui and to the list model are posted to the event dispatch thread.
+    */
+   private class PiperBitParser implements Runnable {
+      @Override
+      public void run() {
+         SwingUtilities.invokeLater( () -> setBusy( true ) );
+         try {
+            PipeBitFinder.getInstance().scan();
+            SwingUtilities.invokeLater(
+                  () -> _availablesListModel.setPipeBits( PipeBitFinder.getInstance().getPipeBits() ) );
+         } finally {
+            // Posted even if the scan fails so that the gui is never left blocked
+            SwingUtilities.invokeLater( () -> setBusy( false ) );
+         }
+      }
+   }
+
+
+   /**
+    * Starts a background scan for pipe bits
+    */
+   private class FindPipeBitsAction extends AbstractAction {
+      private FindPipeBitsAction() {
+         super( "Find Readers, Annotators and Writers" );
+      }
+
+      @Override
+      public void actionPerformed( final ActionEvent event ) {
+         findPipeBits();
+      }
+   }
+
+   /**
+    * Adds a pipe bit to the user pipeline when the mouse is released at the right edge of its cell,
+    * and repaints the availables list when the cell under the mouse changes.
+    */
+   static private final class AvailablesMouseListener extends MouseAdapter {
+      // Width in pixels of the clickable zone at the right edge of a cell.
+      // NOTE(review): presumably the width of a button drawn by AvailablesRenderer - confirm
+      static private final int CLICK_ZONE_WIDTH = 37;
+
+      private final JList<PipeBitInfo> _list;
+      private final UserBitListModel _userBitListModel;
+      private int _currentFocusIndex = -1;
+
+      private AvailablesMouseListener( final JList<PipeBitInfo> list, final UserBitListModel userBitListModel ) {
+         _list = list;
+         _userBitListModel = userBitListModel;
+      }
+
+      @Override
+      public void mouseReleased( final MouseEvent event ) {
+         // The event location is never null, unlike JList.getMousePosition()
+         final Point p = event.getPoint();
+         if ( p.getX() < _list.getWidth() - CLICK_ZONE_WIDTH ) {
+            return;
+         }
+         final int index = _list.locationToIndex( p );
+         if ( index < 0 ) {
+            // empty list
+            return;
+         }
+         // locationToIndex returns the closest cell, which may not contain the point
+         final Rectangle cellBounds = _list.getCellBounds( index, index );
+         if ( cellBounds == null || !cellBounds.contains( p ) ) {
+            return;
+         }
+         final AvailablesListModel availablesModel = (AvailablesListModel)_list.getModel();
+         final PipeBitInfo pipeBitInfo = availablesModel.getElementAt( index );
+         final UserBit userBit = new DefaultUserBit( pipeBitInfo, availablesModel.getPipeBit( pipeBitInfo ) );
+         _userBitListModel.addUserBit( userBit );
+      }
+
+      @Override
+      public void mouseEntered( final MouseEvent event ) {
+         setFocus( _list.getMousePosition() );
+      }
+
+      @Override
+      public void mouseExited( final MouseEvent event ) {
+         setFocus( _list.getMousePosition() );
+      }
+
+      @Override
+      public void mouseDragged( final MouseEvent event ) {
+         setFocus( _list.getMousePosition() );
+      }
+
+      @Override
+      public void mouseMoved( final MouseEvent event ) {
+         setFocus( _list.getMousePosition() );
+      }
+
+      /**
+       * Repaints the list if the cell under the mouse is not the one recorded by the previous call.
+       *
+       * @param p mouse position in list coordinates, null if the mouse is not over the list
+       */
+      private void setFocus( final Point p ) {
+         if ( p == null ) {
+            if ( _currentFocusIndex >= 0 ) {
+               _currentFocusIndex = -1;
+               _list.repaint();
+            }
+            return;
+         }
+         final int index = _list.locationToIndex( p );
+         if ( index == _currentFocusIndex ) {
+            return;
+         }
+         _currentFocusIndex = index;
+         _list.repaint();
+      }
+   }
+
+   /**
+    * Moves a user bit up, moves it down or removes it when the mouse is released in one of three zones at
+    * the right edge of its cell, and repaints the user list when the cell under the mouse changes.
+    */
+   static private final class UsersMouseListener extends MouseAdapter {
+      // Distances in pixels from the right edge of the list that bound the three click zones.
+      // NOTE(review): presumably these match buttons drawn by UserBitRenderer - confirm
+      static private final int CLICK_ZONE_WIDTH = 65;
+      static private final int MOVE_UP_ZONE_END = 45;
+      static private final int MOVE_DOWN_ZONE_END = 25;
+
+      private final JList<UserBit> _list;
+      private final UserBitListModel _userBitListModel;
+      private int _currentFocusIndex = -1;
+
+      private UsersMouseListener( final JList<UserBit> list, final UserBitListModel userBitListModel ) {
+         _list = list;
+         _userBitListModel = userBitListModel;
+      }
+
+      @Override
+      public void mouseReleased( final MouseEvent event ) {
+         // The event location is never null, unlike JList.getMousePosition()
+         final Point p = event.getPoint();
+         final int widthMinusX = _list.getWidth() - p.x;
+         if ( widthMinusX > CLICK_ZONE_WIDTH ) {
+            return;
+         }
+         final int index = _list.locationToIndex( p );
+         if ( index < 0 ) {
+            // empty list
+            return;
+         }
+         // locationToIndex returns the closest cell, which may not contain the point
+         final Rectangle cellBounds = _list.getCellBounds( index, index );
+         if ( cellBounds == null || !cellBounds.contains( p ) ) {
+            return;
+         }
+         _list.getSelectionModel().clearSelection();
+         UserBitRenderer.SUSPEND_BUTTONS = true;
+         try {
+            if ( widthMinusX > MOVE_UP_ZONE_END ) {
+               _userBitListModel.moveUserBitUp( index );
+            } else if ( widthMinusX > MOVE_DOWN_ZONE_END ) {
+               _userBitListModel.moveUserBitDown( index );
+            } else {
+               _userBitListModel.removeUserBit( index );
+            }
+         } finally {
+            // Always reset, otherwise a failed model change would leave the flag set
+            UserBitRenderer.SUSPEND_BUTTONS = false;
+         }
+         _list.repaint();
+      }
+
+      @Override
+      public void mouseEntered( final MouseEvent event ) {
+         setFocus( _list.getMousePosition() );
+      }
+
+      @Override
+      public void mouseExited( final MouseEvent event ) {
+         setFocus( _list.getMousePosition() );
+      }
+
+      @Override
+      public void mouseDragged( final MouseEvent event ) {
+         setFocus( _list.getMousePosition() );
+      }
+
+      @Override
+      public void mouseMoved( final MouseEvent event ) {
+         setFocus( _list.getMousePosition() );
+      }
+
+      /**
+       * Repaints the list if the cell under the mouse is not the one recorded by the previous call.
+       *
+       * @param p mouse position in list coordinates, null if the mouse is not over the list
+       */
+      private void setFocus( final Point p ) {
+         if ( p == null ) {
+            if ( _currentFocusIndex >= 0 ) {
+               _currentFocusIndex = -1;
+               _list.repaint();
+            }
+            return;
+         }
+         final int index = _list.locationToIndex( p );
+         if ( index == _currentFocusIndex ) {
+            return;
+         }
+         _currentFocusIndex = index;
+         _list.repaint();
+      }
+   }
+
+
+}