You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2017/03/16 21:55:01 UTC

svn commit: r1787257 [4/5] - in /ctakes/trunk: ./ ctakes-dictionary-gui/ ctakes-dictionary-gui/resources/ ctakes-dictionary-gui/resources/org/ ctakes-dictionary-gui/resources/org/apache/ ctakes-dictionary-gui/resources/org/apache/ctakes/ ctakes-diction...

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/UmlsTermUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/UmlsTermUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/UmlsTermUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/UmlsTermUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,286 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+
+/**
+ * Contains all the methods used to parse individual text definitions of umls terms
+ * <p/>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/16/14
+ */
+final public class UmlsTermUtil {
+
+
+   private enum DATA_FILE {
+      REMOVAL_PREFIX_TRIGGERS( "RemovalPrefixTriggers.txt" ),
+      REMOVAL_SUFFIX_TRIGGERS( "RemovalSuffixTriggers.txt" ),
+      REMOVAL_FUNCTION_TRIGGERS( "RemovalFunctionTriggers.txt" ),
+      REMOVAL_COLON_TRIGGERS( "RemovalColonTriggers.txt" ),
+      UNWANTED_PREFIXES( "UnwantedPrefixes.txt" ),
+      UNWANTED_SUFFIXES( "UnwantedSuffixes.txt" ),
+      MODIFIER_SUFFIXES( "ModifierSuffixes.txt" ),
+      RIGHT_ABBREVIATIONS( "RightAbbreviations.txt" );
+      final private String __name;
+
+      DATA_FILE( final String name ) {
+         __name = name;
+      }
+   }
+
+   static private String getDataPath( final String dataDir, final DATA_FILE dataFile ) {
+      return dataDir + '/' + dataFile.__name;
+   }
+
+   final private Collection<String> _removalPrefixTriggers;
+   final private Collection<String> _removalSuffixTriggers;
+   final private Collection<String> _removalColonTriggers;
+   final private Collection<String> _removalFunctionTriggers;
+   final private Collection<String> _unwantedPrefixes;
+   final private Collection<String> _unwantedSuffixes;
+   final private Collection<String> _modifierSuffixes;
+   final private Collection<String> _abbreviations;
+
+   public UmlsTermUtil( final String dataDir ) {
+      this( getDataPath( dataDir, DATA_FILE.REMOVAL_PREFIX_TRIGGERS ),
+            getDataPath( dataDir, DATA_FILE.REMOVAL_SUFFIX_TRIGGERS ),
+            getDataPath( dataDir, DATA_FILE.REMOVAL_COLON_TRIGGERS ),
+            getDataPath( dataDir, DATA_FILE.REMOVAL_FUNCTION_TRIGGERS ),
+            getDataPath( dataDir, DATA_FILE.UNWANTED_PREFIXES ),
+            getDataPath( dataDir, DATA_FILE.UNWANTED_SUFFIXES ),
+            getDataPath( dataDir, DATA_FILE.MODIFIER_SUFFIXES ),
+            getDataPath( dataDir, DATA_FILE.RIGHT_ABBREVIATIONS ) );
+   }
+
+   public UmlsTermUtil( final String removalPrefixTriggersPath, final String removalSuffixTriggersPath,
+                        final String removalColonTriggersPath, final String removalFunctionTriggersPath,
+                        final String unwantedPrefixesPath, final String unwantedSuffixesPath,
+                        final String modifierSuffixesPath, final String abbreviationsPath ) {
+      _removalPrefixTriggers = FileUtil.readOneColumn( removalPrefixTriggersPath, "term removal Prefix Triggers" );
+      _removalSuffixTriggers = FileUtil.readOneColumn( removalSuffixTriggersPath, "term removal Suffix Triggers" );
+      _removalColonTriggers = FileUtil.readOneColumn( removalColonTriggersPath, "term removal Colon Triggers" );
+      _removalFunctionTriggers = FileUtil.readOneColumn( removalFunctionTriggersPath, "term removal Function Triggers" );
+      _unwantedPrefixes = FileUtil.readOneColumn( unwantedPrefixesPath, "unwanted Prefixes" );
+      _unwantedSuffixes = FileUtil.readOneColumn( unwantedSuffixesPath, "unwanted Suffixes" );
+      _modifierSuffixes = FileUtil.readOneColumn( modifierSuffixesPath, "modifier Suffixes" );
+      _abbreviations = FileUtil.readOneColumn( abbreviationsPath, "Abbreviations to expand" );
+   }
+
+   public boolean isTextValid( final String text ) {
+      // Check for illegal characters
+      for ( int i = 0; i < text.length(); i++ ) {
+         if ( text.charAt( i ) < ' ' || text.charAt( i ) > '~' ) {
+            return false;
+         }
+      }
+      // Check for auto-created note form
+      if ( text.split( "@" ).length > 2 ) {
+         return false;
+      }
+      if ( text.length() == 3 && text.charAt( 0 ) == '(' ) {
+         return false;
+      }
+      if ( _removalPrefixTriggers.stream().anyMatch( text::startsWith ) ) {
+         return false;
+      }
+      if ( _removalSuffixTriggers.stream().anyMatch( text::endsWith ) ) {
+         return false;
+      }
+      if ( _removalColonTriggers.stream().anyMatch( text::contains ) ) {
+         return false;
+      }
+      if ( _removalFunctionTriggers.stream().anyMatch( text::contains ) ) {
+         return false;
+      }
+      return true;
+   }
+
+   static public boolean isTextTooShort( final String text, final int minCharLength ) {
+      return text.length() < minCharLength;
+   }
+
+
+   static public boolean isTextTooLong( final String text, final int maxCharLength,
+                                 final int maxWordCount, final int maxSymCount ) {
+      final String[] splits = text.split( "\\s+" );
+      int wordCount = 0;
+      int symCount = 0;
+      for ( String split : splits ) {
+         if ( split.length() > maxCharLength ) {
+            return true;
+         }
+         if ( split.length() > 2 ) {
+            wordCount++;
+         } else {
+            symCount++;
+         }
+      }
+      return wordCount > maxWordCount || symCount > maxSymCount;
+   }
+
+
+   public Collection<String> getFormattedTexts( final String strippedText, final boolean extractAbbreviations,
+                                                final int minCharLength, final int maxCharLength,
+                                                final int maxWordCount, final int maxSymCount ) {
+      Collection<String> extractedTerms = Collections.emptySet();
+      if ( extractAbbreviations ) {
+         // add embedded abbreviations
+         extractedTerms = extractAbbreviations( strippedText );
+      }
+      if ( extractedTerms.isEmpty() ) {
+         extractedTerms = extractModifiers( strippedText );
+      }
+      if ( !extractedTerms.isEmpty() ) {
+         extractedTerms.add( strippedText );
+         return getFormattedTexts( getPluralTerms( getStrippedTexts( extractedTerms ) ), minCharLength, maxCharLength, maxWordCount, maxSymCount );
+      }
+      Collection<String> texts = new HashSet<>( 1 );
+      texts.add( strippedText );
+      return getFormattedTexts( getPluralTerms( getStrippedTexts( texts ) ), minCharLength, maxCharLength, maxWordCount, maxSymCount );
+   }
+
+
+   static private Collection<String> getFormattedTexts( final Collection<String> extractedTerms,
+                                                final int minCharLength, final int maxCharLength,
+                                                final int maxWordCount, final int maxSymCount ) {
+      return extractedTerms.stream()
+            .filter( t -> !isTextTooShort( t, minCharLength ) )
+            .filter( t -> !isTextTooLong( t, maxCharLength, maxWordCount, maxSymCount ) )
+            .collect( Collectors.toList() );
+   }
+
+   static private Collection<String> getPluralTerms( final Collection<String> texts ) {
+      final Collection<String> plurals = texts.stream()
+            .filter( t -> t.endsWith( "( s )" ) )
+            .collect( Collectors.toList() );
+      if ( plurals.isEmpty() ) {
+         return texts;
+      }
+      texts.removeAll( plurals );
+      final Consumer<String> addPlural = t -> {
+         texts.add( t );
+         texts.add( t + "s" );
+      };
+      plurals.stream()
+            .map( t -> t.substring( 0, t.length() - 5 ) )
+            .forEach( addPlural );
+      return texts;
+   }
+
+   private Collection<String> getStrippedTexts( final Collection<String> texts ) {
+      return texts.stream()
+            .map( this::getStrippedText )
+            .filter( t -> !t.isEmpty() )
+            .collect( Collectors.toSet() );
+   }
+
+   public String getStrippedText( final String text ) {
+      // remove form underlines
+//      if ( text.contains( "_ _ _" ) ) {
+//         final int lastParen = text.lastIndexOf( '(' );
+//         final int lastDash = text.indexOf( "_ _ _" );
+//         final int deleteIndex = Math.max( 0, Math.min( lastParen, lastDash ) );
+//         if ( deleteIndex > 0 ) {
+//            return getStrippedText( text.substring( 0, deleteIndex - 1 ).trim() );
+//         }
+//      }
+      // remove unmatched parentheses, brackets, etc.
+      //      if ( text.startsWith( "(" ) && !text.contains( ")" ) ) {
+      //         return getStrippedText( text.substring( 1 ).trim() );
+      //      }
+      //      if ( text.startsWith( "[" ) && !text.contains( "]" ) ) {
+      //         return getStrippedText( text.substring( 1 ).trim() );
+      //      }
+      //      if ( text.startsWith( "(" ) && text.endsWith( ") or" ) ) {
+      //         return getStrippedText( text.substring( 1, text.length() - 4 ).trim() );
+      //      }
+      //      if ( text.startsWith( "or (" ) ) {
+      //         return getStrippedText( text.substring( 2 ).trim() );
+      //      }
+      //      if ( text.startsWith( "\"" ) && text.endsWith( "\"" ) ) {
+      //         return getStrippedText( text.substring( 1 ).trim() );
+      //      }
+      //      if ( text.startsWith( "(" ) && text.endsWith( ")" ) ) {
+      //         return getStrippedText( text.substring( 1, text.length() - 2 ).trim() );
+      //      }
+      //      if ( text.startsWith( "[" ) && text.endsWith( "]" ) ) {
+      //         return getStrippedText( text.substring( 1, text.length() - 2 ).trim() );
+      //      }
+      //      if ( text.startsWith( "&" ) ) {
+      //         return getStrippedText( text.substring( 1 ).trim() );
+      //      }
+      //      if ( text.endsWith( "]" ) && !text.contains( "[" ) ) {
+      //         return getStrippedText( text.substring( 0, text.length() - 2 ).trim() );
+      //      }
+      //      if ( text.endsWith( ")" ) && !text.contains( "(" ) ) {
+      //         return getStrippedText( text.substring( 0, text.length() - 2 ).trim() );
+      //      }
+      String strippedText = text.trim();
+      // Text in umls can have multiple suffixes and/or prefixes.  Stripping just once doesn't do the trick
+      int lastLength = Integer.MAX_VALUE;
+      while ( lastLength != strippedText.length() ) {
+         lastLength = strippedText.length();
+         for ( String prefix : _unwantedPrefixes ) {
+            if ( strippedText.startsWith( prefix ) ) {
+               strippedText = strippedText.substring( prefix.length() ).trim();
+            }
+         }
+         for ( String suffix : _unwantedSuffixes ) {
+            if ( strippedText.endsWith( suffix ) ) {
+               strippedText = strippedText.substring( 0, strippedText.length() - suffix.length() ).trim();
+            }
+         }
+         if ( !isTextValid( strippedText ) ) {
+            return "";
+         }
+      }
+      if ( strippedText.contains( "(" ) && strippedText.contains( "[" ) ) {
+         return "";
+      }
+      return strippedText;
+   }
+
+
+   private Collection<String> extractAbbreviations( final String tokenizedText ) {
+      for ( String abbreviation : _abbreviations ) {
+         if ( tokenizedText.endsWith( abbreviation )
+               && !tokenizedText.contains( ":" ) && !tokenizedText.contains( " of " )
+               && !tokenizedText.contains( " for " ) ) {
+            final String noAbbrTerm
+                  = tokenizedText.substring( 0, tokenizedText.length() - abbreviation.length() ).trim();
+            final String abbrTerm
+                  = abbreviation.replace( ":", "" ).replace( "(", "" ).replace( ")", "" ).replace( "-", "" )
+                  .replace( "[", "" ).replace( "]", "" ).replace( "&", "" ).trim();
+            final Collection<String> extractedAbbreviations = new HashSet<>( 2 );
+            extractedAbbreviations.add( noAbbrTerm );
+            extractedAbbreviations.add( abbrTerm );
+            return extractedAbbreviations;
+         }
+      }
+      return Collections.emptyList();
+   }
+
+   private Collection<String> extractModifiers( final String tokenizedText ) {
+      for ( String modifier : _modifierSuffixes ) {
+         if ( tokenizedText.endsWith( modifier ) ) {
+            final String mainText = tokenizedText.substring( 0, tokenizedText.length() - modifier.length() ).trim();
+            final String modifierText
+                  = modifier.replace( "(", "" ).replace( ")", "" ).replace( "-", "" ).replace( ",", "" ).trim();
+            final Collection<String> modifiedTexts = new HashSet<>( 2 );
+            modifiedTexts.add( tokenizedText );
+            modifiedTexts.add( modifierText + " " + mainText );
+            return modifiedTexts;
+         }
+      }
+      return Collections.emptyList();
+   }
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Vocabulary.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Vocabulary.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Vocabulary.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Vocabulary.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,105 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+public enum Vocabulary {
+   INSTANCE;
+   static public Vocabulary getInstance() {
+      return INSTANCE;
+   }
+
+   private final Logger LOGGER = LogManager.getLogger( "Vocabulary" );
+
+   private final Map<String,Class<?>> _vocabularyClasses = new HashMap<>();
+
+   public Collection<String> getAllVocabularies() {
+      return _vocabularyClasses.keySet();
+   }
+
+   public Class<?> getVocabularyClass( final String vocabulary ) {
+      return _vocabularyClasses.get( vocabulary );
+   }
+
+   public void addVocabulary( final String vocabulary, final String code ) {
+      final Class<?> vocabularyClass = _vocabularyClasses.get( vocabulary );
+      if ( String.class.equals( vocabularyClass ) ) {
+         return;
+      }
+      _vocabularyClasses.put( vocabulary, getBestClass( code ) );
+   }
+
+   public String getJdbcClass( final String vocabulary ) {
+      final Class<?> vocabularyClass = _vocabularyClasses.get( vocabulary );
+      if ( String.class.equals( vocabularyClass ) ) {
+         return "VARCHAR(48)";
+      } else if ( Double.class.equals( vocabularyClass ) ) {
+         return "FLOAT";
+      } else if ( Long.class.equals( vocabularyClass ) ) {
+         return "BIGINT";
+      } else if ( Integer.class.equals( vocabularyClass ) ) {
+         return "INTEGER";
+      } else {
+         LOGGER.error( "Could not derive database class for " + vocabularyClass.getName() );
+      }
+      return "VARCHAR(48)";
+   }
+
+   public String getCtakesClass( final String vocabulary ) {
+      final Class<?> vocabularyClass = _vocabularyClasses.get( vocabulary );
+      if ( String.class.equals( vocabularyClass ) ) {
+         return "text";
+      } else if ( Double.class.equals( vocabularyClass ) ) {
+         return "double";
+      } else if ( Long.class.equals( vocabularyClass ) ) {
+         return "long";
+      } else if ( Integer.class.equals( vocabularyClass ) ) {
+         return "int";
+      } else {
+         LOGGER.error( "Could not derive database class for " + vocabularyClass.getName() );
+      }
+      return "text";
+   }
+
+   static private Class<?> getBestClassFuture( final String code, final Class<?> currentClass ) {
+      boolean haveDot = false;
+      for ( char c : code.toCharArray() ) {
+         if ( !Character.isDigit( c ) ) {
+            if ( c == '.' ) {
+               if ( haveDot ) {
+                  return String.class;
+               }
+               haveDot = true;
+            }
+            return String.class;
+         }
+      }
+      if ( haveDot || Double.class.equals( currentClass ) ) {
+         return Double.class;
+      }
+      if ( code.length() > 9 || Long.class.equals( currentClass ) ) {
+         return Long.class;
+      }
+      return Integer.class;
+   }
+
+   // TODO replace with getBestClassFuture when ctakes is upgraded to accept double and int
+   static private Class<?> getBestClass( final String code ) {
+      for ( char c : code.toCharArray() ) {
+         if ( !Character.isDigit( c ) ) {
+            return String.class;
+         }
+      }
+      return Long.class;
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/FileUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/FileUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/FileUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/FileUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,249 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+//import org.apache.ctakes.dictionarytool.util.collection.HashSetMap;
+
+import javax.swing.filechooser.FileSystemView;
+import java.io.*;
+import java.util.*;
+import java.util.logging.Logger;
+
+/**
+ * Static utilities to resolve paths and to read and write simple line based text files.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/15/14
+ */
+final public class FileUtil {
+
+   private FileUtil() {
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "FileUtil" );
+
+   /**
+    * Resolves the shorthand at the start of a path.
+    * <ul>
+    * <li>null or empty is treated as "."</li>
+    * <li>a leading "~" is replaced with the value of the system property user.home</li>
+    * <li>"." becomes the value of the system property user.dir, or the default directory of the
+    * file system view if that property is not set</li>
+    * <li>each leading ".." moves one directory up from the directory for ".";
+    * the remainder of the path is appended to the resulting directory</li>
+    * </ul>
+    * Any other path is returned as is.  The jvm is exited with status 1 if a ".." cannot be resolved.
+    *
+    * @param dirPath path to a directory or file, possibly starting with "~", "." or ".."
+    * @return the path with a resolved start
+    */
+   static public String parseDirText( final String dirPath ) {
+      if ( dirPath == null || dirPath.isEmpty() ) {
+         return parseDirText( "." );
+      } else if ( dirPath.startsWith( "~" ) ) {
+         // Concatenate instead of String.replaceAll: a '\' or '$' in the home path is special in a
+         // regex replacement, and only the leading '~' stands for the home directory
+         return parseDirText( System.getProperty( "user.home" ) + dirPath.substring( 1 ) );
+      } else if ( dirPath.equals( "." ) ) {
+         final String userDir = System.getProperty( "user.dir" );
+         if ( userDir == null || userDir.isEmpty() ) {
+            return FileSystemView.getFileSystemView().getDefaultDirectory().getPath();
+         }
+         return userDir;
+      } else if ( startsWithParent( dirPath ) ) {
+         File cwd = new File( parseDirText( "." ) );
+         String remainder = dirPath;
+         while ( startsWithParent( remainder ) ) {
+            if ( !cwd.isDirectory() || cwd.getParentFile() == null ) {
+               LOGGER.severe( "Invalid directory " + dirPath );
+               System.exit( 1 );
+            }
+            cwd = cwd.getParentFile();
+            // drop the ".." and the separator that follows it, if there is one
+            remainder = remainder.length() > 2 ? remainder.substring( 3 ) : "";
+         }
+         // Keep whatever follows the parent references, e.g. the "data/list.txt" of "../data/list.txt"
+         return remainder.isEmpty() ? cwd.getPath() : new File( cwd, remainder ).getPath();
+      }
+      return dirPath;
+   }
+
+   /**
+    * @param path some path
+    * @return true if the path is ".." or starts with ".." followed by a '/' or '\' separator
+    */
+   static private boolean startsWithParent( final String path ) {
+      return path.equals( ".." ) || path.startsWith( "../" ) || path.startsWith( "..\\" );
+   }
+
+
+   /**
+    * Creates a reader for a file.  The jvm is exited with status 1 if the file cannot be read.
+    *
+    * @param filePath path to the file, resolved with {@link #parseDirText}
+    * @return a buffered reader for the file
+    */
+   static public BufferedReader createReader( final String filePath ) {
+      final String formattedPath = parseDirText( filePath );
+      final File file = new File( formattedPath );
+      if ( !file.canRead() ) {
+         System.err.println( "Cannot read file " + filePath );
+         System.exit( 1 );
+      }
+      try {
+         return new BufferedReader( new FileReader( file ) );
+      } catch ( IOException ioE ) {
+         System.err.println( "Cannot create Reader for " + filePath );
+         System.err.println( ioE.getMessage() );
+         System.exit( 1 );
+      }
+      return null;
+   }
+
+   /**
+    * Creates a writer that appends to a file, creating missing parent directories first.
+    * The jvm is exited with status 1 if the writer cannot be created.
+    *
+    * @param filePath path to the file, resolved with {@link #parseDirText}
+    * @return a buffered writer that appends to the file
+    */
+   static public BufferedWriter createWriter( final String filePath ) {
+      final String formattedPath = parseDirText( filePath );
+      final File file = new File( formattedPath );
+      if ( file.getParentFile() != null && !file.getParentFile().isDirectory() ) {
+         file.getParentFile().mkdirs();
+      }
+      try {
+         return new BufferedWriter( new FileWriter( file, true ) );
+      } catch ( IOException ioE ) {
+         System.err.println( "Cannot create Writer for " + filePath );
+         System.err.println( ioE.getMessage() );
+         System.exit( 1 );
+      }
+      return null;
+   }
+
+   /**
+    * Reads the next line that is neither blank nor a comment starting with "//".
+    *
+    * @param reader   reader for the file
+    * @param filePath path to the file, only used in the error message
+    * @return the next content line as it appears in the file, or null at the end of the file
+    * or after a read error
+    */
+   static public String readLine( final BufferedReader reader, final String filePath ) {
+      try {
+         String line = reader.readLine();
+         while ( line != null ) {
+            if ( !line.trim().isEmpty() && !line.trim().startsWith( "//" ) ) {
+               return line;
+            }
+            line = reader.readLine();
+         }
+      } catch ( IOException ioE ) {
+         System.err.println( "Error reading from file " + filePath );
+      }
+      return null;
+   }
+
+   /**
+    * @param reader   reader for the file
+    * @param filePath path to the file, only used in error messages
+    * @return the items of the next content line as split by {@link TokenUtil#getBsvItems},
+    * or null if there is no further content line
+    */
+   static public List<String> readBsvTokens( final BufferedReader reader, final String filePath ) {
+      final String line = readLine( reader, filePath );
+      if ( line == null ) {
+         return null;
+      }
+      return TokenUtil.getBsvItems( line );
+   }
+
+   /**
+    * @param reader   reader for the file
+    * @param filePath path to the file, only used in error messages
+    * @return the items of the next content line as split by {@link TokenUtil#getCsvItems},
+    * or null if there is no further content line
+    */
+   static public List<String> readCsvTokens( final BufferedReader reader, final String filePath ) {
+      final String line = readLine( reader, filePath );
+      if ( line == null ) {
+         return null;
+      }
+      return TokenUtil.getCsvItems( line );
+   }
+
+   /**
+    * @param reader   reader for the file
+    * @param filePath path to the file, only used in error messages
+    * @return the items of the next content line as split by {@link TokenUtil#getTildeItems},
+    * or null if there is no further content line
+    */
+   static public List<String> readTildeTokens( final BufferedReader reader, final String filePath ) {
+      final String line = readLine( reader, filePath );
+      if ( line == null ) {
+         return null;
+      }
+      return TokenUtil.getTildeItems( line );
+   }
+
+   /**
+    * Appends each item as one line to a file.  Progress and errors are reported on the console.
+    *
+    * @param filePath    path to the file
+    * @param description description of the items, only used in console messages
+    * @param list        items to write
+    */
+   static public void writeOneColumn( final String filePath, final String description,
+                                      final Collection<String> list ) {
+      System.out.println( "Writing " + description + " to " + filePath );
+      long lineCount = 0;
+      // try-with-resources closes the writer even when a write fails
+      try ( BufferedWriter writer = createWriter( filePath ) ) {
+         for ( String item : list ) {
+            lineCount++;
+            writer.write( item );
+            writer.newLine();
+            if ( lineCount % 100000 == 0 ) {
+               System.out.println( "File Line " + lineCount );
+            }
+         }
+      } catch ( IOException ioE ) {
+         System.err.println( "Error writing " + description + " on line " + lineCount + " in file " + filePath );
+      }
+      System.out.println( "Wrote " + lineCount + " " + description + " to " + filePath );
+   }
+
+
+   /**
+    * Reads the unique content lines of a file, see {@link #readLine}.
+    * Progress and errors are reported on the console.
+    *
+    * @param listFilePath path to the file
+    * @param description  description of the items, only used in console messages
+    * @return the unique content lines of the file, in no particular order
+    */
+   static public Collection<String> readOneColumn( final String listFilePath, final String description ) {
+      System.out.println( "Reading " + description + " from " + listFilePath );
+      final Collection<String> listItems = new HashSet<>();
+      long lineCount = 0;
+      // try-with-resources closes the reader even when reading fails
+      try ( BufferedReader reader = createReader( listFilePath ) ) {
+         String line = readLine( reader, listFilePath );
+         while ( line != null ) {
+            lineCount++;
+            listItems.add( line );
+            if ( lineCount % 100000 == 0 ) {
+               System.out.println( "File Line " + lineCount );
+            }
+            line = readLine( reader, listFilePath );
+         }
+      } catch ( IOException ioE ) {
+         System.err.println( ioE.getMessage() );
+      }
+      System.out.println( "File Lines " + lineCount + "\t " + description + " " + listItems.size() );
+      return listItems;
+   }
+
+//   static public void writeNamedSets( final String filePath, final String description,
+//                                      final HashSetMap<String, String> namedSets ) {
+//      System.out.println( "Writing " + description + " to " + filePath );
+//      long lineCount = 0;
+//      try {
+//         final BufferedWriter writer = createWriter( filePath );
+//         for ( Map.Entry<String, Set<String>> namedSet : namedSets.entrySet() ) {
+//            lineCount++;
+//            writer.write( TokenUtil.createBsvLine( namedSet.getKey(),
+//                                                   TokenUtil.createCsvLine( namedSet.getValue() ) ) );
+//            writer.newLine();
+//            if ( lineCount % 100000 == 0 ) {
+//               System.out.println( "File Line " + lineCount );
+//            }
+//         }
+//         writer.close();
+//      } catch ( IOException ioE ) {
+//         System.err.println( "Error writing " + description + " on line " + lineCount + " in file " + filePath );
+//      }
+//      System.out.println( "Wrote " + lineCount + " " + description + " to " + filePath );
+//   }
+
+   /**
+    * Appends one line per entry to a file.  Each line is created with {@link TokenUtil#createBsvLine}
+    * from the key and the {@link TokenUtil#createCsvLine} form of the values.
+    *
+    * @param filePath    path to the file
+    * @param description description of the sets, only used in console messages
+    * @param namedSets   names and the items that belong to each name
+    * @deprecated
+    */
+   @Deprecated
+   static public void writeNamedSets( final String filePath, final String description,
+                                      final Map<String, Collection<String>> namedSets ) {
+      System.out.println( "Writing " + description + " to " + filePath );
+      long lineCount = 0;
+      // try-with-resources closes the writer even when a write fails
+      try ( BufferedWriter writer = createWriter( filePath ) ) {
+         for ( Map.Entry<String, Collection<String>> namedSet : namedSets.entrySet() ) {
+            lineCount++;
+            writer.write( TokenUtil.createBsvLine( namedSet.getKey(),
+                                                   TokenUtil.createCsvLine( namedSet.getValue() ) ) );
+            writer.newLine();
+            if ( lineCount % 100000 == 0 ) {
+               System.out.println( "File Line " + lineCount );
+            }
+         }
+      } catch ( IOException ioE ) {
+         System.err.println( "Error writing " + description + " on line " + lineCount + " in file " + filePath );
+      }
+      System.out.println( "Wrote " + lineCount + " " + description + " to " + filePath );
+   }
+
+   /**
+    * Reads a file in which each line is split by {@link TokenUtil#getBsvItems} into a name and a list,
+    * and the list is split by {@link TokenUtil#getCsvItems}.  Lines that do not have exactly those
+    * two parts are reported on the console and skipped.
+    *
+    * @param filePath    path to the file
+    * @param description description of the sets, only used in console messages
+    * @return names and the items that belong to each name
+    * @deprecated
+    */
+   @Deprecated
+   static public Map<String, Collection<String>> readNamedSetsOld( final String filePath, final String description ) {
+      final Collection<String> lines = readOneColumn( filePath, description );
+      final Map<String, Collection<String>> namedSets = new HashMap<>( lines.size() );
+      for ( String line : lines ) {
+         final List<String> nameAndList = TokenUtil.getBsvItems( line );
+         if ( nameAndList == null || nameAndList.size() != 2 ) {
+            System.err.println( "Bad line " + line );
+            continue;
+         }
+         namedSets.put( nameAndList.get( 0 ), TokenUtil.getCsvItems( nameAndList.get( 1 ) ) );
+      }
+      return namedSets;
+   }
+
+//   static public HashSetMap<String, String> readNamedSets( final String filePath, final String description ) {
+//      final Collection<String> lines = readOneColumn( filePath, description );
+//      final HashSetMap<String, String> namedSets = new HashSetMap<>( lines.size() );
+//      for ( String line : lines ) {
+//         final List<String> nameAndList = TokenUtil.getBsvItems( line );
+//         if ( nameAndList == null || nameAndList.size() != 2 ) {
+//            System.err.println( "Bad line " + line );
+//            continue;
+//         }
+//         namedSets.addAll( nameAndList.get( 0 ), TokenUtil.getCsvItems( nameAndList.get( 1 ) ) );
+//      }
+//      return namedSets;
+//   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/HsqlUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/HsqlUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/HsqlUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/HsqlUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,111 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+
+import org.apache.ctakes.dictionary.creator.gui.umls.Vocabulary;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.*;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+final public class HsqlUtil {
+
+   static private final Logger LOGGER = LogManager.getLogger( "HsqlUtil" );
+
+   static public final String URL_PREFIX = "jdbc:hsqldb:file:";
+
+   private HsqlUtil() {
+   }
+
+
+   static public boolean createDatabase( final String databasePath, final String databaseName ) {
+      final File databaseDir = new File( databasePath, databaseName );
+      if ( databaseDir.isFile() ) {
+         LOGGER.error( databaseDir.getPath() + " exists as a file.  Hsqldb requires that path to be a directory" );
+         return false;
+      }
+      databaseDir.mkdirs();
+      return writePropertiesFile( databaseDir, databaseName )
+             && writeScriptFile( databaseDir, databaseName )
+             && writeRcFile( databaseDir, databaseName );
+   }
+
+   static private boolean writePropertiesFile( final File databaseDir, final String databaseName ) {
+      final File propertiesFile = new File( databaseDir, databaseName + ".properties" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( propertiesFile ) ) ) {
+         writer.write( "#HSQL Database Engine 1.8.0.10\n" );
+         writer.write( "#Thu Sep 04 09:49:09 EDT 2014\n" );
+         writer.write( "hsqldb.script_format=0\n" );
+         writer.write( "runtime.gc_interval=0\n" );
+         writer.write( "sql.enforce_strict_size=false\n" );
+         writer.write( "hsqldb.cache_size_scale=8\n" );
+         writer.write( "readonly=false\n" );
+         writer.write( "hsqldb.nio_data_file=true\n" );
+         writer.write( "hsqldb.cache_scale=14\n" );
+         writer.write( "version=1.8.0\n" );
+         writer.write( "hsqldb.default_table_type=memory\n" );
+         writer.write( "hsqldb.cache_file_scale=1\n" );
+         writer.write( "hsqldb.log_size=200\n" );
+         writer.write( "modified=no\n" );
+         writer.write( "hsqldb.cache_version=1.7.0\n" );
+         writer.write( "hsqldb.original_version=1.8.0\n" );
+         writer.write( "hsqldb.compatible_version=1.8.0\n\n" );
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+         return false;
+      }
+      return true;
+   }
+
+   static private boolean writeScriptFile( final File databaseDir, final String databaseName ) {
+      final File scriptFile = new File( databaseDir, databaseName + ".script" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( scriptFile ) ) ) {
+         writer.write( "CREATE SCHEMA PUBLIC AUTHORIZATION DBA\n" );
+         // main table
+         writer.write( "CREATE MEMORY TABLE CUI_TERMS(CUI BIGINT,RINDEX INTEGER,TCOUNT INTEGER,TEXT VARCHAR(255),RWORD VARCHAR(48))\n" );
+         writer.write( "CREATE INDEX IDX_CUI_TERMS ON CUI_TERMS(RWORD)\n" );
+         // tui table
+         writer.write( "CREATE MEMORY TABLE TUI(CUI BIGINT,TUI INTEGER)\n" );
+         writer.write( "CREATE INDEX IDX_TUI ON TUI(CUI)\n" );
+         // preferred term table
+         writer.write( "CREATE MEMORY TABLE PREFTERM(CUI BIGINT,PREFTERM VARCHAR(255))\n" );
+         writer.write( "CREATE INDEX IDX_PREFTERM ON PREFTERM(CUI)\n" );
+         // vocabulary tables
+         for ( String vocabulary : Vocabulary.getInstance().getAllVocabularies() ) {
+            final String jdbcClass = Vocabulary.getInstance().getJdbcClass( vocabulary );
+            final String tableName = vocabulary.replace( '.','_' ).replace('-', '_');
+            writer.write( "CREATE MEMORY TABLE " + tableName + "(CUI BIGINT," + tableName + " " + jdbcClass + ")\n" );
+            writer.write( "CREATE INDEX IDX_" + tableName + " ON " + tableName + "(CUI)\n" );
+         }
+         writer.write( "CREATE USER SA PASSWORD \"\"\n" );
+         writer.write( "GRANT DBA TO SA\n" );
+         writer.write( "SET WRITE_DELAY 10\n" );
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+         return false;
+      }
+      return true;
+   }
+
+   static private boolean writeRcFile( final File databaseDir, final String databaseName ) {
+      final File scriptFile = new File( databaseDir, databaseName + ".rc" );
+      final String url = HsqlUtil.URL_PREFIX + databaseDir.getPath().replace( '\\', '/' )
+                         + "/" + databaseName;
+      try ( final Writer writer = new BufferedWriter( new FileWriter( scriptFile ) ) ) {
+         writer.write( "urlid " + databaseName + "\n" );
+         writer.write( "url " + url + ";shutdown=true\n" );
+         writer.write( "username sa\n" );
+         writer.write( "password\n" );
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+         return false;
+      }
+      return true;
+   }
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/JdbcUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/JdbcUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/JdbcUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/JdbcUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,86 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/21/14
+ */
+final public class JdbcUtil {
+
+   static private final Logger LOGGER = LogManager.getLogger( "JdbcUtil" );
+
+   static private final String JDBC_DRIVER = "org.hsqldb.jdbcDriver";
+
+   private JdbcUtil() {
+   }
+
+   /**
+    * Instantiates the jdbc driver class {@value #JDBC_DRIVER} by name and registers it with the DriverManager.
+    * If the driver cannot be loaded, instantiated or registered the error is logged and the JVM exits with status 1.
+    */
+   static public void registerDriver() {
+      try {
+         final Driver driver = (Driver) Class.forName( JDBC_DRIVER ).getDeclaredConstructor().newInstance();
+         DriverManager.registerDriver( driver );
+      } catch ( ReflectiveOperationException | SQLException e ) {
+         // Log the exception itself so that the cause and stack trace are not lost
+         LOGGER.error( "Could not register Driver " + JDBC_DRIVER, e );
+         System.exit( 1 );
+      }
+   }
+
+   /**
+    * Registers the driver and opens a connection to the given database.
+    * If the connection cannot be established the error is logged and the JVM exits with status 1.
+    *
+    * @param url  jdbc url of the database
+    * @param user user name
+    * @param pass password
+    * @return an open connection
+    */
+   static public Connection createDatabaseConnection( final String url, final String user, final String pass ) {
+      registerDriver();
+      LOGGER.info( "Connecting to " + url + " as " + user );
+      try {
+         return DriverManager.getConnection( url, user, pass );
+      } catch ( SQLException sqlE ) {
+         // thrown by DriverManager.getConnection(..)
+         LOGGER.error( "Could not establish connection to " + url + " as " + user, sqlE );
+         System.exit( 1 );
+      }
+      // Only reached if System.exit(..) returns
+      return null;
+   }
+
+   /**
+    * Creates a parameterized insert statement using the names of the given enum constants as column names.
+    *
+    * @param tableName name of the table
+    * @param fields    enum constants whose names are the column names, in statement order
+    * @return sql of the form {@code insert into TABLE (A,B)  values (?,?)}
+    * @throws IllegalArgumentException if no fields are given
+    */
+   static public String createRowInsertSql( final String tableName, final Enum<?>... fields ) {
+      final String[] fieldNames = new String[fields.length];
+      for ( int i = 0; i < fields.length; i++ ) {
+         fieldNames[i] = fields[i].name();
+      }
+      return createRowInsertSql( tableName, fieldNames );
+   }
+
+   /**
+    * Creates a parameterized insert statement for a two-column vocabulary code table.
+    * The first column is CUI.  The second column is the vocabulary name with '.' and '-' replaced by '_',
+    * and the table name is the lower case form of that same identifier.
+    *
+    * @param vocabulary name of the vocabulary
+    * @return insert sql for the code table of the vocabulary
+    */
+   static public String createCodeInsertSql( final String vocabulary ) {
+      // Table and column must be sanitized the same way, otherwise a name containing '.' yields an invalid column
+      final String identifier = vocabulary.replace( '.', '_' ).replace( '-', '_' );
+      return createRowInsertSql( identifier.toLowerCase(), "CUI", identifier );
+   }
+
+   /**
+    * Creates a parameterized insert statement with one placeholder per column.
+    *
+    * @param tableName  name of the table
+    * @param fieldNames column names, in statement order
+    * @return sql of the form {@code insert into TABLE (A,B)  values (?,?)}
+    * @throws IllegalArgumentException if no field names are given
+    */
+   static public String createRowInsertSql( final String tableName, final String... fieldNames ) {
+      if ( fieldNames.length == 0 ) {
+         throw new IllegalArgumentException( "At least one field name is required for insert into " + tableName );
+      }
+      final StringBuilder sb = new StringBuilder( "insert into" );
+      sb.append( " " ).append( tableName );
+      sb.append( " (" ).append( String.join( ",", fieldNames ) ).append( ") " );
+      sb.append( " values (" );
+      // one "?" per field, comma separated
+      for ( int i = 0; i < fieldNames.length - 1; i++ ) {
+         sb.append( "?," );
+      }
+      sb.append( "?)" );
+      return sb.toString();
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/LambdaUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/LambdaUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/LambdaUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/LambdaUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,28 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+
+import java.util.function.BinaryOperator;
+import java.util.function.Function;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+final public class LambdaUtil {
+
+   private LambdaUtil() {}
+
+   static public final Function<String, String> asSelf = value -> value;
+
+   static public final Function<String, Integer> zeroInt = value -> 0;
+   static public final Function<String, Long> zeroLong = value -> 0l;
+
+   static public final Function<String, Integer> one = value -> 1;
+
+   static public final BinaryOperator<Integer> sumInt = ( count1, count2 ) -> count1 + count2;
+   static public final BinaryOperator<Long> sumLong = ( count1, count2 ) -> count1 + count2;
+
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/RareWordDbWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/RareWordDbWriter.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/RareWordDbWriter.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/RareWordDbWriter.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,222 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+import org.apache.ctakes.dictionary.creator.gui.umls.Concept;
+import org.apache.ctakes.dictionary.creator.gui.umls.Tui;
+import org.apache.ctakes.dictionary.creator.gui.umls.Vocabulary;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import static org.apache.ctakes.dictionary.creator.util.LambdaUtil.asSelf;
+import static org.apache.ctakes.dictionary.creator.util.LambdaUtil.zeroLong;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/15/14
+ */
+final public class RareWordDbWriter {
+
+   static private final Logger LOGGER = LogManager.getLogger( "RareWordDbWriter" );
+
+
+   private RareWordDbWriter() {
+   }
+
+   private enum CuiTermsField {
+      CUI( 1, Long.class ), RINDEX( 2, Integer.class ), TCOUNT( 3, Integer.class ),
+      TEXT( 4, String.class ), RWORD( 5, String.class );
+      final private int __index;
+      final private Class __classType;
+
+      CuiTermsField( final int index, final Class classType ) {
+         __index = index;
+         __classType = classType;
+      }
+   }
+
+
+
+   static public boolean writeConcepts( final Map<Long, Concept> concepts,
+                                     final String url, final String user, final String pass ) {
+      // Get Count of appearance in dictionary per term token
+      final Map<String, Integer> tokenCounts = RareWordUtil.getTokenCounts( concepts.values() );
+      // For pmsdn tesseract user-words
+//      try ( Writer rareWordWriter = new BufferedWriter( new FileWriter( "/tmp/ctakes-dictionary-gui/dictionary-temp/rarewords.txt" ) ) ) {
+//         for ( Map.Entry<String,Integer> entry : tokenCounts.entrySet() ) {
+//            if ( entry.getValue() > 24 && entry.getKey().length() > 4 && entry.getKey().length() < 15 ) {
+//               boolean allAlpha = true;
+//               for ( char c : entry.getKey().toCharArray() ) {
+//                  if ( !Character.isLetter( c ) ) {
+//                     allAlpha = false;
+//                     break;
+//                  }
+//               }
+//               if ( allAlpha ) {
+//                  rareWordWriter.write( entry.getKey() + "\n" );
+//               }
+//            }
+//         }
+//      } catch ( IOException ioE ) {
+//         LOGGER.error( ioE.getMessage() );
+//      }
+      // Create insert sql statements
+      final String mainTableSql = JdbcUtil.createRowInsertSql( "CUI_TERMS", CuiTermsField.values() );
+      final String tuiTableSql = JdbcUtil.createCodeInsertSql( "tui" );
+      final String preftermTableSql = JdbcUtil.createCodeInsertSql( "prefterm" );
+      final Map<String,String> insertCodeSqls = createCodeInsertSqls();
+
+      long mainTableCount = 0;
+      long tuiTableCount = 0;
+      long preftermTableCount = 0;
+      final Map<String,Long> codeTableCounts = createCodeCounts();
+      final Connection connection = JdbcUtil.createDatabaseConnection( url, user, pass );
+      try {
+         // Create PreparedStatements from insert sql statements
+         final PreparedStatement mainTableStatement = connection.prepareStatement( mainTableSql );
+         final PreparedStatement tuiStatement = connection.prepareStatement( tuiTableSql );
+         final PreparedStatement preftermStatement = connection.prepareStatement( preftermTableSql );
+         final Map<String,PreparedStatement> codeStatements = createCodeStatements( connection, insertCodeSqls );
+
+         for ( Map.Entry<Long, Concept> conceptEntry : concepts.entrySet() ) {
+            final long cui = conceptEntry.getKey();
+            final Concept concept = conceptEntry.getValue();
+            // write main term table
+            boolean conceptOk = false;
+            for ( String text : conceptEntry.getValue().getTexts() ) {
+               final RareWordUtil.IndexedRareWord indexedRareWord = RareWordUtil.getIndexedRareWord( text,
+                                                                                                     tokenCounts );
+               if ( RareWordUtil.NULL_RARE_WORD.equals( indexedRareWord ) ) {
+                  continue;
+               }
+               conceptOk = true;
+               mainTableStatement.setLong( CuiTermsField.CUI.__index, cui );
+               mainTableStatement.setInt( CuiTermsField.RINDEX.__index, indexedRareWord.__index );
+               mainTableStatement.setInt( CuiTermsField.TCOUNT.__index, indexedRareWord.__tokenCount );
+               mainTableStatement.setString( CuiTermsField.TEXT.__index, text );
+               mainTableStatement.setString( CuiTermsField.RWORD.__index, indexedRareWord.__word );
+               mainTableStatement.executeUpdate();
+               mainTableCount = incrementCount( "Main", mainTableCount );
+            }
+            if ( !conceptOk ) {
+               continue;
+            }
+            // write tui table
+            for ( Tui tui : concept.getTuis() ) {
+               tuiStatement.setLong( CuiTermsField.CUI.__index, cui );
+               tuiStatement.setInt( 2, tui.getIntValue() );
+               tuiStatement.executeUpdate();
+               tuiTableCount = incrementCount( "Tui", tuiTableCount );
+            }
+            // write preferred term table
+            final String preferredText = concept.getPreferredText();
+            if ( preferredText != null
+                 && !preferredText.isEmpty()
+                 && !preferredText.equals( Concept.PREFERRED_TERM_UNKNOWN ) ) {
+               preftermStatement.setLong( CuiTermsField.CUI.__index, cui );
+               preftermStatement.setString( 2, preferredText );
+               preftermStatement.executeUpdate();
+               preftermTableCount = incrementCount( "Preferred Term", preftermTableCount );
+            }
+            // write extra vocabulary code tables
+            final Collection<String> vocabularies = concept.getVocabularies();
+            for ( String vocabulary : vocabularies ) {
+               final PreparedStatement statement = codeStatements.get( vocabulary.replace('-', '_') );
+               statement.setLong( CuiTermsField.CUI.__index, cui );
+               for ( String code : concept.getCodes( vocabulary ) ) {
+                  setCodeAppropriately( statement, code, Vocabulary.getInstance().getVocabularyClass( vocabulary ) );
+                  statement.executeUpdate();
+                  codeTableCounts.put( vocabulary, incrementCount( vocabulary, codeTableCounts.get( vocabulary ) ) );
+               }
+            }
+         }
+         connection.commit();
+         mainTableStatement.close();
+         tuiStatement.close();
+         preftermStatement.close();
+         preftermStatement.close();
+         for ( PreparedStatement codeStatement : codeStatements.values() ) {
+            codeStatement.close();
+         }
+         final Statement writeDelayStatement = connection.createStatement();
+         writeDelayStatement.execute( "SET WRITE_DELAY FALSE" );
+         writeDelayStatement.close();
+         final Statement setBinaryStatement = connection.createStatement();
+         setBinaryStatement.execute( "SET SCRIPTFORMAT BINARY" );
+         setBinaryStatement.close();
+         final Statement readOnlyStatement = connection.createStatement();
+         readOnlyStatement.execute( "SET READONLY TRUE" );
+         readOnlyStatement.close();
+         final Statement shutdownStatement = connection.createStatement();
+         shutdownStatement.execute( "SHUTDOWN" );
+         shutdownStatement.close();
+         connection.commit();
+         connection.close();
+      } catch ( SQLException sqlE ) {
+         LOGGER.error( sqlE.getMessage() );
+         return false;
+      }
+      LOGGER.info( "Main Table Rows " + mainTableCount );
+      LOGGER.info( "Tui Table Rows " + tuiTableCount );
+      LOGGER.info( "Preferred Term Table Rows " + preftermTableCount );
+      final Function<String,String> vocabCount = v -> v + " Table Rows " + codeTableCounts.get( v );
+      Vocabulary.getInstance().getAllVocabularies().stream()
+            .map( vocabCount )
+            .forEach( LOGGER::info );
+      return true;
+   }
+
+
+   static private Map<String,String> createCodeInsertSqls() {
+      return Vocabulary.getInstance().getAllVocabularies().stream().map(vocab -> vocab.replace('-', '_'))
+            .collect( Collectors.toMap( asSelf, JdbcUtil::createCodeInsertSql ) );
+   }
+
+   static private Map<String,PreparedStatement> createCodeStatements( final Connection connection,
+                                                                      final Map<String,String> insertCodeSqls )
+         throws SQLException {
+      final Map<String,PreparedStatement> codeStatements = new HashMap<>( insertCodeSqls.size() );
+      for ( Map.Entry<String,String> codeSql : insertCodeSqls.entrySet() ) {
+         codeStatements.put( codeSql.getKey(), connection.prepareStatement( codeSql.getValue() ) );
+      }
+      return codeStatements;
+   }
+
+   static private Map<String,Long> createCodeCounts() {
+      return Vocabulary.getInstance().getAllVocabularies().stream().collect( Collectors.toMap( asSelf, zeroLong ) );
+   }
+
+   static private void setCodeAppropriately( final PreparedStatement statement, final String code,
+                                             final Class<?> type ) throws SQLException {
+      if ( String.class.equals( type ) ) {
+         statement.setString( 2, code );
+      } else if ( Double.class.equals( type ) ) {
+         statement.setDouble( 2, Double.valueOf( code ) );
+      } else if ( Long.class.equals( type ) ) {
+         statement.setLong( 2, Long.valueOf( code ) );
+      } else if ( Integer.class.equals( type ) ) {
+         statement.setInt( 2, Integer.valueOf( code ) );
+      } else {
+         LOGGER.error( "Could not set code for " + type.getName() );
+         statement.setString( 2, code );
+      }
+   }
+
+   static private long incrementCount( final String name, long count ) {
+      count++;
+      if ( count % 100000 == 0 ) {
+         LOGGER.info( name + " Table Rows " + count );
+      }
+      return count;
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/RareWordUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/RareWordUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/RareWordUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/RareWordUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,172 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+import org.apache.ctakes.dictionary.creator.gui.umls.Concept;
+
+import java.util.*;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import static org.apache.ctakes.dictionary.creator.util.LambdaUtil.*;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/17/14
+ */
+final public class RareWordUtil {
+
+   private RareWordUtil() {
+   }
+
+   // LookupDesc for the standard excluded pos tags are
+   //   VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,LS,MD,PDT,POS,PP,PP$,PRP,PRP$,RP,TO,WDT,WP,WPS,WRB
+   // Listing every verb in the language seems a pain, but listing the others is possible.
+   // Verbs should be rare in the dictionaries, excepting perhaps the activity and concept dictionaries
+   // CD, CC, DT, EX, MD, PDT, PP, PP$, PRP, PRP$, RP, TO, WDT, WP, WPS, WRB
+   // why not WP$ (possessive wh- pronoun "whose")
+   // PP$ is a Brown POS tag, not Penn Treebank (as are the rest)
+
+   static private Set<String> BAD_POS_TERM_SET;
+
+   static {
+      final String[] BAD_POS_TERMS = {
+            // CD  cardinal number
+            "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
+            // CC  coordinating conjunction
+            "and", "or", "but", "for", "nor", "so", "yet",
+            // DT  determiner
+            "this", "that", "these", "those", "the",
+            // EX  existential there
+            "there",
+            // MD  modal
+            "can", "should", "will", "may", "might", "must", "could", "would",
+            // PDT  predeterminer
+            "some", "any", "all", "both", "half", "none", "twice",
+            // PP  prepositional phrase (preposition)
+            "at", "before", "after", "behind", "beneath", "beside", "between", "into", "through", "across", "of",
+            "concerning", "like", "except", "with", "without", "toward", "to", "past", "against", "during", "until",
+            "throughout", "below", "besides", "beyond", "from", "inside", "near", "outside", "since", "upon",
+            // PP$  possessive personal pronoun - Brown POS tag, not Penn TreeBank
+            "my", "our",
+            // PRP  personal pronoun
+            "i", "you", "he", "she", "it",
+            // PRP$  possesive pronoun
+            "mine", "yours", "his", "hers", "its", "ours", "theirs",
+            // RP  particle  - this contains some prepositions
+            "about", "off", "up", "along", "away", "back", "by", "down", "forward", "in", "on", "out",
+            "over", "around", "under",
+            // TO  to  - also a preposition
+            "to",
+            // WDT  wh- determiner
+            "what", "whatever", "which", "whichever",
+            // WP, WPS  wh- pronoun, nominative wh- pronoun
+            "who", "whom", "which", "that", "whoever", "whomever",
+            // WRB
+            "how", "where", "when", "however", "wherever", "whenever",
+            // Mine ...
+            "no",
+            // additional numbers
+            "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
+            "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
+            "hundred", "thousand", "million", "billion", "trillion",
+      };
+      BAD_POS_TERM_SET = new HashSet<>( Arrays.asList( BAD_POS_TERMS ) );
+   }
+
+   static public boolean isRarableToken( final String token ) {
+      if ( token.length() <= 1 ) {
+         return false;
+      }
+      boolean hasLetter = false;
+      for ( int i = 0; i < token.length(); i++ ) {
+         if ( Character.isLetter( token.charAt( i ) ) ) {
+            hasLetter = true;
+            break;
+         }
+      }
+      return hasLetter && !BAD_POS_TERM_SET.contains( token );
+   }
+
+
+   static private final Pattern SPACE_PATTERN = Pattern.compile( "\\s+" );
+
+
+   static public Map<String, Integer> getTokenCounts( final Collection<Concept> concepts ) {
+      return concepts.stream()
+            .map( Concept::getTexts )
+            .flatMap( Collection::stream )
+            .map( SPACE_PATTERN::split )
+            .flatMap( Arrays::stream )
+            .filter( RareWordUtil::isRarableToken )
+            .collect( Collectors.toMap( asSelf, one, sumInt ) );
+   }
+
+   static private void incrementCount( final Map<String,Integer> tokenCounts, final String token ) {
+      Integer count = tokenCounts.get( token );
+      if ( count == null ) {
+         count = 0;
+      }
+      tokenCounts.put( token, (count + 1) );
+   }
+
+   //   static public String getRareToken( final Map<String,Integer> tokenCounts, final String text ) {
+   //      final String[] tokens = text.split( "\\s+" );
+   //      int bestIndex = 0;
+   //      int bestCount = Integer.MAX_VALUE;
+   //      for ( int i = 0; i < tokens.length; i++ ) {
+   //         Integer count = tokenCounts.get( tokens[i] );
+   //         if ( count != null && count < bestCount ) {
+   //            bestIndex = i;
+   //            bestCount = count;
+   //         }
+   //      }
+   //      return tokens[bestIndex];
+   //   }
+   //
+   //   static public int getRareTokenIndex( final Map<String,Integer> tokenCounts, final String text ) {
+   //      final String[] tokens = text.split( "\\s+" );
+   //      int bestIndex = 0;
+   //      int bestCount = Integer.MAX_VALUE;
+   //      for ( int i = 0; i < tokens.length; i++ ) {
+   //         Integer count = tokenCounts.get( tokens[i] );
+   //         if ( count != null && count < bestCount ) {
+   //            bestIndex = i;
+   //            bestCount = count;
+   //         }
+   //      }
+   //      return bestIndex;
+   //   }
+
+
+   static public final class IndexedRareWord {
+      final public String __word;
+      final public int __index;
+      final public int __tokenCount;
+
+      private IndexedRareWord( final String word, final int index, final int tokenCount ) {
+         __word = word;
+         __index = index;
+         __tokenCount = tokenCount;
+      }
+   }
+
+   static public final IndexedRareWord NULL_RARE_WORD = new IndexedRareWord( null, -1, -1 );
+
+   static public IndexedRareWord getIndexedRareWord( final String text,
+                                                     final Map<String, Integer> tokenCounts ) {
+      final String[] tokens = text.split( "\\s+" );
+      int bestIndex = 0;
+      int bestCount = Integer.MAX_VALUE;
+      for ( int i = 0; i < tokens.length; i++ ) {
+         Integer count = tokenCounts.get( tokens[i] );
+         if ( count != null && count < bestCount ) {
+            bestIndex = i;
+            bestCount = count;
+         }
+      }
+      if ( bestCount == Integer.MAX_VALUE ) {
+         return NULL_RARE_WORD;
+      }
+      return new IndexedRareWord( tokens[bestIndex], bestIndex, tokens.length );
+   }
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/TextTokenizer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/TextTokenizer.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/TextTokenizer.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/TextTokenizer.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,194 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/16/14
+ */
+final public class TextTokenizer {
+
+   private TextTokenizer() {
+   }
+
+   // Prefixes that stay attached to the following word, each including its trailing dash
+   static private final Set<String> PREFIX_SET = new HashSet<>( Arrays.asList(
+         "e-", "a-", "u-", "x-",
+         "agro-", "ante-", "anti-", "arch-",
+         "be-", "bi-", "bio-",
+         "co-", "counter-", "cross-", "cyber-",
+         "de-",
+         "eco-", "ex-", "extra-",
+         "inter-", "intra-",
+         "macro-", "mega-", "micro-", "mid-", "mini-", "multi-",
+         "neo-", "non-",
+         "over-",
+         "pan-", "para-", "peri-", "post-", "pre-", "pro-", "pseudo-",
+         "quasi-",
+         "re-",
+         "semi-", "sub-", "super-",
+         "tri-",
+         "ultra-", "un-", "uni-",
+         "vice-",
+         // From email from Colin Warner <co...@ldc.upenn.edu> on 7/25/2010
+         "electro-", "gasto-", "homo-", "hetero-", "ortho-", "phospho-"
+   ) );
+
+   // Suffixes that stay attached to the preceding word, each including its leading dash
+   static private final Set<String> SUFFIX_SET = new HashSet<>( Arrays.asList(
+         "-esque", "-ette", "-fest", "-fold", "-gate", "-itis", "-less", "-most",
+         "-o-torium", "-rama", "-wise"
+   ) );
+
+   /**
+    * @param word some text
+    * @return the leading run of letters and digits in the text, possibly empty
+    */
+   static private String getNextCharTerm( final String word ) {
+      int end = 0;
+      while ( end < word.length() && Character.isLetterOrDigit( word.charAt( end ) ) ) {
+         end++;
+      }
+      return word.substring( 0, end );
+   }
+
+   /**
+    * @param word text preceding a dash
+    * @return true if the text followed by a dash is a known prefix
+    */
+   static private boolean isPrefix( final String word ) {
+      return PREFIX_SET.contains( word + "-" );
+   }
+
+   /**
+    * @param word       full word
+    * @param startIndex index of the first character after a dash
+    * @return true if the letters and digits starting at the index, preceded by a dash, are a known suffix
+    */
+   static private boolean isSuffix( final String word, final int startIndex ) {
+      if ( startIndex >= word.length() ) {
+         return false;
+      }
+      final String followingTerm = getNextCharTerm( word.substring( startIndex ) );
+      return !followingTerm.isEmpty() && SUFFIX_SET.contains( "-" + followingTerm );
+   }
+
+   /**
+    * @param word       full word
+    * @param startIndex index of the first character after an apostrophe
+    * @return true if the only thing after the apostrophe is a single 's'
+    */
+   static private boolean isOwnerApostrophe( final CharSequence word, final int startIndex ) {
+      return startIndex == word.length() - 1 && word.charAt( startIndex ) == 's';
+   }
+
+   /**
+    * @param word       full word
+    * @param startIndex index of the first character after a period
+    * @return true if the only thing after the period is a single digit
+    */
+   static private boolean isNumberDecimal( final CharSequence word, final int startIndex ) {
+      // Bizarre scenario in which ctakes tokenizes ".2" as a fraction, but not ".22"
+      return startIndex == word.length() - 1 && Character.isDigit( word.charAt( startIndex ) );
+   }
+
+   /**
+    * Moves the pending word, if there is one, to the token list and empties the builder.
+    */
+   static private void flushPending( final StringBuilder pending, final List<String> tokens ) {
+      if ( pending.length() != 0 ) {
+         tokens.add( pending.toString() );
+         pending.setLength( 0 );
+      }
+   }
+
+   /**
+    * Same as {@link #getTokens(String, boolean)} without separation of digits.
+    *
+    * @param word a single word, without whitespace
+    * @return tokens of the word
+    */
+   static public List<String> getTokens( final String word ) {
+      return getTokens( word, false );
+   }
+
+   /**
+    * Splits a word into tokens.  Runs of letters and digits are kept together, other characters become
+    * tokens of their own.  Exceptions: a dash stays in the word when it follows a known prefix or precedes
+    * a known suffix, and a final {@code 's} or a final period with one digit becomes a single token.
+    *
+    * @param word           a single word, without whitespace
+    * @param separateDigits true to start a new token where a letter directly follows a digit
+    * @return tokens of the word, in order
+    */
+   static public List<String> getTokens( final String word, final boolean separateDigits ) {
+      final List<String> tokens = new ArrayList<>();
+      final StringBuilder pending = new StringBuilder();
+      boolean previousWasDigit = false;
+      for ( int i = 0; i < word.length(); i++ ) {
+         final char c = word.charAt( i );
+         if ( Character.isLetterOrDigit( c ) ) {
+            final boolean isDigit = Character.isDigit( c );
+            if ( separateDigits && previousWasDigit && !isDigit ) {
+               // a letter follows a digit, so the digits become their own token
+               flushPending( pending, tokens );
+            }
+            previousWasDigit = isDigit;
+            pending.append( c );
+            continue;
+         }
+         previousWasDigit = false;
+         if ( c == '-' && (isPrefix( pending.toString() ) || isSuffix( word, i + 1 )) ) {
+            // prefix before or suffix after the dash: the dash is part of the word
+            pending.append( c );
+            continue;
+         }
+         final boolean startsToken = ( c == '\'' && isOwnerApostrophe( word, i + 1 ) )
+                                     || ( c == '.' && isNumberDecimal( word, i + 1 ) );
+         // In both remaining cases whatever came before the symbol is a finished token
+         flushPending( pending, tokens );
+         if ( startsToken ) {
+            // the symbol begins the final 's or .# token
+            pending.append( c );
+         } else {
+            // any other symbol is a token by itself
+            tokens.add( "" + c );
+         }
+      }
+      // add the final word
+      flushPending( pending, tokens );
+      return tokens;
+   }
+
+   /**
+    * Same as {@link #getTokenizedText(String, boolean)} without separation of digits.
+    *
+    * @param text text of a term
+    * @return lower case tokens of the text joined by single spaces
+    */
+   static public String getTokenizedText( final String text ) {
+      return getTokenizedText( text, false );
+   }
+
+   /**
+    * Lower cases the text, splits it on whitespace, removes one trailing comma, semicolon or period from
+    * the last word, tokenizes every word and joins all tokens with single spaces.
+    *
+    * @param text           text of a term
+    * @param separateDigits true to start a new token where a letter directly follows a digit
+    * @return tokens of the text joined by single spaces; the text itself if it is empty
+    */
+   static public String getTokenizedText( final String text, final boolean separateDigits ) {
+      if ( text.isEmpty() ) {
+         return text;
+      }
+      final String[] words = text.toLowerCase().split( "\\s+" );
+      if ( words.length == 0 ) {
+         return "";
+      }
+      final int lastIndex = words.length - 1;
+      final String lastWord = words[lastIndex];
+      if ( lastWord.endsWith( "," ) || lastWord.endsWith( ";" ) || lastWord.endsWith( "." ) ) {
+         // get rid of last comma or semicolon or period
+         words[lastIndex] = lastWord.substring( 0, lastWord.length() - 1 );
+      }
+      final StringJoiner joiner = new StringJoiner( " " );
+      for ( String word : words ) {
+         for ( String token : getTokens( word, separateDigits ) ) {
+            joiner.add( token );
+         }
+      }
+      return joiner.toString();
+   }
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/TokenUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/TokenUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/TokenUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/TokenUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,89 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/15/14
+ */
+final public class TokenUtil {
+
+   private TokenUtil() {
+   }
+
+   /**
+    * @param line text with values separated by the bar character '|'
+    * @return the values of the line, in order; an empty list if the line is null or blank
+    */
+   static public List<String> getBsvItems( final String line ) {
+      return getSeparatedValueItems( line, '|' );
+   }
+
+   /**
+    * @param line text with values separated by the tilde character '~'
+    * @return the values of the line, in order; an empty list if the line is null or blank
+    */
+   static public List<String> getTildeItems( final String line ) {
+      return getSeparatedValueItems( line, '~' );
+   }
+
+   /**
+    * @param line text with values separated by the comma character ','
+    * @return the values of the line, in order; an empty list if the line is null or blank
+    */
+   static public List<String> getCsvItems( final String line ) {
+      return getSeparatedValueItems( line, ',' );
+   }
+
+   /**
+    * Splits a line at every occurrence of the separator.  Empty values are kept, so a line that starts
+    * or ends with the separator has an empty first or last item.
+    *
+    * @param line      text with separated values
+    * @param separator character between the values
+    * @return the values of the line, in order; an empty list if the line is null or blank
+    */
+   static private List<String> getSeparatedValueItems( final String line, final char separator ) {
+      if ( line == null || line.trim().isEmpty() ) {
+         return Collections.emptyList();
+      }
+      final List<String> tokens = new ArrayList<>();
+      int startIndex = 0;
+      int stopIndex = line.indexOf( separator );
+      // stopIndex 0 is a valid hit: the line starts with the separator and the first value is empty
+      while ( stopIndex >= 0 ) {
+         tokens.add( line.substring( startIndex, stopIndex ) );
+         startIndex = stopIndex + 1;
+         stopIndex = line.indexOf( separator, startIndex );
+      }
+      // Whatever follows the last separator is the last value, even if it is a single character.
+      // If the line ends with the separator then this is the empty string.
+      tokens.add( line.substring( startIndex ) );
+      return tokens;
+   }
+
+
+   /**
+    * @param values values to join
+    * @return the values joined by the bar character '|'; the empty string if values is null or empty
+    */
+   static public String createBsvLine( final Collection<String> values ) {
+      if ( values == null ) {
+         return "";
+      }
+      return String.join( "|", values );
+   }
+
+   /**
+    * @param values values to join
+    * @return the values joined by the bar character '|'; the empty string if there are no values
+    */
+   static public String createBsvLine( final String... values ) {
+      return String.join( "|", values );
+   }
+
+   /**
+    * @param values values to join
+    * @return the values joined by the comma character ','; the empty string if values is null or empty
+    */
+   static public String createCsvLine( final Collection<String> values ) {
+      if ( values == null ) {
+         return "";
+      }
+      return String.join( ",", values );
+   }
+
+   /**
+    * @param values values to join
+    * @return the values joined by the comma character ','; the empty string if there are no values
+    */
+   static public String createCsvLine( final String... values ) {
+      return String.join( ",", values );
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/ArrayListMap.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/ArrayListMap.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/ArrayListMap.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/ArrayListMap.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,225 @@
+package org.apache.ctakes.dictionary.creator.util.collection;
+
+import java.util.*;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 7/23/14
+ */
+/**
+ * {@link CollectionMap} whose per-key collections are {@link List}s.
+ * Every operation is forwarded to an internal {@link DefaultCollectionMap} that is backed by a {@link HashMap}
+ * and obtains its lists from {@link CollectionCreatorFactory#createListCreator()}.
+ */
+final public class ArrayListMap<K, V> implements CollectionMap<K, V, List<V>> {
+
+   private final CollectionMap<K, V, List<V>> _delegate;
+
+
+   /**
+    * Creates an empty map backed by a HashMap with default initial capacity.
+    */
+   public ArrayListMap() {
+      this( new HashMap<K, List<V>>() );
+   }
+
+   /**
+    * @param size initial capacity of the backing HashMap
+    */
+   public ArrayListMap( final int size ) {
+      this( new HashMap<K, List<V>>( size ) );
+   }
+
+   /**
+    * Shared construction path for the public constructors.
+    *
+    * @param backingMap map that will hold the key to list associations
+    */
+   private ArrayListMap( final Map<K, List<V>> backingMap ) {
+      _delegate = new DefaultCollectionMap<>( backingMap, CollectionCreatorFactory.<V>createListCreator() );
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Iterator<Entry<K, List<V>>> iterator() {
+      return _delegate.iterator();
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return a new collection holding the list of every key; lists with equal content that are mapped to
+    * different keys are all retained
+    */
+   @Override
+   public Collection<List<V>> getAllCollections() {
+      // A HashSet would merge lists that are equal by content and would hash mutable lists,
+      // so the values are copied into a list instead.
+      return new ArrayList<>( _delegate.values() );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public List<V> getCollection( final K key ) {
+      return _delegate.getCollection( key );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public List<V> obtainCollection( final K key ) {
+      return _delegate.obtainCollection( key );
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsValue( final K key, final V value ) {
+      return _delegate.containsValue( key, value );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean placeValue( final K key, final V value ) {
+      return _delegate.placeValue( key, value );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean placeMap( final Map<K, V> map ) {
+      return _delegate.placeMap( map );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void removeValue( final K key, final V value ) {
+      _delegate.removeValue( key, value );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public <C extends Collection<V>> int addAllValues( final K key, final C collection ) {
+      return _delegate.addAllValues( key, collection );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void clearCollection( final K key ) {
+      _delegate.clearCollection( key );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int size() {
+      return _delegate.size();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean isEmpty() {
+      return _delegate.isEmpty();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsKey( final Object key ) {
+      return _delegate.containsKey( key );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsValue( final Object value ) {
+      return _delegate.containsValue( value );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public List<V> get( final Object key ) {
+      return _delegate.get( key );
+   }
+
+   // Modification Operations
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public List<V> put( final K key, final List<V> value ) {
+      return _delegate.put( key, value );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public List<V> remove( final Object key ) {
+      return _delegate.remove( key );
+   }
+
+
+   // Bulk Operations
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void putAll( final Map<? extends K, ? extends List<V>> map ) {
+      _delegate.putAll( map );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void clear() {
+      _delegate.clear();
+   }
+
+
+   // Views
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Set<K> keySet() {
+      return _delegate.keySet();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Collection<List<V>> values() {
+      return _delegate.values();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Set<Entry<K, List<V>>> entrySet() {
+      return _delegate.entrySet();
+   }
+
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the internal delegate itself, not a copy; changes made through the returned map are changes to
+    * this map
+    */
+   @Override
+   public Map<K, List<V>> toSimpleMap() {
+      return _delegate;
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionCreator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionCreator.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionCreator.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionCreator.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,17 @@
+package org.apache.ctakes.dictionary.creator.util.collection;
+
+import java.util.Collection;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/23/2014
+ */
+/**
+ * Creates new, empty collections of a single concrete kind.
+ *
+ * @param <V> type of the elements held by the created collections
+ * @param <T> type of the created collections
+ */
+public interface CollectionCreator<V, T extends Collection<V>> {
+
+   /**
+    * @return a new collection
+    */
+   T createCollection();
+
+   /**
+    * @param size sizing value handed to the implementation; the creators built by CollectionCreatorFactory
+    *             pass it to the collection constructor as the initial capacity
+    * @return a new collection
+    */
+   T createCollection( int size );
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionCreatorFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionCreatorFactory.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionCreatorFactory.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionCreatorFactory.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,46 @@
+package org.apache.ctakes.dictionary.creator.util.collection;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/23/2014
+ */
+/**
+ * Static factory for the stock {@link CollectionCreator} implementations: one producing {@link HashSet}s and
+ * one producing {@link ArrayList}s.
+ */
+final public class CollectionCreatorFactory {
+
+   // static factory methods only, never instantiated
+   private CollectionCreatorFactory() {
+   }
+
+   /**
+    * @param <V> type of the elements held by the created sets
+    * @return a creator whose collections are HashSets
+    */
+   static public <V> CollectionCreator<V, Set<V>> createSetCreator() {
+      return new HashSetCreator<>();
+   }
+
+   /**
+    * @param <V> type of the elements held by the created lists
+    * @return a creator whose collections are ArrayLists
+    */
+   static public <V> CollectionCreator<V, List<V>> createListCreator() {
+      return new ArrayListCreator<>();
+   }
+
+   /**
+    * Creator of HashSets; the size argument is used as the initial capacity.
+    */
+   static private final class HashSetCreator<V> implements CollectionCreator<V, Set<V>> {
+      @Override
+      public Set<V> createCollection() {
+         return new HashSet<>();
+      }
+
+      @Override
+      public Set<V> createCollection( final int size ) {
+         return new HashSet<>( size );
+      }
+   }
+
+   /**
+    * Creator of ArrayLists; the size argument is used as the initial capacity.
+    */
+   static private final class ArrayListCreator<V> implements CollectionCreator<V, List<V>> {
+      @Override
+      public List<V> createCollection() {
+         return new ArrayList<>();
+      }
+
+      @Override
+      public List<V> createCollection( final int size ) {
+         return new ArrayList<>( size );
+      }
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionMap.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionMap.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionMap.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/CollectionMap.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,93 @@
+package org.apache.ctakes.dictionary.creator.util.collection;
+
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 6/24/14
+ */
+/**
+ * A {@link Map} from keys to collections of values, with convenience operations that work on a single value
+ * within the collection mapped to a key.  It is also {@link Iterable} over its entries.
+ *
+ * @param <K> type of the keys
+ * @param <V> type of the values held by the per-key collections
+ * @param <T> type of the per-key collections
+ */
+public interface CollectionMap<K, V, T extends Collection<V>> extends Map<K, T>, Iterable<Map.Entry<K, T>> {
+
+   /**
+    * @return the collections of all keys
+    */
+   Collection<T> getAllCollections();
+
+
+   /**
+    * Fetches the collection mapped to a key without adding anything to this CollectionMap.
+    *
+    * @param key key for internal collection
+    * @return the collection mapped to key, or an empty collection if there is none
+    */
+   T getCollection( K key );
+
+   /**
+    * Fetches the collection mapped to a key, first adding a new one to this CollectionMap if none exists.
+    *
+    * @param key key for internal collection
+    * @return the (possibly new) collection mapped to key
+    */
+   T obtainCollection( K key );
+
+   /**
+    * Checks this CollectionMap for a key and value combination.
+    *
+    * @param key   key for internal collection
+    * @param value value to look for in the internal collection
+    * @return <tt>true</tt> if the collection mapped to key contains the value
+    */
+   boolean containsValue( K key, V value );
+
+   /**
+    * Places a value into the collection mapped to a key.
+    *
+    * @param key   key for internal collection
+    * @param value value to place in the internal collection
+    * @return <tt>true</tt> if the internal collection changed because of the call
+    */
+   boolean placeValue( K key, V value );
+
+   /**
+    * Places each value of a map into the collection mapped to that value's key.
+    *
+    * @param map key to value associations to store
+    * @return <tt>true</tt> if at least one internal collection changed because of the call
+    */
+   boolean placeMap( Map<K, V> map );
+
+   /**
+    * Removes a value from the collection mapped to a key.
+    *
+    * @param key   key for internal collection
+    * @param value value to remove from the internal collection
+    */
+   void removeValue( K key, V value );
+
+   /**
+    * Adds everything in the given collection to the internal collection mapped to a key.
+    *
+    * @param key        key for internal collection
+    * @param collection values to place in the internal collection
+    * @return the number of new items added
+    */
+   <C extends Collection<V>> int addAllValues( K key, C collection );
+
+   /**
+    * Clears the collection mapped to a key.
+    *
+    * @param key key for internal collection
+    */
+   void clearCollection( K key );
+
+   /**
+    * Presents this object as a plain java.util.Map of collections.
+    * NOTE(review): this was originally described as a copy; whether the result is a copy or a live view is up
+    * to the implementation - confirm before mutating the returned map.
+    *
+    * @return map of java.util.Collection
+    */
+   Map<K, T> toSimpleMap();
+
+
+}

Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/DefaultCollectionMap.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/DefaultCollectionMap.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/DefaultCollectionMap.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/collection/DefaultCollectionMap.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,279 @@
+package org.apache.ctakes.dictionary.creator.util.collection;
+
+import java.util.*;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/23/2014
+ */
+/**
+ * {@link CollectionMap} that stores its key to collection associations in a caller-supplied {@link Map} and
+ * creates new per-key collections with a caller-supplied {@link CollectionCreator}.
+ *
+ * @param <K> type of the keys
+ * @param <V> type of the values held by the per-key collections
+ * @param <T> type of the per-key collections
+ */
+final public class DefaultCollectionMap<K, V, T extends Collection<V>> implements CollectionMap<K, V, T> {
+
+   private final Map<K, T> _delegate;
+   private final CollectionCreator<V, T> _collectionCreator;
+
+   /**
+    * @param delegate          map used as storage; it is used directly, not copied
+    * @param collectionCreator creator of the per-key collections
+    * @throws NullPointerException if collectionCreator is null
+    */
+   public DefaultCollectionMap( final Map<K, T> delegate, final CollectionCreator<V, T> collectionCreator ) {
+      _delegate = delegate;
+      // fail at construction for a missing creator rather than at the first insertion
+      _collectionCreator = Objects.requireNonNull( collectionCreator, "collectionCreator" );
+   }
+
+   /**
+    * {@inheritDoc}
+    * <p>
+    * The iterator and the entries it produces are read-only wrappers around the entries of the backing map:
+    * {@code remove()} on the iterator does nothing, and {@code setValue(..)} on an entry does nothing and
+    * returns null.
+    */
+   @Override
+   public Iterator<Entry<K, T>> iterator() {
+      final Iterator<Entry<K, T>> entryIterator = _delegate.entrySet().iterator();
+      return new Iterator<Entry<K, T>>() {
+         @Override
+         public boolean hasNext() {
+            return entryIterator.hasNext();
+         }
+
+         @Override
+         public Entry<K, T> next() {
+            final Entry<K, T> backingEntry = entryIterator.next();
+            return new Entry<K, T>() {
+               @Override
+               public K getKey() {
+                  return backingEntry.getKey();
+               }
+
+               @Override
+               public T getValue() {
+                  return backingEntry.getValue();
+               }
+
+               @Override
+               public T setValue( final T value ) {
+                  // read-only entry: the value is deliberately left untouched
+                  return null;
+               }
+            };
+         }
+
+         @Override
+         public void remove() {
+            // read-only iterator: removal is deliberately ignored
+         }
+      };
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return a new collection holding the collection of every key; collections with equal content that are
+    * mapped to different keys are all retained
+    */
+   @Override
+   public Collection<T> getAllCollections() {
+      // A HashSet would merge collections that are equal by content and would hash mutable collections,
+      // so the values are copied into a list instead.
+      return new ArrayList<>( _delegate.values() );
+   }
+
+   /**
+    * {@inheritDoc}
+    * <p>
+    * When there is no collection for the key, a new empty collection is returned that is not stored in this
+    * map; use {@link #obtainCollection(Object)} to get a collection that is.
+    */
+   @Override
+   public T getCollection( final K key ) {
+      final T collection = _delegate.get( key );
+      if ( collection != null ) {
+         return collection;
+      }
+      // A fresh instance per miss: a single shared mutable "empty" collection could be filled by one caller
+      // and then handed, no longer empty, to every later caller.
+      return _collectionCreator.createCollection();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public T obtainCollection( final K key ) {
+      T collection = _delegate.get( key );
+      if ( collection == null ) {
+         collection = _collectionCreator.createCollection();
+         _delegate.put( key, collection );
+      }
+      return collection;
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsValue( final K key, final V value ) {
+      final T collection = _delegate.get( key );
+      return collection != null && collection.contains( value );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean placeValue( final K key, final V value ) {
+      return obtainCollection( key ).add( value );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean placeMap( final Map<K, V> map ) {
+      boolean placedAny = false;
+      for ( Entry<K, V> entry : map.entrySet() ) {
+         // non-short-circuit OR: every entry must be placed even after one has succeeded
+         placedAny |= placeValue( entry.getKey(), entry.getValue() );
+      }
+      return placedAny;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void removeValue( final K key, final V value ) {
+      final T collection = _delegate.get( key );
+      if ( collection != null ) {
+         collection.remove( value );
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    * <p>
+    * A null or empty collection of values adds nothing and does not create a collection for the key.
+    */
+   @Override
+   public <C extends Collection<V>> int addAllValues( final K key, final C values ) {
+      if ( values == null || values.isEmpty() ) {
+         return 0;
+      }
+      final T collection = obtainCollection( key );
+      final int oldSize = collection.size();
+      collection.addAll( values );
+      return collection.size() - oldSize;
+   }
+
+   /**
+    * {@inheritDoc}
+    * <p>
+    * The key stays mapped to its (now empty) collection.
+    */
+   @Override
+   public void clearCollection( final K key ) {
+      final T collection = _delegate.get( key );
+      if ( collection != null ) {
+         collection.clear();
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int size() {
+      return _delegate.size();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean isEmpty() {
+      return _delegate.isEmpty();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsKey( final Object key ) {
+      return _delegate.containsKey( key );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsValue( final Object value ) {
+      return _delegate.containsValue( value );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public T get( final Object key ) {
+      return _delegate.get( key );
+   }
+
+   // Modification Operations
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public T put( final K key, final T value ) {
+      return _delegate.put( key, value );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public T remove( final Object key ) {
+      return _delegate.remove( key );
+   }
+
+
+   // Bulk Operations
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void putAll( final Map<? extends K, ? extends T> map ) {
+      _delegate.putAll( map );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void clear() {
+      _delegate.clear();
+   }
+
+
+   // Views
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Set<K> keySet() {
+      return _delegate.keySet();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Collection<T> values() {
+      return _delegate.values();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Set<Entry<K, T>> entrySet() {
+      return _delegate.entrySet();
+   }
+
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the backing map itself, not a copy; changes made through the returned map are changes to this map
+    */
+   @Override
+   public Map<K, T> toSimpleMap() {
+      return _delegate;
+   }
+
+
+}