You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2017/03/16 21:55:01 UTC
svn commit: r1787257 [3/5] - in /ctakes/trunk: ./ ctakes-dictionary-gui/
ctakes-dictionary-gui/resources/ ctakes-dictionary-gui/resources/org/
ctakes-dictionary-gui/resources/org/apache/
ctakes-dictionary-gui/resources/org/apache/ctakes/ ctakes-diction...
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryBuilder.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryBuilder.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryBuilder.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,167 @@
+package org.apache.ctakes.dictionary.creator.gui.ctakes;
+
+
+import org.apache.ctakes.dictionary.creator.gui.umls.*;
+import org.apache.ctakes.dictionary.creator.util.HsqlUtil;
+import org.apache.ctakes.dictionary.creator.util.RareWordDbWriter;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.File;
+import java.util.*;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+
+/**
+ * Builds a cTAKES dictionary from a UMLS installation: parses the wanted concepts, culls
+ * unwanted concepts and texts, then writes the result through {@link HsqlUtil},
+ * {@link DictionaryXmlWriter} and {@link RareWordDbWriter}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/11/2015
+ */
+final public class DictionaryBuilder {
+
+   static private final Logger LOGGER = LogManager.getLogger( "DictionaryBuilder" );
+
+   // TODO static private final String DEFAULT_DATA_DIR = "resources/org/apache/ctakes/dictionary-gui/data/default";
+   // Data directory handed to UmlsTermUtil.  Currently the "tiny" set, not the "default" set named in the TODO.
+   static private final String DEFAULT_DATA_DIR = "resources/org/apache/ctakes/dictionary-gui/data/tiny";
+   // Database directory, relative to the cTAKES directory
+   static private final String CTAKES_APP_DB_PATH = "resources/org/apache/ctakes/dictionary/lookup/fast";
+   // When a directory with this name exists directly under the cTAKES directory, the database is written below it
+   static private final String CTAKES_RES_MODULE = "ctakes-dictionary-lookup-fast-res";
+   static private final String CTAKES_RES_DB_PATH = CTAKES_RES_MODULE + "/src/main/" + CTAKES_APP_DB_PATH;
+   static private final String RXNORM = "RXNORM";
+   // Limits passed to MrconsoParser.parseAllConcepts
+   // NOTE(review): presumably bounds on the characters / words / symbols of a term text - confirm in MrconsoParser
+   static private final int MIN_CHAR_LENGTH = 2;
+   static private final int MAX_CHAR_LENGTH = 50;
+   static private final int MAX_WORD_COUNT = 12;
+   static private final int MAX_SYM_COUNT = 7;
+   // Cuis of the two special-cased concepts
+   static private final long NITRIC_OXIDE_CUI = 28128L;
+   static private final long NITRIC_OXIDE_SYNTHASE_CUI = 132555L;
+
+   private DictionaryBuilder() {}
+
+   /**
+    * Parses the UMLS installation and writes the resulting dictionary database and xml descriptor.
+    *
+    * @param umlsDirPath     path to the UMLS installation
+    * @param ctakesDirPath   path to the cTAKES directory under which the dictionary is written
+    * @param dictionaryName  name of the dictionary to create
+    * @param wantedLanguages languages passed to the MRCONSO parser
+    * @param wantedSources   source vocabularies used to create the initial concept map
+    * @param wantedTargets   target vocabularies passed to the MRCONSO parser
+    * @param wantedTuis      semantic types (Tuis) to keep
+    * @return true if the database, the xml file and the concepts were all written
+    */
+   static public boolean buildDictionary( final String umlsDirPath,
+                                          final String ctakesDirPath,
+                                          final String dictionaryName,
+                                          final Collection<String> wantedLanguages,
+                                          final Collection<String> wantedSources,
+                                          final Collection<String> wantedTargets,
+                                          final Collection<Tui> wantedTuis ) {
+      // Set up the term utility
+      final UmlsTermUtil umlsTermUtil = new UmlsTermUtil( DEFAULT_DATA_DIR );
+      final Map<Long,Concept> conceptMap
+            = parseAll( umlsTermUtil, umlsDirPath, wantedLanguages, wantedSources, wantedTargets, wantedTuis );
+
+      // special case for nitric oxide "no"
+      final Concept nitricOxide = conceptMap.get( NITRIC_OXIDE_CUI );
+      if ( nitricOxide != null ) {
+         nitricOxide.removeTexts( Collections.singletonList( "no" ) );
+      }
+      // special case for nitric oxide synthase "nos"
+      final Concept nitricOxideSynthase = conceptMap.get( NITRIC_OXIDE_SYNTHASE_CUI );
+      if ( nitricOxideSynthase != null ) {
+         nitricOxideSynthase.removeTexts( Arrays.asList( "nos", "synthase" ) );
+      }
+
+      return writeDatabase( ctakesDirPath, dictionaryName, conceptMap );
+   }
+
+   /**
+    * Creates the concept map, fills it from MRCONSO and removes unwanted concepts and texts.
+    *
+    * @return map of cui to concept, after culling and text minimization
+    */
+   static private Map<Long,Concept> parseAll( final UmlsTermUtil umlsTermUtil,
+                                              final String umlsDirPath,
+                                              final Collection<String> wantedLanguages,
+                                              final Collection<String> wantedSources,
+                                              final Collection<String> wantedTargets,
+                                              final Collection<Tui> wantedTuis ) {
+      LOGGER.info( "Parsing Concepts" );
+      // Create a map of Cuis to empty Concepts for all wanted Tuis and source vocabularies
+      final Map<Long,Concept> conceptMap
+            = ConceptMapFactory.createInitialConceptMap( umlsDirPath, wantedSources, wantedTuis );
+      // Fill in information for all valid concepts
+      MrconsoParser.parseAllConcepts( umlsDirPath, conceptMap, wantedTargets, umlsTermUtil,
+            wantedLanguages, true, MIN_CHAR_LENGTH, MAX_CHAR_LENGTH, MAX_WORD_COUNT, MAX_SYM_COUNT );
+      removeUnwantedConcepts( conceptMap );
+      removeUnwantedDrugs( conceptMap, wantedTuis );
+      // Cull non-ANAT texts by ANAT texts as determined by ANAT tuis
+      removeAnatTexts( conceptMap.values(), wantedTuis );
+      conceptMap.values().forEach( Concept::minimizeTexts );
+      LOGGER.info( "Done Parsing Concepts" );
+      return conceptMap;
+   }
+
+   /**
+    * Remove any concepts that are unwanted - don't have any text from a desired vocabulary
+    *
+    * @param conceptMap map of cui to concept; entries whose concept reports isUnwanted() are removed in place
+    */
+   static private void removeUnwantedConcepts( final Map<Long, Concept> conceptMap ) {
+      final Collection<Long> empties = conceptMap.entrySet().stream()
+                                                 .filter( e -> e.getValue().isUnwanted() )
+                                                 .map( Map.Entry::getKey )
+                                                 .collect( Collectors.toSet() );
+      conceptMap.keySet().removeAll( empties );
+   }
+
+   /**
+    * @param concepts   all concepts
+    * @param wantedTuis wanted Tuis; only those also listed in TuiTableModel.CTAKES_ANAT are considered
+    * @return the set of all texts of concepts for which hasTui(..) holds on the wanted anatomy Tuis
+    */
+   static private Collection<String> getAnatTexts( final Collection<Concept> concepts,
+                                                   final Collection<Tui> wantedTuis ) {
+      final Collection<Tui> wantedAnatTuis = new ArrayList<>( wantedTuis );
+      wantedAnatTuis.retainAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+      return concepts.stream()
+                     .filter( c -> c.hasTui( wantedAnatTuis ) )
+                     .map( Concept::getTexts )
+                     .flatMap( Collection::stream )
+                     .collect( Collectors.toSet() );
+   }
+
+   /**
+    * Removes the given anatomy texts from every concept for which hasTui(..) holds on the wanted
+    * Tuis that are not listed in TuiTableModel.CTAKES_ANAT.
+    */
+   static private void removeAnatTexts( final Collection<Concept> concepts,
+                                        final Collection<Tui> wantedTuis,
+                                        final Collection<String> anatTexts ) {
+      final Collection<Tui> nonAnatTuis = new ArrayList<>( wantedTuis );
+      nonAnatTuis.removeAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+      concepts.stream()
+              .filter( c -> c.hasTui( nonAnatTuis ) )
+              .forEach( c -> c.removeTexts( anatTexts ) );
+   }
+
+   /**
+    * Collects the anatomy texts of the given concepts, then removes them from the non-anatomy concepts.
+    */
+   static private void removeAnatTexts( final Collection<Concept> concepts,
+                                        final Collection<Tui> wantedTuis ) {
+      final Collection<String> anatTexts = getAnatTexts( concepts, wantedTuis );
+      removeAnatTexts( concepts, wantedTuis, anatTexts );
+   }
+
+   /**
+    * Removes, in place, every concept that either
+    * is not in RXNORM and has all of its Tuis among the wanted drug Tuis, or
+    * is in RXNORM and has all of its Tuis among the wanted non-drug Tuis.
+    *
+    * @param conceptMap map of cui to concept
+    * @param wantedTuis wanted Tuis, split into drug / non-drug by TuiTableModel.CTAKES_DRUG
+    */
+   static private void removeUnwantedDrugs( final Map<Long,Concept> conceptMap, Collection<Tui> wantedTuis ) {
+      final Collection<Tui> drugTuis = new ArrayList<>( wantedTuis );
+      drugTuis.retainAll( Arrays.asList( TuiTableModel.CTAKES_DRUG ) );
+      final Collection<Tui> nonDrugTuis = new ArrayList<>( wantedTuis );
+      nonDrugTuis.removeAll( Arrays.asList( TuiTableModel.CTAKES_DRUG ) );
+      // Same truth table as ( onlyDrugTuis && !inRxNorm ) || ( inRxNorm && onlyNonDrugTuis )
+      final Predicate<Map.Entry<Long,Concept>> unwantedDrug = e -> {
+         final Concept concept = e.getValue();
+         final boolean inRxNorm = concept.getVocabularies().contains( RXNORM );
+         return inRxNorm
+                ? nonDrugTuis.containsAll( concept.getTuis() )
+                : drugTuis.containsAll( concept.getTuis() );
+      };
+
+      final Collection<Long> removalCuis = conceptMap.entrySet().stream()
+                                                     .filter( unwantedDrug )
+                                                     .map( Map.Entry::getKey )
+                                                     .collect( Collectors.toSet() );
+      conceptMap.keySet().removeAll( removalCuis );
+   }
+
+   /**
+    * Creates the hsql database and the dictionary xml file, then writes the concepts to the database.
+    *
+    * @param ctakesDirPath  cTAKES directory; must be an existing, readable directory
+    * @param dictionaryName name of the dictionary
+    * @param conceptMap     concepts to write
+    * @return false if the cTAKES directory cannot be listed or any of the write steps reports failure
+    */
+   static private boolean writeDatabase( final String ctakesDirPath,
+                                         final String dictionaryName,
+                                         final Map<Long,Concept> conceptMap ) {
+      final File ctakesRoot = new File( ctakesDirPath );
+      // File.list() returns null when the path is not a directory or cannot be read
+      final String[] rootChildren = ctakesRoot.list();
+      if ( rootChildren == null ) {
+         LOGGER.error( "Cannot list cTAKES directory " + ctakesDirPath );
+         return false;
+      }
+      String databaseDirPath = ctakesDirPath + "/" + CTAKES_APP_DB_PATH;
+      if ( Arrays.asList( rootChildren ).contains( CTAKES_RES_MODULE ) ) {
+         databaseDirPath = ctakesDirPath + "/" + CTAKES_RES_DB_PATH;
+      }
+      if ( !HsqlUtil.createDatabase( databaseDirPath, dictionaryName ) ) {
+         return false;
+      }
+      if ( !DictionaryXmlWriter.writeXmlFile( databaseDirPath, dictionaryName ) ) {
+         return false;
+      }
+      final String url = HsqlUtil.URL_PREFIX + databaseDirPath.replace( '\\', '/' )
+                         + "/" + dictionaryName + "/" + dictionaryName;
+      return RareWordDbWriter.writeConcepts( conceptMap, url, "sa", "" );
+   }
+
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,131 @@
+package org.apache.ctakes.dictionary.creator.gui.ctakes;
+
+
+import org.apache.ctakes.dictionary.creator.gui.umls.Vocabulary;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.*;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+public class DictionaryXmlWriter {
+
+ static private final Logger LOGGER = LogManager.getLogger( "DictionaryXmlWriter" );
+
+
+ static public boolean writeXmlFile( final String databaseDir, final String databaseName ) {
+ final File scriptFile = new File( databaseDir, databaseName + ".xml" );
+ try ( final Writer writer = new BufferedWriter( new FileWriter( scriptFile ) ) ) {
+ writer.write( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" );
+ writer.write( "<!--\n" );
+ writer.write( "Licensed to the Apache Software Foundation (ASF) under one\n" );
+ writer.write( "or more contributor license agreements. See the NOTICE file\n" );
+ writer.write( "distributed with this work for additional information\n" );
+ writer.write( "regarding copyright ownership. The ASF licenses this file\n" );
+ writer.write( "to you under the Apache License, Version 2.0 (the\n" );
+ writer.write( "\"License\"); you may not use this file except in compliance\n" );
+ writer.write( "with the License. You may obtain a copy of the License at\n" );
+ writer.write( "http://www.apache.org/licenses/LICENSE-2.0\n" );
+ writer.write( "Unless required by applicable law or agreed to in writing,\n" );
+ writer.write( "software distributed under the License is distributed on an\n" );
+ writer.write( "\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n" );
+ writer.write( "KIND, either express or implied. See the License for the\n" );
+ writer.write( "specific language governing permissions and limitations\n" );
+ writer.write( "under the License.\n" );
+ writer.write( "-->\n\n" );
+ writer.write( "<!-- New format for the .xml lookup specification. Uses table name and value type/class for Concept Factories. -->\n" );
+ writer.write( "<lookupSpecification>\n" );
+ writer.write( "<dictionaries>\n" );
+ writer.write( " <dictionary>\n" );
+ writer.write( " <name>" + databaseName + "Terms</name>\n" );
+ writer.write( " <implementationName>org.apache.ctakes.dictionary.lookup2.dictionary.JdbcRareWordDictionary</implementationName>\n" );
+ writer.write( " <properties>\n" );
+ writer.write( "<!-- urls for hsqldb memory connections must be file types in hsql 1.8.\n" );
+ writer.write( "These file urls must be either absolute path or relative to current working directory.\n" );
+ writer.write( "They cannot be based upon the classpath.\n" );
+ writer.write( "Though JdbcConnectionFactory will attempt to \"find\" a db based upon the parent dir of the url\n" );
+ writer.write( "for the sake of ide ease-of-use, the user should be aware of these hsql limitations.\n" );
+ writer.write( "-->\n" );
+ writer.write( createProperty( "jdbcDriver", "org.hsqldb.jdbcDriver" ) );
+ writer.write( createProperty( "jdbcUrl",
+ "jdbc:hsqldb:file:resources/org/apache/ctakes/dictionary/lookup/fast/" + databaseName + "/" + databaseName ) );
+ writer.write( createProperty( "jdbcUser", "sa" ) );
+ writer.write( createProperty( "jdbcPass", "" ) );
+ writer.write( createProperty( "rareWordTable", "cui_terms" ) );
+ writer.write( createProperty( "umlsUrl", "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser" ) );
+ writer.write( createProperty( "umlsVendor", "NLM-6515182895" ) );
+ writer.write( createProperty( "umlsUser", "CHANGE_ME" ) );
+ writer.write( createProperty( "umlsPass", "CHANGE_ME" ) );
+ writer.write( " </properties>\n" );
+ writer.write( " </dictionary>\n" );
+ writer.write( "</dictionaries>\n" );
+ writer.write( "\n" );
+ writer.write( "<conceptFactories>\n" );
+ writer.write( " <conceptFactory>\n" );
+ writer.write( " <name>" + databaseName + "Concepts</name>\n" );
+ writer.write( " <implementationName>org.apache.ctakes.dictionary.lookup2.concept.JdbcConceptFactory</implementationName>\n" );
+ writer.write( " <properties>\n" );
+ writer.write( createProperty( "jdbcDriver", "org.hsqldb.jdbcDriver" ) );
+ writer.write( createProperty( "jdbcUrl",
+ "jdbc:hsqldb:file:resources/org/apache/ctakes/dictionary/lookup/fast/" + databaseName + "/" + databaseName ) );
+ writer.write( createProperty( "jdbcUser", "sa" ) );
+ writer.write( createProperty( "jdbcPass", "" ) );
+ writer.write( createProperty( "umlsUrl", "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser" ) );
+ writer.write( createProperty( "umlsVendor", "NLM-6515182895" ) );
+ writer.write( createProperty( "umlsUser", "CHANGE_ME" ) );
+ writer.write( createProperty( "umlsPass", "CHANGE_ME" ) );
+ writer.write( createProperty( "tuiTable", "tui" ) );
+ writer.write( createProperty( "prefTermTable", "prefTerm" ) );
+ writer.write( "<!-- Optional tables for optional term info.\n" );
+ writer.write( "Uncommenting these lines alone may not persist term information;\n" );
+ writer.write( "persistence depends upon the TermConsumer. -->\n" );
+ for ( String vocabulary : Vocabulary.getInstance().getAllVocabularies() ) {
+ writer.write( createProperty( vocabulary.toLowerCase().replace( '.','_' ).replace('-', '_')
+ + "Table", Vocabulary.getInstance().getCtakesClass( vocabulary ) ) );
+ }
+ writer.write( " </properties>\n" );
+ writer.write( " </conceptFactory>\n" );
+ writer.write( "</conceptFactories>\n" );
+ writer.write( "\n" );
+ writer.write( "<!-- Defines what terms and concepts will be used -->\n" );
+ writer.write( "<dictionaryConceptPairs>\n" );
+ writer.write( " <dictionaryConceptPair>\n" );
+ writer.write( " <name>" + databaseName + "Pair</name>\n" );
+ writer.write( " <dictionaryName>" + databaseName + "Terms</dictionaryName>\n" );
+ writer.write( " <conceptFactoryName>" + databaseName + "Concepts</conceptFactoryName>\n" );
+ writer.write( " </dictionaryConceptPair>\n" );
+ writer.write( "</dictionaryConceptPairs>\n" );
+ writer.write( "\n" );
+ writer.write( "<!-- DefaultTermConsumer will persist all spans.\n" );
+ writer.write( "PrecisionTermConsumer will only persist only the longest overlapping span of any semantic group.\n" );
+ writer.write( "SemanticCleanupTermConsumer works as Precision** but also removes signs/sympoms contained within disease/disorder,\n" );
+ writer.write( "and (just in case) removes any s/s and d/d that are also (exactly) anatomical sites. -->\n" );
+ writer.write( "<rareWordConsumer>\n" );
+ writer.write( " <name>Term Consumer</name>\n" );
+ writer.write( " <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>\n" );
+ writer.write( " <!--<implementationName>org.apache.ctakes.dictionary.lookup2.consumer.PrecisionTermConsumer</implementationName>-->\n" );
+ writer.write( " <!--<implementationName>org.apache.ctakes.dictionary.lookup2.consumer.SemanticCleanupTermConsumer</implementationName>-->\n" );
+ writer.write( " <properties>\n" );
+ writer.write( "<!-- Depending upon the consumer, the value of codingScheme may or may not be used. With the packaged consumers,\n" );
+ writer.write( "codingScheme is a default value used only for cuis that do not have secondary codes (snomed, rxnorm, etc.) -->\n" );
+ writer.write( createProperty( "codingScheme", databaseName ) );
+ writer.write( " </properties>\n" );
+ writer.write( "</rareWordConsumer>\n" );
+ writer.write( "\n" );
+ writer.write( "</lookupSpecification>\n" );
+ } catch ( IOException ioE ) {
+ LOGGER.error( ioE.getMessage() );
+ return false;
+ }
+ return true;
+ }
+
+ static private String createProperty( final String name, final String value ) {
+ return " <property key=\"" + name + "\" value=\"" + value + "\"/>\n";
+ }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/DirChooser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/DirChooser.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/DirChooser.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/DirChooser.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,120 @@
+package org.apache.ctakes.dictionary.creator.gui.main;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.*;
+import javax.swing.border.EmptyBorder;
+import javax.swing.text.JTextComponent;
+import java.awt.*;
+import java.awt.datatransfer.DataFlavor;
+import java.awt.datatransfer.UnsupportedFlavorException;
+import java.awt.dnd.DnDConstants;
+import java.awt.dnd.DropTarget;
+import java.awt.dnd.DropTargetDropEvent;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+public class DirChooser extends JPanel {
+
+ static private final Logger LOGGER = LogManager.getLogger( "DirChooser" );
+
+ public DirChooser( final String name, final String defaultDirectory, final ActionListener dirChangeListener ) {
+ super( new BorderLayout( 10, 10 ) );
+ setBorder( new EmptyBorder( 2, 10, 2, 10 ) );
+ final JLabel label = new JLabel( name );
+ label.setPreferredSize( new Dimension( 100, 0 ) );
+ label.setHorizontalAlignment( SwingConstants.TRAILING );
+ final JTextField textField = new JTextField( defaultDirectory );
+ textField.setEditable( false );
+ final JButton openChooserButton = new JButton( new OpenDirAction( textField, dirChangeListener ) );
+ add( label, BorderLayout.WEST );
+ add( textField, BorderLayout.CENTER );
+ add( openChooserButton, BorderLayout.EAST );
+
+ textField.setDropTarget( new DirDropTarget( textField, dirChangeListener ) );
+ textField.addActionListener( dirChangeListener );
+ }
+
+ /**
+ * Opens the JFileChooser
+ */
+ private class OpenDirAction extends AbstractAction {
+ private final JFileChooser __chooser;
+ private final JTextComponent __textComponent;
+ private final ActionListener __dirChangeListener;
+
+ private OpenDirAction( final JTextComponent textComponent, final ActionListener dirChangeListener ) {
+ super( "Select Directory" );
+ __textComponent = textComponent;
+ __chooser = new JFileChooser();
+ __chooser.setFileSelectionMode( JFileChooser.DIRECTORIES_ONLY );
+ __dirChangeListener = dirChangeListener;
+ }
+
+ @Override
+ public void actionPerformed( final ActionEvent event ) {
+ final String startDirPath = __textComponent.getText();
+ if ( startDirPath != null && !startDirPath.isEmpty() ) {
+ final File startingDir = new File( startDirPath );
+ if ( startingDir.exists() ) {
+ __chooser.setCurrentDirectory( startingDir );
+ }
+ }
+ final int option = __chooser.showOpenDialog( null );
+ if ( option != JFileChooser.APPROVE_OPTION ) {
+ return;
+ }
+ final File file = __chooser.getSelectedFile();
+ __textComponent.setText( file.getAbsolutePath() );
+ final ActionEvent dirEvent = new ActionEvent( this, ActionEvent.ACTION_FIRST, file.getAbsolutePath() );
+ __dirChangeListener.actionPerformed( dirEvent );
+ }
+ }
+
+
+ private class DirDropTarget extends DropTarget {
+ private final JTextComponent __textComponent;
+ private final ActionListener __dirChangeListener;
+ private DirDropTarget( final JTextComponent textComponent, final ActionListener dirChangeListener ) {
+ __textComponent = textComponent;
+ __dirChangeListener = dirChangeListener;
+ }
+ @Override
+ public synchronized void drop( final DropTargetDropEvent event ) {
+ event.acceptDrop( DnDConstants.ACTION_COPY );
+ try {
+ final Object values = event.getTransferable().getTransferData( DataFlavor.javaFileListFlavor );
+ if ( !(values instanceof Iterable) ) {
+ return;
+ }
+ for ( Object value : (Iterable)values ) {
+ if ( !(value instanceof File) ) {
+ continue;
+ }
+ final File file = (File)value;
+ if ( !file.isDirectory() ) {
+ continue;
+ }
+ __textComponent.setText( file.getAbsolutePath() );
+ final ActionEvent dirEvent
+ = new ActionEvent( this, ActionEvent.ACTION_FIRST, file.getAbsolutePath() );
+ __dirChangeListener.actionPerformed( dirEvent );
+ return;
+ }
+ } catch ( UnsupportedFlavorException | IOException multE ) {
+ LOGGER.warn( multE.getMessage() );
+ }
+ }
+ }
+
+
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,233 @@
+package org.apache.ctakes.dictionary.creator.gui.main;
+
+import org.apache.ctakes.dictionary.creator.gui.ctakes.DictionaryBuilder;
+import org.apache.ctakes.dictionary.creator.gui.umls.MrconsoIndex;
+import org.apache.ctakes.dictionary.creator.gui.umls.SourceTableModel;
+import org.apache.ctakes.dictionary.creator.gui.umls.Tui;
+import org.apache.ctakes.dictionary.creator.gui.umls.TuiTableModel;
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.*;
+import javax.swing.border.EmptyBorder;
+import javax.swing.table.TableModel;
+import javax.swing.text.JTextComponent;
+import java.awt.*;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+final public class MainPanel extends JPanel {
+
+ static private final Logger LOGGER = LogManager.getLogger( "MainPanel" );
+
+ private String _umlsDirPath = System.getProperty( "user.dir" );
+ private String _ctakesPath = System.getProperty( "user.dir" );
+ final TuiTableModel _tuiModel = new TuiTableModel();
+ final SourceTableModel _sourceModel = new SourceTableModel();
+
+ public MainPanel() {
+ super( new BorderLayout() );
+
+ final JComponent sourceDirPanel = new JPanel( new GridLayout( 2, 1 ) );
+ sourceDirPanel.add( new DirChooser( "cTAKES Installation:", _umlsDirPath, new CtakesDirListener() ) );
+ sourceDirPanel.add( new DirChooser( "UMLS Installation:", _ctakesPath, new UmlsDirListener() ) );
+ add( sourceDirPanel, BorderLayout.NORTH );
+
+ final JComponent centerPanel = new JPanel( new GridLayout( 1, 2 ) );
+ centerPanel.add( createSourceTable( _sourceModel ) );
+ centerPanel.add( createTuiTable( _tuiModel ) );
+ add( centerPanel, BorderLayout.CENTER );
+ add( createGoPanel(), BorderLayout.SOUTH );
+ }
+
+ private JComponent createTuiTable( final TableModel tuiModel ) {
+ final JTable tuiTable = new JTable( tuiModel );
+ tuiTable.setCellSelectionEnabled( false );
+ tuiTable.setShowVerticalLines( false );
+ tuiTable.setAutoCreateRowSorter( true );
+ tuiTable.setAutoResizeMode( JTable.AUTO_RESIZE_LAST_COLUMN );
+ tuiTable.getColumnModel().getColumn( 0 ).setMaxWidth( 50 );
+ tuiTable.getColumnModel().getColumn( 1 ).setMaxWidth( 50 );
+ return new JScrollPane( tuiTable );
+ }
+
+ private JComponent createSourceTable( final TableModel sourceModel ) {
+ final JTable tuiTable = new JTable( sourceModel );
+ tuiTable.setCellSelectionEnabled( false );
+ tuiTable.setShowVerticalLines( false );
+ tuiTable.setAutoCreateRowSorter( true );
+ tuiTable.setAutoResizeMode( JTable.AUTO_RESIZE_LAST_COLUMN );
+ tuiTable.getColumnModel().getColumn( 0 ).setMaxWidth( 50 );
+ tuiTable.getColumnModel().getColumn( 1 ).setMaxWidth( 50 );
+ return new JScrollPane( tuiTable );
+ }
+
+ private JComponent createGoPanel() {
+ final JPanel panel = new JPanel( new BorderLayout( 10, 10 ) );
+ panel.setBorder( new EmptyBorder( 2, 10, 2, 10 ) );
+ final JLabel label = new JLabel( "Dictionary Name:" );
+ label.setPreferredSize( new Dimension( 100, 0 ) );
+ label.setHorizontalAlignment( SwingConstants.TRAILING );
+ final JTextField textField = new JTextField( "custom" );
+ final JButton buildButton = new JButton( new BuildDictionaryAction( textField ) );
+ panel.add( label, BorderLayout.WEST );
+ panel.add( textField, BorderLayout.CENTER );
+ panel.add( buildButton, BorderLayout.EAST );
+ return panel;
+ }
+
+ private String setUmlsDirPath( final String umlsDirPath ) {
+ File mrConso = new File( umlsDirPath, "MRCONSO.RRF" );
+ if ( mrConso.isFile() ) {
+ _umlsDirPath = mrConso.getParentFile().getParent();
+ }
+ final String plusMetaPath = new File( umlsDirPath, "META" ).getPath();
+ mrConso = new File( plusMetaPath, "MRCONSO.RRF" );
+ if ( mrConso.isFile() ) {
+ _umlsDirPath = umlsDirPath;
+ } else {
+ error( "Invalid UMLS Installation", umlsDirPath + " is not a valid path to a UMLS installation" );
+ }
+ return _umlsDirPath;
+ }
+
+ private void loadSources() {
+ SwingUtilities.invokeLater( new SourceLoadRunner( _umlsDirPath ) );
+ }
+
+ private class SourceLoadRunner implements Runnable {
+ private final String __umlsDirPath;
+ private SourceLoadRunner( final String umlsDirPath ) {
+ __umlsDirPath = umlsDirPath;
+ }
+ public void run() {
+ SwingUtilities.getRoot( MainPanel.this ).setCursor( Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR ) );
+ final File mrConso = new File( __umlsDirPath + "/META", "MRCONSO.RRF" );
+ final String mrConsoPath = mrConso.getPath();
+ LOGGER.info( "Parsing vocabulary types from " + mrConsoPath );
+ final Collection<String> sources = new HashSet<>();
+ try ( final BufferedReader reader = FileUtil.createReader( mrConsoPath ) ) {
+ int lineCount = 0;
+ java.util.List<String> tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
+ while ( tokens != null ) {
+ lineCount++;
+ if ( tokens.size() > MrconsoIndex.SOURCE._index ) {
+ sources.add( tokens.get( MrconsoIndex.SOURCE._index ) );
+ }
+ if ( lineCount % 100000 == 0 ) {
+ LOGGER.info( "File Line " + lineCount + "\t Vocabularies " + sources.size() );
+ }
+ tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
+ }
+ LOGGER.info( "Parsed " + sources.size() + " vocabulary types" );
+ _sourceModel.setSources( sources );
+ } catch ( IOException ioE ) {
+ error( "Vocabulary Parse Error", ioE.getMessage() );
+ }
+ SwingUtilities.getRoot( MainPanel.this ).setCursor( Cursor.getDefaultCursor() );
+ }
+ }
+
+ private void buildDictionary( final String dictionaryName ) {
+ SwingUtilities.invokeLater(
+ new DictionaryBuildRunner( _umlsDirPath, _ctakesPath, dictionaryName, _sourceModel.getWantedSources(),
+ _sourceModel.getWantedTargets(), _tuiModel.getWantedTuis() ) );
+ }
+
+ private void error( final String title, final String message ) {
+ LOGGER.error( message );
+ JOptionPane.showMessageDialog( MainPanel.this, message, title, JOptionPane.ERROR_MESSAGE );
+ }
+
+
+
+ private class DictionaryBuildRunner implements Runnable {
+ private final String __umlsDirPath;
+ private final String __ctakesDirPath;
+ private final String __dictionaryName;
+ private final Collection<String> __wantedSources;
+ private final Collection<String> __wantedTargets;
+ private final Collection<Tui> __wantedTuis;
+ private DictionaryBuildRunner( final String umlsDirPath, final String ctakesDirPath, final String dictionaryName,
+ final Collection<String> wantedSources,
+ final Collection<String> wantedTargets,
+ final Collection<Tui> wantedTuis ) {
+ __umlsDirPath = umlsDirPath;
+ __ctakesDirPath = ctakesDirPath;
+ __dictionaryName = dictionaryName;
+ __wantedSources = wantedSources;
+ __wantedTargets = new ArrayList<>( wantedTargets );
+ __wantedTuis = new ArrayList<>( wantedTuis );
+ }
+
+ public void run() {
+ SwingUtilities.getRoot( MainPanel.this ).setCursor( Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR ) );
+ if ( DictionaryBuilder.buildDictionary( __umlsDirPath, __ctakesDirPath, __dictionaryName,
+ Collections.singletonList( "ENG" ),
+ __wantedSources, __wantedTargets, __wantedTuis ) ) {
+ final String message = "Dictionary " + __dictionaryName + " successfully built in " + __ctakesDirPath;
+ LOGGER.info( message );
+ JOptionPane.showMessageDialog( MainPanel.this, message, "Dictionary Built", JOptionPane.INFORMATION_MESSAGE );
+ } else {
+ error( "Build Failure", "Dictionary " + __dictionaryName + " could not be built in " + __ctakesDirPath );
+ }
+ SwingUtilities.getRoot( MainPanel.this ).setCursor( Cursor.getDefaultCursor() );
+ }
+ }
+
+
+
+ private class UmlsDirListener implements ActionListener {
+ public void actionPerformed( final ActionEvent event ) {
+ final String oldPath = _umlsDirPath;
+ final String newPath = setUmlsDirPath( event.getActionCommand() );
+ if ( !oldPath.equals( newPath ) ) {
+ loadSources();
+ }
+ }
+ }
+
+
+ private class CtakesDirListener implements ActionListener {
+ public void actionPerformed( final ActionEvent event ) {
+ _ctakesPath = event.getActionCommand();
+ }
+ }
+
+
+ /**
+ * Opens the JFileChooser
+ */
+ private class BuildDictionaryAction extends AbstractAction {
+ private final JTextComponent __textComponent;
+
+ private BuildDictionaryAction( final JTextComponent textComponent ) {
+ super( "Build Dictionary" );
+ __textComponent = textComponent;
+ }
+
+ @Override
+ public void actionPerformed( final ActionEvent event ) {
+ final String dictionaryName = __textComponent.getText();
+ if ( dictionaryName != null && !dictionaryName.isEmpty() ) {
+ buildDictionary( dictionaryName.toLowerCase() );
+ } else {
+ error( "Invalid Dictionary Name", "Please Specify a Dictionary Name" );
+ }
+ }
+ }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,127 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.ctakes.dictionary.creator.util.collection.CollectionMap;
+import org.apache.ctakes.dictionary.creator.util.collection.HashSetMap;
+
+import java.util.*;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 11/20/13
+ */
+final public class Concept {
+
+ static public String PREFERRED_TERM_UNKNOWN = "Unknown Preferred Term";
+
+ private String _preferredText = null;
+ private boolean _hasDose = false;
+
+ final private Collection<String> _texts;
+ final private CollectionMap<String, String, ? extends Collection<String>> _codes;
+ final private Collection<Tui> _tuis;
+
+
+
+ public Concept() {
+ _codes = new HashSetMap<>( 0 );
+ _texts = new HashSet<>( 1 );
+ _tuis = EnumSet.noneOf( Tui.class );
+ }
+
+ public boolean addTexts( final Collection<String> texts ) {
+ return _texts.addAll( texts );
+ }
+
+ public void removeTexts( final Collection<String> texts ) {
+ _texts.removeAll( texts );
+ }
+
+ public Collection<String> getTexts() {
+ return _texts;
+ }
+
+ public void minimizeTexts() {
+ if ( _texts.size() < 2 ) {
+ return;
+ }
+ final List<String> textList = new ArrayList<>( _texts );
+ final Collection<String> extensionTexts = new HashSet<>();
+ for ( int i=0; i<textList.size()-1; i++ ) {
+ final String iText = textList.get( i );
+ for ( int j=i+1; j<textList.size(); j++ ) {
+ final String jText = textList.get( j );
+ if ( textContained( jText, iText ) ) {
+ extensionTexts.add( jText );
+ } else if ( textContained( iText, jText ) ) {
+ extensionTexts.add( iText );
+ }
+ }
+ }
+ _texts.removeAll( extensionTexts );
+ }
+
+ static private boolean textContained( final String containerText, final String containedText ) {
+ final int index = containerText.indexOf( containedText );
+ return index >= 0
+ && ( index == 0 || containerText.charAt( index-1 ) == ' ' )
+ && ( index+containedText.length() == containerText.length() || containerText.charAt( index + containedText.length() ) == ' ' );
+ }
+
+ public void setPreferredText( final String text ) {
+ _preferredText = text;
+ }
+
+ public String getPreferredText() {
+ if ( _preferredText != null ) {
+ return _preferredText;
+ }
+ return PREFERRED_TERM_UNKNOWN;
+ }
+
+ public void addCode( final String source, final String code ) {
+ _codes.placeValue( source, code );
+ }
+
+ public Collection<String> getVocabularies() {
+ return _codes.keySet();
+ }
+
+ public Collection<String> getCodes( final String source ) {
+ final Collection<String> codes = _codes.getCollection( source );
+ if ( codes == null ) {
+ return Collections.emptyList();
+ }
+ return codes;
+ }
+
+ public void addTui( final Tui tui ) {
+ _tuis.add( tui );
+ }
+
+ public Collection<Tui> getTuis() {
+ return _tuis;
+ }
+
+ public boolean hasTui( final Collection<Tui> tuis ) {
+ return _tuis.stream().anyMatch( tuis::contains );
+ }
+
+ public boolean isEmpty() {
+// return _texts.isEmpty() || _codes.isEmpty();
+ return _texts.isEmpty();
+ }
+
+ public void setHasDose() {
+ _hasDose = true;
+ }
+
+ public boolean hasDose() {
+ return _hasDose;
+ }
+
+ public boolean isUnwanted() {
+ return hasDose() || isEmpty();
+ }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,40 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+
+/**
+ * Creates the initial map of cui code to {@link Concept} for wanted Tuis and source vocabularies.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/11/2015
+ */
+public class ConceptMapFactory {
+
+   static private final Logger LOGGER = LogManager.getLogger( "ConceptMapFactory" );
+
+   /**
+    * Asks {@link MrstyParser} for the concepts of the wanted Tuis, then keeps only those whose
+    * cui code is reported by {@link MrconsoParser} for the wanted sources.
+    *
+    * @param umlsDirPath   passed unchanged to both parsers
+    * @param wantedSources names of the wanted source vocabularies
+    * @param wantedTuis    wanted semantic types
+    * @return map of cui code to concept; an empty map if either wanted collection is empty
+    */
+   static public Map<Long,Concept> createInitialConceptMap( final String umlsDirPath,
+                                                             final Collection<String> wantedSources,
+                                                             final Collection<Tui> wantedTuis ) {
+      // sources are checked first, tuis only when sources were given
+      final String problem = wantedSources.isEmpty() ? "No source vocabularies specified"
+                                                     : wantedTuis.isEmpty() ? "No TUIs specified" : null;
+      if ( problem != null ) {
+         LOGGER.warn( problem );
+         return Collections.emptyMap();
+      }
+      // concepts for every cui that has a wanted tui
+      final Map<Long, Concept> conceptMap = MrstyParser.createConceptsForTuis( umlsDirPath, wantedTuis );
+      // cuis that appear in a wanted source
+      final Collection<Long> vocabularyCuis = MrconsoParser.getValidVocabularyCuis( umlsDirPath, wantedSources );
+      conceptMap.keySet().removeIf( cui -> !vocabularyCuis.contains( cui ) );
+      LOGGER.info( "Total Valid Cuis " + conceptMap.size() + "\t from wanted Tuis and Vocabularies" );
+      return conceptMap;
+   }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,117 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.logging.Logger;
+
+/**
+ * Converts between cui strings such as "C0000123" and numeric cui codes.
+ * A cui is split into a leading prefix and up to seven trailing digits.  Its code is
+ * {@code prefixIndex * 100000000 + trailingNumber}, where prefixIndex is the position of the
+ * (prefix, digit count) pair in the list of pairs seen so far.  The pair of "C0000000" is
+ * registered first and therefore has index 0.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 9/5/2014
+ */
+public enum CuiCodeUtil {
+   INSTANCE;
+
+   static public CuiCodeUtil getInstance() {
+      return INSTANCE;
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "CuiCodeUtil" );
+   static private final long PREFIX_MULTIPLIER = 100000000;
+
+   final private List<PrefixerPair> _prefixerPairList = new ArrayList<>();
+
+   CuiCodeUtil() {
+      // Add the standard C as the default encoding prefix
+      _prefixerPairList.add( new PrefixerPair( "C0000000" ) );
+   }
+
+   /**
+    * @param code numeric cui code as created by {@link #getCuiCode(String)}
+    * @return the cui string for the code; the code itself as text if it is negative
+    * or refers to a prefix that has not been registered
+    */
+   public String getAsCui( final Long code ) {
+      final long multiplier = code / PREFIX_MULTIPLIER;
+      if ( code < 0 || multiplier < 0 || multiplier >= _prefixerPairList.size() ) {
+         LOGGER.severe( "Could not create Cui String for " + code );
+         return "" + code;
+      }
+      return _prefixerPairList.get( (int)multiplier ).getAsCui( code % PREFIX_MULTIPLIER );
+   }
+
+
+   /**
+    * Registers the prefix of the cui if it has not been seen before.
+    *
+    * @param cui cui string, e.g. "C0000123"
+    * @return numeric code for the cui, or -1 if its trailing digits could not be parsed
+    */
+   public Long getCuiCode( final String cui ) {
+      final PrefixerPair prefixerPair = new PrefixerPair( cui );
+      int prefixerIndex = _prefixerPairList.indexOf( prefixerPair );
+      if ( prefixerIndex < 0 ) {
+         prefixerIndex = _prefixerPairList.size();
+         _prefixerPairList.add( prefixerPair );
+      }
+      return prefixerPair.getCuiCode( cui, prefixerIndex );
+   }
+
+
+   /**
+    * The leading characters of a cui together with the number of digits that follow them.
+    */
+   static private final class PrefixerPair {
+      final private int __digitCount;
+      final private char[] __prefix;
+      final private int __hashCode;
+
+      /**
+       * @param cui cui from which to take the prefix and the count of trailing digits (at most 7)
+       */
+      private PrefixerPair( final String cui ) {
+         final char[] chars = cui.toCharArray();
+         int digitCount = 0;
+         while ( digitCount < chars.length
+                 && digitCount < 7
+                 && Character.isDigit( chars[ chars.length - 1 - digitCount ] ) ) {
+            digitCount++;
+         }
+         __digitCount = digitCount;
+         __prefix = Arrays.copyOfRange( chars, 0, chars.length - digitCount );
+         __hashCode = digitCount + Arrays.hashCode( __prefix );
+      }
+
+      /**
+       * @param cui        cui that has the prefix and digit count of this pair
+       * @param multiplier index of this pair in the list of registered pairs
+       * @return numeric code for the cui, or -1 if its trailing digits could not be parsed
+       */
+      private Long getCuiCode( final String cui, final int multiplier ) {
+         final String cuiNum = cui.substring( cui.length() - __digitCount, cui.length() );
+         try {
+            return PREFIX_MULTIPLIER * multiplier + Long.parseLong( cuiNum );
+         } catch ( NumberFormatException nfE ) {
+            LOGGER.severe( "Could not create Cui Code for " + cui );
+         }
+         return -1L;
+      }
+
+      /**
+       * @param code numeric part of a cui code, without the prefix index
+       * @return prefix followed by the code, left-padded with zeros to the digit count of this pair;
+       * without padding if the code has more digits than that
+       */
+      private String getAsCui( final Long code ) {
+         final char[] codeChars = String.valueOf( code ).toCharArray();
+         if ( codeChars.length > __digitCount ) {
+            // the prefix is a char[]: it must be converted explicitly or the message shows "[C@..."
+            LOGGER.severe( "Invalid code " + code + " for prefix " + String.valueOf( __prefix )
+                           + " has more than " + __digitCount + " digits" );
+            return String.valueOf( __prefix ) + String.valueOf( codeChars );
+         }
+         final int cuiLength = __prefix.length + __digitCount;
+         final char[] cuiChars = new char[ cuiLength ];
+         System.arraycopy( __prefix, 0, cuiChars, 0, __prefix.length );
+         System.arraycopy( codeChars, 0, cuiChars, cuiLength - codeChars.length, codeChars.length );
+         for ( int i = __prefix.length; i < cuiLength - codeChars.length; i++ ) {
+            cuiChars[ i ] = '0';
+         }
+         return String.valueOf( cuiChars );
+      }
+
+      @Override
+      public int hashCode() {
+         return __hashCode;
+      }
+
+      @Override
+      public boolean equals( final Object other ) {
+         return other instanceof PrefixerPair
+                && __hashCode == ((PrefixerPair)other).__hashCode
+                && __digitCount == ((PrefixerPair)other).__digitCount
+                && Arrays.equals( __prefix, ((PrefixerPair)other).__prefix );
+      }
+   }
+
+   // todo
+   // todo switch to int: 32 bit signed, max = 2,147,483,647
+   // todo if we keep final 7 digits for the numerical then we have 213 possible prefixes
+   // todo
+   // todo can probably change the code and the db will be fine, change the db too
+   // todo
+
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/DoseUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/DoseUtil.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/DoseUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/DoseUtil.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,78 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.logging.Logger;
+
+/**
+ * Decides whether a text contains a dose-like unit.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 7/14/14
+ */
+final public class DoseUtil {
+
+   private DoseUtil() {
+   }
+
+   static private final Logger LOGGER = Logger.getLogger( "DoseUtil" );
+
+
+   // some of these are not strictly units, e.g. "ud" : "ut dictum" or "as directed"
+   // but can be properly trimmed as they appear in the same place as would a unit
+
+   static private final String[] UNIT_ARRAY = { "gr", "gm", "gram", "grams", "g",
+                                                "mg", "milligram", "milligrams", "kg",
+                                                "microgram", "micrograms", "mcg", "ug",
+                                                "millicurie", "mic", "oz",
+                                                "lf", "ml", "liter", "milliliter", "l",
+                                                "milliequivalent", "meq",
+                                                "hour", "hours", "hr", //"day", "days", "daily", //"24hr", "8hr", "12hr",
+                                                "week", "weeks", "weekly", "biweekly",
+                                                "usp", "titradose",
+                                                "unit", "units", "unt", "iu", "u", "mmu",
+                                                "mm", "cm",
+                                                "gauge", "intl","au", "bau", "mci", "ud",
+                                                "ww", "vv", "wv",
+                                                "%", "percent", "%ww", "%vv", "%wv",
+                                                "actuation", "actuat", "vial", "vil", "packet", "pkt" };
+   static private final Collection<String> UNITS = Arrays.asList( UNIT_ARRAY );
+
+
+   /**
+    * Looks at every whitespace-separated word after the first one.
+    * A word that is exactly a unit counts, except when it is the second word and the first
+    * word is not made of digits only; in that case the answer is immediately false.
+    * A word that is digits directly followed by a unit, e.g. "500mg", also counts.
+    *
+    * @param text text to check
+    * @return true if a unit was found as described above
+    */
+   static public boolean hasUnit( final String text ) {
+      final String[] words = text.split( "\\s+" );
+      if ( words.length <= 1 ) {
+         return false;
+      }
+      for ( int w = 1; w < words.length; w++ ) {
+         final String word = words[ w ];
+         for ( String unit : UNITS ) {
+            if ( !word.endsWith( unit ) ) {
+               continue;
+            }
+            final int amountLength = word.length() - unit.length();
+            if ( amountLength == 0 ) {
+               // the word is a bare unit; directly after the first word it needs a numeric first word
+               return w != 1 || isAllDigits( words[ 0 ], words[ 0 ].length() );
+            }
+            if ( isAllDigits( word, amountLength ) ) {
+               return true;
+            }
+         }
+      }
+      return false;
+   }
+
+   /**
+    * @param text   text to inspect
+    * @param length number of leading characters to inspect
+    * @return true if each of the first length characters is a digit; true for a length of 0
+    */
+   static private boolean isAllDigits( final String text, final int length ) {
+      for ( int c = 0; c < length; c++ ) {
+         if ( !Character.isDigit( text.charAt( c ) ) ) {
+            return false;
+         }
+      }
+      return true;
+   }
+
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoIndex.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoIndex.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoIndex.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoIndex.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,16 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+/**
+ * Named column positions, presumably of a row of MRCONSO.RRF as the name suggests.
+ * Each constant carries the integer index given to its constructor.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/23/14
+ */
+public enum MrconsoIndex {
+   CUI( 0 ),
+   LANGUAGE( 1 ),
+   STATUS( 2 ),
+   FORM( 4 ),
+   SOURCE( 11 ),
+   TERM_TYPE( 12 ),
+   SOURCE_CODE( 13 ),
+   TEXT( 14 );
+
+   /** Index of the column within a row of tokens. */
+   final public int _index;
+
+   MrconsoIndex( final int columnIndex ) {
+      _index = columnIndex;
+   }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,284 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+import org.apache.ctakes.dictionary.creator.util.TextTokenizer;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.*;
+
+import static org.apache.ctakes.dictionary.creator.gui.umls.MrconsoIndex.*;
+
+/**
+ * Reads the MRCONSO.RRF file below a UMLS directory, either to collect the cui codes that
+ * appear in wanted source vocabularies or to fill existing {@link Concept}s with their
+ * preferred text, codes and texts.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/17/14
+ */
+final public class MrconsoParser {
+
+   static private final Logger LOGGER = LogManager.getLogger( "MrConsoParser" );
+
+   static private final String MR_CONSO_SUB_PATH = "/META/MRCONSO.RRF";
+
+   // TODO - put all exclusions in a data file, display for user, allow changes and save, etc.
+
+   // https://www.nlm.nih.gov/research/umls/sourcereleasedocs
+   // https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/SNOMEDCT_US/stats.html
+   // https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/RXNORM/stats.html
+   static private final String[] DEFAULT_EXCLUSIONS = { "FN", "CCS", "CA2", "CA3", "PSN", "TMSY",
+                                                        "SBD", "SBDC", "SBDF", "SBDG",
+                                                        "SCD", "SCDC", "SCDF", "SCDG", "BPCK", "GPCK", "XM" };
+
+   static private final String[] SNOMED_OBSOLETES = { "OF", "MTH_OF", "OAP", "MTH_OAP", "OAF", "MTH_OAF",
+                                                      "IS", "MTH_IS", "OAS", "MTH_OAS",
+                                                      "OP", "MTH_OP" };
+   // Snomed OF = Obsolete Fully Specified Name      MTH_OF
+   // Snomed OAP = Obsolete Active Preferred Term    MTH_OAP
+   // Snomed OAF = Obsolete Active Full Name         MTH_OAF
+   // Snomed IS = Obsolete Synonym                   MTH_IS
+   // Snomed OAS = Obsolete Active Synonym           MTH_OAS
+   // Snomed OP = Obsolete Preferred Name            MTH_OP
+   // Snomed PT = Preferred Term , but we don't need that for valid cuis ... or do we want only those with preferred terms?
+   // Snomed PTGB = British Preferred Term
+
+   // GO has same snomed obsoletes +
+   // GO EOT = Obsolete Entry Term
+   // HPO has same snomed obsoletes
+
+   // MTHSPL - DP is Drug Product as is MTH_RXN_DP  MTHSPL SU is active substance
+   // VANDF AB is abbreviation for drug  VANDF CD is Clinical Drug.  Both are dosed.
+   // NDFRT AB?  Looks like ingredient.  NDFRT PT can be dosed
+
+   static private final String[] GO_OBSOLETES = { "EOT" };
+
+   static private final String[] LOINC_OBSOLETES = { "LO", "OLC", "MTH_LO", "OOSN" };
+
+   static private final String[] MEDRA_OBSOLETES = { "OL", "MTH_OL" };
+
+   static private final String[] MESH_EXCLUSIONS = { "N1", "EN", "PEN" };
+
+   static private final String[] RXNORM_EXCLUSIONS = { "SY" };  // What is IN ?  Ingredient?
+
+   static private final String[] NCI_EXCLUSIONS = { "CSN" };
+
+   // Related to, but not synonymous
+   static private final String[] UMDNS_EXCLUSIONS = { "RT" };
+
+   private MrconsoParser() {
+   }
+
+   /**
+    * @return a copy of the default excluded term types, so callers cannot alter the defaults
+    */
+   static public String[] getDefaultExclusions() {
+      return DEFAULT_EXCLUSIONS.clone();
+   }
+
+   /**
+    * @return the default excluded term types followed by the snomed obsolete term types
+    */
+   static public String[] getSnomedExclusions() {
+      return concatenate( DEFAULT_EXCLUSIONS, SNOMED_OBSOLETES );
+   }
+
+   /**
+    * @return the snomed exclusions followed by the GO, LOINC, MEDRA, MESH, NCI and UMDNS term types
+    */
+   static public String[] getNonRxnormExclusions() {
+      return concatenate( DEFAULT_EXCLUSIONS, SNOMED_OBSOLETES,
+                          GO_OBSOLETES, LOINC_OBSOLETES, MEDRA_OBSOLETES,
+                          MESH_EXCLUSIONS, NCI_EXCLUSIONS, UMDNS_EXCLUSIONS );
+   }
+
+   /**
+    * @param arrays arrays to join
+    * @return new array with the elements of all given arrays, in the given order
+    */
+   static private String[] concatenate( final String[]... arrays ) {
+      int total = 0;
+      for ( String[] array : arrays ) {
+         total += array.length;
+      }
+      final String[] joined = new String[ total ];
+      int start = 0;
+      for ( String[] array : arrays ) {
+         System.arraycopy( array, 0, joined, start, array.length );
+         start += array.length;
+      }
+      return joined;
+   }
+
+
+   /**
+    * Fills the given concepts from the rows of MRCONSO.RRF.  For each acceptable row whose cui
+    * is a key of the map: the preferred text is set if the row is a preferred term, the source
+    * code is added if the source is a wanted target and the code is not "NOCODE", and the
+    * formatted texts are added unless the text is invalid, too short, too long or has a dose
+    * unit.  A text with a dose unit marks the concept as having a dose.
+    *
+    * @param umlsDirPath          path to the UMLS directory that contains META/MRCONSO.RRF
+    * @param concepts             map of cui code to concept; only its concepts are filled
+    * @param wantedTargets        names of sources for which codes should be stored
+    * @param umlsTermUtil         used to validate, strip and format texts
+    * @param languages            accepted values of the language column
+    * @param extractAbbreviations passed to the formatting of texts
+    * @param minCharLength        minimum length of a text
+    * @param maxCharLength        maximum length of a text
+    * @param maxWordCount         maximum word count of a text
+    * @param maxSymCount          maximum symbol count of a text
+    * @return the given map of concepts
+    */
+   static public Map<Long, Concept> parseAllConcepts( final String umlsDirPath,
+                                                      final Map<Long, Concept> concepts,
+                                                      final Collection<String> wantedTargets,
+                                                      final UmlsTermUtil umlsTermUtil,
+                                                      final Collection<String> languages,
+                                                      final boolean extractAbbreviations,
+                                                      final int minCharLength,
+                                                      final int maxCharLength,
+                                                      final int maxWordCount,
+                                                      final int maxSymCount ) {
+      final String mrconsoPath = umlsDirPath + MR_CONSO_SUB_PATH;
+      final Collection<String> invalidTypeSet = new HashSet<>( Arrays.asList( getNonRxnormExclusions() ) );
+      LOGGER.info( "Compiling map of Concepts from " + mrconsoPath );
+      long lineCount = 0;
+      long textCount = 0;
+      try ( final BufferedReader reader = FileUtil.createReader( mrconsoPath ) ) {
+         // the update clause reads the next row, so every "continue" below moves on to it
+         for ( List<String> tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+               tokens != null;
+               tokens = FileUtil.readBsvTokens( reader, mrconsoPath ) ) {
+            lineCount++;
+            if ( lineCount % 100000 == 0 ) {
+               LOGGER.info( "File Line " + lineCount + "   Texts " + textCount );
+            }
+            if ( !isRowOk( tokens, languages, invalidTypeSet ) ) {
+               continue;
+            }
+            final Long cuiCode = CuiCodeUtil.getInstance().getCuiCode( getToken( tokens, CUI ) );
+            final Concept concept = concepts.get( cuiCode );
+            if ( concept == null ) {
+               // cui for current row is unwanted
+               continue;
+            }
+            final String text = getToken( tokens, TEXT );
+            if ( isPreferredTerm( tokens ) ) {
+               concept.setPreferredText( text );
+            }
+            final String source = getToken( tokens, SOURCE );
+            if ( wantedTargets.contains( source ) ) {
+               final String code = getToken( tokens, SOURCE_CODE );
+               if ( !code.equals( "NOCODE" ) ) {
+                  Vocabulary.getInstance().addVocabulary( source, code );
+                  concept.addCode( source, code );
+               }
+            }
+            final String tokenizedText = TextTokenizer.getTokenizedText( text );
+            if ( tokenizedText == null || tokenizedText.isEmpty()
+                 || !umlsTermUtil.isTextValid( tokenizedText ) ) {
+               // no tokenizable text or tokenized text is invalid for some reason
+               continue;
+            }
+            final String strippedText = umlsTermUtil.getStrippedText( tokenizedText );
+            if ( strippedText == null || strippedText.isEmpty()
+                 || UmlsTermUtil.isTextTooShort( strippedText, minCharLength )
+                 || UmlsTermUtil.isTextTooLong( strippedText, maxCharLength, maxWordCount, maxSymCount ) ) {
+               // after stripping unwanted prefixes and suffixes there is no valid text
+               continue;
+            }
+            final Collection<String> formattedTexts
+                  = umlsTermUtil.getFormattedTexts( strippedText, extractAbbreviations, minCharLength, maxCharLength, maxWordCount, maxSymCount );
+            if ( formattedTexts == null || formattedTexts.isEmpty() ) {
+               continue;
+            }
+            if ( DoseUtil.hasUnit( tokenizedText ) ) {
+               concept.setHasDose();
+               continue;
+            }
+            if ( concept.addTexts( formattedTexts ) ) {
+               textCount += formattedTexts.size();
+            }
+         }
+      } catch ( IOException ioE ) {
+         // pass the exception itself so the stack trace is not lost
+         LOGGER.error( ioE.getMessage(), ioE );
+      }
+      LOGGER.info( "File Lines " + lineCount + "   Texts " + textCount );
+      return concepts;
+   }
+
+
+   /**
+    * @param tokens         column values of one row
+    * @param languages      accepted values of the language column
+    * @param invalidTypeSet term types that are not accepted
+    * @return true if the row has a text column, an accepted language, an accepted term type
+    * and is not a synonym ("SY") of the "RXNORM" source
+    */
+   static private boolean isRowOk( final List<String> tokens,
+                                   final Collection<String> languages,
+                                   final Collection<String> invalidTypeSet ) {
+      if ( tokens.size() <= TEXT._index || !languages.contains( getToken( tokens, LANGUAGE ) ) ) {
+         return false;
+      }
+      final String type = getToken( tokens, TERM_TYPE );
+      if ( invalidTypeSet.contains( type ) ) {
+         return false;
+      }
+      // "Synonyms" are actually undesirable in the rxnorm vocabulary
+      final String source = getToken( tokens, SOURCE );
+      return !( source.equals( "RXNORM" ) && type.equals( "SY" ) );
+   }
+
+
+   /**
+    * @param tokens column values of one row
+    * @return true if the status column is "P" and the form column is "PF"
+    */
+   static private boolean isPreferredTerm( final List<String> tokens ) {
+      return getToken( tokens, STATUS ).equals( "P" ) && getToken( tokens, FORM ).equals( "PF" );
+   }
+
+
+   /**
+    * Collects the cuis of the wanted sources, excluding the term types of
+    * {@link #getNonRxnormExclusions()}.
+    *
+    * @param umlsDirPath        path to the UMLS directory that contains META/MRCONSO.RRF
+    * @param sourceVocabularies desired source type names as appear in rrf: RXNORM, SNOMEDCT, MSH, etc.
+    * @return codes of the cuis that exist in the given sources
+    */
+   static public Collection<Long> getValidVocabularyCuis( final String umlsDirPath,
+                                                          final Collection<String> sourceVocabularies ) {
+//      return getValidVocabularyCuis( umlsDirPath, sourceVocabularies, getDefaultExclusions() );
+      return getValidVocabularyCuis( umlsDirPath, sourceVocabularies, getNonRxnormExclusions() );
+   }
+
+//   /**
+//    * Can cull the given collection of cuis
+//    *
+//    * @param umlsDirPath     path to the UMLS_ROOT Meta/MRCONSO.RRF file
+//    * @return Subset of cuis that exist in the given sources
+//    */
+//   static public Collection<Long> getValidRxNormCuis( final String umlsDirPath ) {
+//      return getValidVocabularyCuis( umlsDirPath, Collections.singletonList( "RXNORM" ), getRxnormExclusions() );
+//   }
+
+   /**
+    * Collects the cuis of rows whose source is wanted and whose term type is not excluded.
+    * Rows too short to hold a term type column are skipped.
+    *
+    * @param umlsDirPath        path to the UMLS directory that contains META/MRCONSO.RRF
+    * @param sourceVocabularies desired source type names as appear in rrf: RXNORM, SNOMEDCT, MSH, etc.
+    * @param invalidTypes       term type names as appear in rrf: FN, CCS, etc. that are not valid
+    * @return codes of the cuis that exist in the given sources
+    */
+   static private Collection<Long> getValidVocabularyCuis( final String umlsDirPath,
+                                                           final Collection<String> sourceVocabularies,
+                                                           final String... invalidTypes ) {
+      final String mrconsoPath = umlsDirPath + MR_CONSO_SUB_PATH;
+      LOGGER.info( "Compiling list of Cuis with wanted Vocabularies using " + mrconsoPath );
+      // built once instead of streaming the array again for every row
+      final Collection<String> invalidTypeSet = new HashSet<>( Arrays.asList( invalidTypes ) );
+      final Collection<Long> validCuis = new HashSet<>();
+      long lineCount = 0;
+      try ( final BufferedReader reader = FileUtil.createReader( mrconsoPath ) ) {
+         for ( List<String> tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+               tokens != null;
+               tokens = FileUtil.readBsvTokens( reader, mrconsoPath ) ) {
+            lineCount++;
+            if ( lineCount % 100000 == 0 ) {
+               LOGGER.info( "File Line " + lineCount + "\t Valid Cuis " + validCuis.size() );
+            }
+            // the term type column is the highest one read here, so it bounds the size check
+            if ( tokens.size() > TERM_TYPE._index
+                 && sourceVocabularies.contains( getToken( tokens, SOURCE ) )
+                 && !invalidTypeSet.contains( getToken( tokens, TERM_TYPE ) ) ) {
+               final Long cuiCode = CuiCodeUtil.getInstance().getCuiCode( getToken( tokens, CUI ) );
+               validCuis.add( cuiCode );
+            }
+         }
+      } catch ( IOException ioE ) {
+         // pass the exception itself so the stack trace is not lost
+         LOGGER.error( ioE.getMessage(), ioE );
+      }
+      LOGGER.info( "File Lines " + lineCount + "\t Valid Cuis " + validCuis.size() + "\t for wanted Vocabularies" );
+      return validCuis;
+   }
+
+
+   /**
+    * @param tokens       column values of one row
+    * @param mrconsoIndex wanted column
+    * @return value of the wanted column
+    */
+   static private String getToken( final List<String> tokens, final MrconsoIndex mrconsoIndex ) {
+      return tokens.get( mrconsoIndex._index );
+   }
+
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyIndex.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyIndex.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyIndex.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyIndex.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,16 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+/**
+ * Named column positions, presumably of a row of MRSTY.RRF as the name suggests.
+ * Each constant carries the integer index given to its constructor.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/26/14
+ */
+public enum MrstyIndex {
+   CUI( 0 ),
+   TUI( 1 );
+
+   /** Index of the column within a row of tokens. */
+   final public int _index;
+
+   MrstyIndex( final int columnIndex ) {
+      _index = columnIndex;
+   }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,74 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.*;
+
+import static org.apache.ctakes.dictionary.creator.gui.umls.MrstyIndex.CUI;
+import static org.apache.ctakes.dictionary.creator.gui.umls.MrstyIndex.TUI;
+
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 1/17/14
+ */
+final public class MrstyParser {
+
+ static private final Logger LOGGER = LogManager.getLogger( "MrStyParser" );
+
+ static private final String MRSTY_SUB_PATH = "/META/MRSTY.RRF";
+
+ private MrstyParser() {
+ }
+
+ static public Map<Long, Concept> createConceptsForTuis( final String umlsPath,
+ final Collection<Tui> wantedTuis ) {
+ final String mrstyPath = umlsPath + MRSTY_SUB_PATH;
+ LOGGER.info( "Compiling list of Cuis with wanted Tuis using " + mrstyPath );
+ long lineCount = 0;
+ final Map<Long,Concept> wantedConcepts = new HashMap<>();
+ final Collection<Tui> usedTuis = new HashSet<>( wantedTuis.size() );
+ try (final BufferedReader reader = FileUtil.createReader( mrstyPath ) ) {
+ List<String> tokens = FileUtil.readBsvTokens( reader, mrstyPath );
+ while ( tokens != null ) {
+ lineCount++;
+ if ( tokens.size() > TUI._index ) {
+ final Tui tuiEnum = Tui.valueOf( tokens.get( TUI._index ) );
+ if ( !wantedTuis.contains( tuiEnum ) ) {
+ tokens = FileUtil.readBsvTokens( reader, mrstyPath );
+ continue;
+ }
+ final Long cuiCode = CuiCodeUtil.getInstance().getCuiCode( tokens.get( CUI._index ) );
+ Concept concept = wantedConcepts.get( cuiCode );
+ if ( concept == null ) {
+ concept = new Concept();
+ wantedConcepts.put( cuiCode, concept );
+ }
+ concept.addTui( tuiEnum );
+ usedTuis.add( tuiEnum );
+ }
+ if ( lineCount % 100000 == 0 ) {
+ LOGGER.info( "File Line " + lineCount + "\t Valid Cuis " + wantedConcepts.size() );
+ }
+ tokens = FileUtil.readBsvTokens( reader, mrstyPath );
+ }
+ } catch ( IOException ioE ) {
+ LOGGER.error( ioE.getMessage() );
+ }
+ LOGGER.info( "File Lines " + lineCount + "\t Valid Cuis " + wantedConcepts.size() + "\t for wanted Tuis" );
+ if ( usedTuis.size() != wantedTuis.size() ) {
+ wantedTuis.removeAll( usedTuis );
+ for ( Tui missingTui : wantedTuis ) {
+ LOGGER.warn( "Could not find Cuis for Tui " + missingTui + " " + missingTui.getDescription() );
+ }
+ }
+ return wantedConcepts;
+ }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,188 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.event.EventListenerList;
+import javax.swing.event.TableModelEvent;
+import javax.swing.event.TableModelListener;
+import javax.swing.table.TableModel;
+import java.util.*;
+
+/**
+ * Table model listing vocabulary names, one per row, with two checkbox columns.
+ * Column 0 ("Source") marks a vocabulary as a wanted source, column 1 ("Target")
+ * marks it as a wanted target, and column 2 ("Vocabulary") shows its name.
+ * The "Target" cell of a row can only be edited while that row's "Source" is selected.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+final public class SourceTableModel implements TableModel {
+
+   static private final Logger LOGGER = LogManager.getLogger( "SourceTableModel" );
+
+   static private final String[] COLUMN_NAMES = { "Source", "Target", "Vocabulary" };
+   static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, Boolean.class, String.class };
+
+   // Vocabularies that are pre-selected as both source and target whenever the sources are reset.
+   static private final String[] CTAKES_SOURCES = { "SNOMEDCT", "SNOMEDCT_US", "RXNORM" };
+
+   private final EventListenerList _listenerList = new EventListenerList();
+   private final Collection<String> _wantedSources = new HashSet<>();
+   private final Collection<String> _wantedTargets = new HashSet<>();
+   private final List<String> _sources = new ArrayList<>();
+
+   /**
+    * Replaces the listed vocabularies and resets the selections.
+    * The names are copied and sorted, the {@code CTAKES_SOURCES} names are selected
+    * as both wanted sources and wanted targets (whether or not they appear in
+    * {@code sources}), and listeners are told that the table data changed.
+    *
+    * @param sources names of the vocabularies to list
+    */
+   public void setSources( final Collection<String> sources ) {
+      _sources.clear();
+      _wantedSources.clear();
+      _wantedTargets.clear();
+      _sources.addAll( sources );
+      Collections.sort( _sources );
+      _wantedSources.addAll( Arrays.asList( CTAKES_SOURCES ) );
+      _wantedTargets.addAll( Arrays.asList( CTAKES_SOURCES ) );
+      fireTableChanged( new TableModelEvent( this ) );
+   }
+
+   /**
+    * @return the live collection of vocabulary names selected as sources; not a copy
+    */
+   public Collection<String> getWantedSources() {
+      return _wantedSources;
+   }
+
+   /**
+    * @return the live collection of vocabulary names selected as targets; not a copy
+    */
+   public Collection<String> getWantedTargets() {
+      return _wantedTargets;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getRowCount() {
+      return _sources.size();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getColumnCount() {
+      return COLUMN_NAMES.length;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getColumnName( final int columnIndex ) {
+      return COLUMN_NAMES[ columnIndex ];
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Class<?> getColumnClass( final int columnIndex ) {
+      return COLUMN_CLASSES[ columnIndex ];
+   }
+
+   /**
+    * {@inheritDoc}
+    * The "Source" column is always editable; the "Target" column is editable only
+    * for rows whose vocabulary is currently selected as a source.
+    */
+   @Override
+   public boolean isCellEditable( final int rowIndex, final int columnIndex ) {
+      if ( columnIndex == 0 ) {
+         return true;
+      }
+      return columnIndex == 1 && _wantedSources.contains( _sources.get( rowIndex ) );
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return a Boolean for columns 0 and 1, the vocabulary name for column 2,
+    * and the text "ERROR" for any other column index
+    */
+   @Override
+   public Object getValueAt( final int rowIndex, final int columnIndex ) {
+      final String source = _sources.get( rowIndex );
+      switch ( columnIndex ) {
+         case 0:
+            return _wantedSources.contains( source );
+         case 1:
+            return _wantedTargets.contains( source );
+         case 2:
+            return source;
+         default:
+            return "ERROR";
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    * Only Boolean values in columns 0 and 1 are accepted; anything else is ignored.
+    * When a selection actually changes, listeners receive an update event for the
+    * whole row, because toggling "Source" also changes whether "Target" is editable.
+    */
+   @Override
+   public void setValueAt( final Object aValue, final int rowIndex, final int columnIndex ) {
+      if ( !(aValue instanceof Boolean) ) {
+         return;
+      }
+      final boolean select = (Boolean)aValue;
+      final String source = _sources.get( rowIndex );
+      final boolean changed;
+      if ( columnIndex == 0 ) {
+         changed = setSelected( _wantedSources, source, select );
+      } else if ( columnIndex == 1 ) {
+         changed = setSelected( _wantedTargets, source, select );
+      } else {
+         changed = false;
+      }
+      if ( changed ) {
+         fireTableChanged( new TableModelEvent( this, rowIndex ) );
+      }
+   }
+
+   /**
+    * Adds the name to or removes it from the given selection.
+    *
+    * @param wanted selection to modify
+    * @param name   vocabulary name
+    * @param select true to add the name, false to remove it
+    * @return true if the selection was modified
+    */
+   static private boolean setSelected( final Collection<String> wanted, final String name, final boolean select ) {
+      return select ? wanted.add( name ) : wanted.remove( name );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void addTableModelListener( final TableModelListener listener ) {
+      _listenerList.add( TableModelListener.class, listener );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void removeTableModelListener( final TableModelListener listener ) {
+      _listenerList.remove( TableModelListener.class, listener );
+   }
+
+   /**
+    * Forwards the given notification event to all
+    * <code>TableModelListeners</code> that registered
+    * themselves as listeners for this table model.
+    *
+    * @param e the event to be forwarded
+    *
+    * @see #addTableModelListener
+    * @see TableModelEvent
+    * @see EventListenerList
+    */
+   private void fireTableChanged( final TableModelEvent e ) {
+      // Guaranteed to return a non-null array of (listener class, listener) pairs
+      final Object[] listeners = _listenerList.getListenerList();
+      // Process the listeners last to first, notifying
+      // those that are interested in this event
+      for ( int i = listeners.length - 2; i >= 0; i -= 2 ) {
+         if ( listeners[ i ] == TableModelListener.class ) {
+            ((TableModelListener)listeners[ i + 1 ]).tableChanged( e );
+         }
+      }
+   }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,185 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+/**
+ * Type identifiers ("TUI"s), grouped below by category, each paired with a
+ * human-readable description. The constant name is the letter T followed by a
+ * three-digit number. {@link #T999} is the placeholder for an error.
+ * <p>
+ * Lookup by name goes through the compiler-generated {@code valueOf(String)},
+ * which throws {@link IllegalArgumentException} for an unknown name. An enum
+ * cannot declare its own {@code valueOf(String)}, so a lenient lookup would
+ * need a differently named method.
+ * </p>
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+public enum Tui {
+   // Activities & Behaviors
+   T052( "Activity" ),
+   T053( "Behavior" ),
+   T056( "Daily or Recreational Activity" ),
+   T051( "Event" ),
+   T064( "Governmental or Regulatory Activity" ),
+   T055( "Individual Behavior" ),
+   T066( "Machine Activity" ),
+   T057( "Occupational Activity" ),
+   T054( "Social Behavior" ),
+   // Anatomy
+   T017( "Anatomical Structure" ),
+   T029( "Body Location or Region" ),
+   T023( "Body Part, Organ, or Organ Component" ),
+   T030( "Body Space or Junction" ),
+   T031( "Body Substance" ),
+   T022( "Body System" ),
+   T025( "Cell" ),
+   T026( "Cell Component" ),
+   T018( "Embryonic Structure" ),
+   T021( "Fully Formed Anatomical Structure" ),
+   T024( "Tissue" ),
+   // Chemicals & Drugs
+   T116( "Amino Acid, Peptide, or Protein" ),
+   T195( "Antibiotic" ),
+   T123( "Biologically Active Substance" ),
+   T122( "Biomedical or Dental Material" ),
+   T118( "Carbohydrate" ),
+   T103( "Chemical" ),
+   T120( "Chemical Viewed Functionally" ),
+   T104( "Chemical Viewed Structurally" ),
+   T200( "Clinical Drug" ),
+   T111( "Eicosanoid" ),
+   T196( "Element, Ion, or Isotope" ),
+   T126( "Enzyme" ),
+   T131( "Hazardous or Poisonous Substance" ),
+   T125( "Hormone" ),
+   T129( "Immunologic Factor" ),
+   T130( "Indicator, Reagent, or Diagnostic Aid" ),
+   T197( "Inorganic Chemical" ),
+   T119( "Lipid" ),
+   T124( "Neuroreactive Substance or Biogenic Amine" ),
+   T114( "Nucleic Acid, Nucleoside, or Nucleotide" ),
+   T109( "Organic Chemical" ),
+   T115( "Organophosphorus Compound" ),
+   T121( "Pharmacologic Substance" ),
+   T192( "Receptor" ),
+   T110( "Steroid" ),
+   T127( "Vitamin" ),
+   // Concepts & Ideas
+   T185( "Classification" ),
+   T077( "Conceptual Entity" ),
+   T169( "Functional Concept" ),
+   T102( "Group Attribute" ),
+   T078( "Idea or Concept" ),
+   T170( "Intellectual Product" ),
+   T171( "Language" ),
+   T080( "Qualitative Concept" ),
+   T081( "Quantitative Concept" ),
+   T089( "Regulation or Law" ),
+   T082( "Spatial Concept" ),
+   T079( "Temporal Concept" ),
+   // Devices
+   T203( "Drug Delivery Device" ),
+   T074( "Medical Device" ),
+   T075( "Research Device" ),
+   // Disorders
+   T020( "Acquired Abnormality" ),
+   T190( "Anatomical Abnormality" ),
+   T049( "Cell or Molecular Dysfunction" ),
+   T019( "Congenital Abnormality" ),
+   T047( "Disease or Syndrome" ),
+   T050( "Experimental Model of Disease" ),
+   T033( "Finding" ),
+   T037( "Injury or Poisoning" ),
+   T048( "Mental or Behavioral Dysfunction" ),
+   T191( "Neoplastic Process" ),
+   T046( "Pathologic Function" ),
+   T184( "Sign or Symptom" ),
+   // Genes & Molecular Sequences
+   T087( "Amino Acid Sequence" ),
+   T088( "Carbohydrate Sequence" ),
+   T028( "Gene or Genome" ),
+   T085( "Molecular Sequence" ),
+   T086( "Nucleotide Sequence" ),
+   // Geographic Areas
+   T083( "Geographic Area" ),
+   // Living Beings
+   T100( "Age Group" ),
+   T011( "Amphibian" ),
+   T008( "Animal" ),
+   T194( "Archaeon" ),
+   T007( "Bacterium" ),
+   T012( "Bird" ),
+   T204( "Eukaryote" ),
+   T099( "Family Group" ),
+   T013( "Fish" ),
+   T004( "Fungus" ),
+   T096( "Group" ),
+   T016( "Human" ),
+   T015( "Mammal" ),
+   T001( "Organism" ),
+   T101( "Patient or Disabled Group" ),
+   T002( "Plant" ),
+   T098( "Population Group" ),
+   T097( "Professional or Occupational Group" ),
+   T014( "Reptile" ),
+   T010( "Vertebrate" ),
+   T005( "Virus" ),
+   // Objects
+   T071( "Entity" ),
+   T168( "Food" ),
+   T073( "Manufactured Object" ),
+   T072( "Physical Object" ),
+   T167( "Substance" ),
+   // Occupations
+   T091( "Biomedical Occupation or Discipline" ),
+   T090( "Occupation or Discipline" ),
+   // Organizations
+   T093( "Health Care Related Organization" ),
+   T092( "Organization" ),
+   T094( "Professional Society" ),
+   T095( "Self-help or Relief Organization" ),
+   // Phenomena
+   T038( "Biologic Function" ),
+   T069( "Environmental Effect of Humans" ),
+   T068( "Human-caused Phenomenon or Process" ),
+   T034( "Laboratory or Test Result" ),
+   T070( "Natural Phenomenon or Process" ),
+   T067( "Phenomenon or Process" ),
+   // Physiology
+   T043( "Cell Function" ),
+   T201( "Clinical Attribute" ),
+   T045( "Genetic Function" ),
+   T041( "Mental Process" ),
+   T044( "Molecular Function" ),
+   T032( "Organism Attribute" ),
+   T040( "Organism Function" ),
+   T042( "Organ or Tissue Function" ),
+   T039( "Physiologic Function" ),
+   // Procedures
+   T060( "Diagnostic Procedure" ),
+   T065( "Educational Activity" ),
+   T058( "Health Care Activity" ),
+   T059( "Laboratory Procedure" ),
+   T063( "Molecular Biology Research Technique" ),
+   T062( "Research Activity" ),
+   T061( "Therapeutic or Preventive Procedure" ),
+   // ERROR
+   T999( "Error" );
+
+   private final String _description;
+
+   /**
+    * @param description human-readable text for this type
+    */
+   Tui( final String description ) {
+      _description = description;
+   }
+
+   /**
+    * @return the human-readable text given for this type
+    */
+   public String getDescription() {
+      return _description;
+   }
+
+   /**
+    * @return the number that follows the leading letter of the constant name, e.g. 47 for T047
+    */
+   public int getIntValue() {
+      // Drop the first character of the name and parse the remaining digits.
+      final String digits = name().substring( 1 );
+      return Integer.parseInt( digits );
+   }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,40 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.*;
+import javax.swing.table.TableCellRenderer;
+import java.awt.*;
+
+
+/**
+ * Table cell renderer that hands every rendering request to a wrapped renderer
+ * and returns that renderer's component unchanged. The Tui table model passed to
+ * the constructor is stored but is not yet consulted when rendering.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/11/2015
+ */
+final public class TuiCellRenderer implements TableCellRenderer {
+
+   static private final Logger LOGGER = LogManager.getLogger( "TuiCellRenderer" );
+
+   private final TuiTableModel _tuiModel;
+   private final TableCellRenderer _delegate;
+
+   /**
+    * @param tuiModel model of the table being rendered; kept for later use
+    * @param delegate renderer that produces the component for each cell
+    */
+   public TuiCellRenderer( final TuiTableModel tuiModel, final TableCellRenderer delegate ) {
+      _tuiModel = tuiModel;
+      _delegate = delegate;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the component produced by the delegate renderer for the same arguments
+    */
+   @Override
+   public Component getTableCellRendererComponent( final JTable table, final Object value,
+                                                   final boolean isSelected, final boolean hasFocus,
+                                                   final int row, final int column ) {
+      // No Tui-specific decoration is applied yet; the delegate's component is used as is.
+      return _delegate.getTableCellRendererComponent( table, value, isSelected, hasFocus, row, column );
+   }
+
+}
Added: ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java?rev=1787257&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java (added)
+++ ctakes/trunk/ctakes-dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java Thu Mar 16 21:55:00 2017
@@ -0,0 +1,139 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.event.EventListenerList;
+import javax.swing.event.TableModelListener;
+import javax.swing.table.TableModel;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.EnumSet;
+
+import static org.apache.ctakes.dictionary.creator.gui.umls.Tui.*;
+
+/**
+ * Table model with one row per {@link Tui} constant, in declaration order.
+ * Column 0 ("Use") is an editable checkbox telling whether the Tui is wanted,
+ * column 1 ("TUI") shows the constant name and column 2 ("Definition") its description.
+ * The Tuis in the {@code CTAKES_*} groups are wanted by default.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/10/2015
+ */
+final public class TuiTableModel implements TableModel {
+
+   static private final Logger LOGGER = LogManager.getLogger( "TuiTableModel" );
+
+   static public final Tui[] CTAKES_ANAT = { T021, T022, T023, T024, T025, T026, T029, T030 };
+   static private final Tui[] CTAKES_DISO = { T019, T020, T037, T047, T048, T049, T050, T190, T191 };
+   static private final Tui[] CTAKES_FIND = { T033, T034, T040, T041, T042, T043, T044, T045, T046, T056, T057, T184 };
+   static private final Tui[] CTAKES_PROC = { T059, T060, T061 };
+   static public final Tui[] CTAKES_DRUG = { T109, T110, T114, T115, T116, T118, T119, T121, T122, T123, T124,
+                                             T125, T126, T127, T129, T130, T131, T195, T196, T197, T200, T203 };
+
+   static private final String[] COLUMN_NAMES = { "Use", "TUI", "Definition" };
+   static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, String.class, String.class };
+
+   // Tui.values() builds a new array on every call; the table queries rows often, so keep one private copy.
+   static private final Tui[] TUIS = Tui.values();
+
+   private final EventListenerList _listenerList = new EventListenerList();
+   private final Collection<Tui> _wantedTuis = EnumSet.noneOf( Tui.class );
+
+   /**
+    * Creates a model in which the anatomy, disorder, finding, procedure and drug
+    * Tui groups declared in this class are selected.
+    */
+   public TuiTableModel() {
+      _wantedTuis.addAll( Arrays.asList( CTAKES_ANAT ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_DISO ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_FIND ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_PROC ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_DRUG ) );
+   }
+
+   /**
+    * @return the live collection of currently selected Tuis; not a copy
+    */
+   public Collection<Tui> getWantedTuis() {
+      return _wantedTuis;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getRowCount() {
+      return TUIS.length;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getColumnCount() {
+      return COLUMN_NAMES.length;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getColumnName( final int columnIndex ) {
+      return COLUMN_NAMES[ columnIndex ];
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Class<?> getColumnClass( final int columnIndex ) {
+      return COLUMN_CLASSES[ columnIndex ];
+   }
+
+   /**
+    * {@inheritDoc}
+    * Only the "Use" checkbox column can be edited.
+    */
+   @Override
+   public boolean isCellEditable( final int rowIndex, final int columnIndex ) {
+      return columnIndex == 0;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return a Boolean for column 0, the Tui name for column 1, its description
+    * for column 2, and the text "ERROR" for any other column index
+    */
+   @Override
+   public Object getValueAt( final int rowIndex, final int columnIndex ) {
+      final Tui tui = TUIS[ rowIndex ];
+      switch ( columnIndex ) {
+         case 0:
+            return _wantedTuis.contains( tui );
+         case 1:
+            return tui.name();
+         case 2:
+            return tui.getDescription();
+         default:
+            return "ERROR";
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    * Only Boolean values for column 0 are accepted; anything else is ignored.
+    * Listeners are notified of the cell update when the selection actually changes.
+    */
+   @Override
+   public void setValueAt( final Object aValue, final int rowIndex, final int columnIndex ) {
+      if ( !(aValue instanceof Boolean) || columnIndex != 0 ) {
+         return;
+      }
+      final Tui tui = TUIS[ rowIndex ];
+      final boolean changed = (Boolean)aValue ? _wantedTuis.add( tui ) : _wantedTuis.remove( tui );
+      if ( changed ) {
+         fireCellUpdated( rowIndex, columnIndex );
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void addTableModelListener( final TableModelListener listener ) {
+      _listenerList.add( TableModelListener.class, listener );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void removeTableModelListener( final TableModelListener listener ) {
+      _listenerList.remove( TableModelListener.class, listener );
+   }
+
+   /**
+    * Tells every registered table model listener that a single cell was updated.
+    *
+    * @param rowIndex    row of the updated cell
+    * @param columnIndex column of the updated cell
+    */
+   private void fireCellUpdated( final int rowIndex, final int columnIndex ) {
+      // Fully qualified because this file does not import TableModelEvent.
+      final javax.swing.event.TableModelEvent event
+            = new javax.swing.event.TableModelEvent( this, rowIndex, rowIndex, columnIndex );
+      // The listener list holds (listener class, listener) pairs; walk it last to first.
+      final Object[] listeners = _listenerList.getListenerList();
+      for ( int i = listeners.length - 2; i >= 0; i -= 2 ) {
+         if ( listeners[ i ] == TableModelListener.class ) {
+            ((TableModelListener)listeners[ i + 1 ]).tableChanged( event );
+         }
+      }
+   }
+
+}