You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2020/09/25 00:55:09 UTC
svn commit: r1881993 - in /ctakes/trunk:
ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/
ctakes-core/src/main/java/org/apache/ctakes/core/util/
ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/ ctake...
Author: seanfinan
Date: Fri Sep 25 00:55:08 2020
New Revision: 1881993
URL: http://svn.apache.org/viewvc?rev=1881993&view=rev
Log:
Refactoring in ctakes-gui to use SemanticTui
Some refactoring in piper files to accommodate relocation of AEs
Removed:
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Tui.java
Modified:
ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java
ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java
ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java
ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java
Modified: ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper (original)
+++ ctakes/trunk/ctakes-clinical-pipeline-res/src/main/resources/org/apache/ctakes/clinical/pipeline/DefaultFastPipeline.piper Fri Sep 25 00:55:08 2020
@@ -20,5 +20,5 @@ load AttributeCleartkSubPipe
add pretty.html.HtmlTextWriter SubDirectory=HTML
// Log run time stats and completion
-addLast util.FinishedLogger
+addLast util.log.FinishedLogger
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/FinishedLogger.java Fri Sep 25 00:55:08 2020
@@ -1,6 +1,7 @@
package org.apache.ctakes.core.util;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
@@ -20,13 +21,16 @@ import org.apache.uima.resource.Resource
)
final public class FinishedLogger extends JCasAnnotator_ImplBase {
+ final org.apache.ctakes.core.util.log.FinishedLogger _delegate
+ = new org.apache.ctakes.core.util.log.FinishedLogger();
/**
* {@inheritDoc}
*/
@Override
public void initialize( final UimaContext context ) throws ResourceInitializationException {
- throw new ResourceInitializationException( new Exception( "Deprecated use FinishedLogger in (sub) package log." ) );
+ Logger.getLogger( "FinishedLogger" ).warn( "Deprecated use FinishedLogger in (sub) package log." );
+ _delegate.initialize( context );
}
/**
@@ -34,7 +38,15 @@ final public class FinishedLogger extend
*/
@Override
public void process( final JCas jCas ) throws AnalysisEngineProcessException {
- throw new AnalysisEngineProcessException( new Exception( "Deprecated use FinishedLogger in (sub) package log." ) );
+ _delegate.process( jCas );
}
-}
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void collectionProcessComplete() throws AnalysisEngineProcessException {
+ _delegate.collectionProcessComplete();
+ }
+
+}
\ No newline at end of file
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/IdentifiedAnnotationUtil.java Fri Sep 25 00:55:08 2020
@@ -56,14 +56,26 @@ final public class IdentifiedAnnotationU
return annotation.getHistoryOf() == CONST.NE_HISTORY_OF_PRESENT;
}
- static public SemanticGroup getSemanticGroup( final IdentifiedAnnotation annotation ) {
- return SemanticGroup.getBestGroup( annotation );
+ /**
+ * @param annotation -
+ * @return Semantic Groups for all Umls Concepts of the annotation
+ */
+ static public Collection<SemanticGroup> getSemanticGroups( final IdentifiedAnnotation annotation ) {
+ return SemanticGroup.getGroups( annotation );
}
- static public Collection<SemanticTui> getSemanticTui( final IdentifiedAnnotation annotation ) {
+ /**
+ * @param annotation -
+ * @return Semantic Tuis for all Umls Concepts of the annotation
+ */
+ static public Collection<SemanticTui> getSemanticTuis( final IdentifiedAnnotation annotation ) {
return SemanticTui.getTuis( annotation );
}
+ /**
+ * @param annotation -
+ * @return cuis for all Umls Concepts of the annotation
+ */
static public Collection<String> getCuis( final IdentifiedAnnotation annotation ) {
return OntologyConceptUtil.getCuis( annotation );
}
@@ -85,7 +97,11 @@ final public class IdentifiedAnnotationU
return OntologyConceptUtil.getCodes( annotation, schemeName );
}
- static public Collection<String> getPreferredText( final IdentifiedAnnotation annotation ) {
+ /**
+ * @param annotation -
+ * @return preferred texts for all Umls Concepts of the annotation
+ */
+ static public Collection<String> getPreferredTexts( final IdentifiedAnnotation annotation ) {
return OntologyConceptUtil.getUmlsConceptStream( annotation )
.map( UmlsConcept::getPreferredText )
.filter( Objects::nonNull )
@@ -93,5 +109,94 @@ final public class IdentifiedAnnotationU
.collect( Collectors.toSet() );
}
+ /**
+ * @param annotation -
+ * @return the confidence of the annotation
+ */
+ static public float getConfidence( final IdentifiedAnnotation annotation ) {
+ return annotation.getConfidence();
+ }
+
+ //
+ // Some get methods that can utilize possible OntologyConcept wsd scores
+ //
+
+ /**
+ * @param annotation -
+ * @return the best wsd SemanticGroups
+ */
+ static public Collection<SemanticGroup> getBestSemanticGroups( final IdentifiedAnnotation annotation ) {
+ return getBestSemanticTuis( annotation )
+ .stream()
+ .map( SemanticTui::getGroup )
+ .collect( Collectors.toSet() );
+ }
+
+ /**
+ * @param annotation -
+ * @return the single best SemanticGroup chosen from all SemanticGroups of the annotation; wsd scores are not currently used
+ */
+ static public SemanticGroup getBestSemanticGroup( final IdentifiedAnnotation annotation ) {
+// return SemanticGroup.getBestGroup( getBestSemanticGroups( annotation ) );
+ return SemanticGroup.getBestGroup( getSemanticGroups( annotation ) );
+ }
+
+ /**
+ * @param annotation -
+ * @return the best wsd SemanticTuis
+ */
+ static public Collection<SemanticTui> getBestSemanticTuis( final IdentifiedAnnotation annotation ) {
+ return OntologyConceptUtil.getBestUmlsConcepts( annotation )
+ .stream()
+ .map( SemanticTui::getTui )
+ .collect( Collectors.toSet() );
+ }
+
+ /**
+ * @param annotation -
+ * @return the best wsd cuis
+ */
+ static public Collection<String> getBestCuis( final IdentifiedAnnotation annotation ) {
+ return OntologyConceptUtil.getBestUmlsConcepts( annotation )
+ .stream()
+ .map( UmlsConcept::getCui )
+ .collect( Collectors.toSet() );
+ }
+
+ /**
+ * @param annotation -
+ * @return a collection of the best wsd schemes with codes for the given annotation. e.g. snomed_us, rxnorm.
+ */
+ static public Collection<String> getBestCodeSchemes( final IdentifiedAnnotation annotation ) {
+ return OntologyConceptUtil.getBestUmlsConcepts( annotation )
+ .stream()
+ .map( UmlsConcept::getCodingScheme )
+ .collect( Collectors.toSet() );
+ }
+
+ /**
+ * @param annotation -
+ * @param schemeName the name of a coding scheme. e.g. snomed_us, rxnorm.
+ * @return the best wsd annotation codes for the given coding scheme.
+ */
+ static public Collection<String> getBestCodes( final IdentifiedAnnotation annotation, final String schemeName ) {
+ return OntologyConceptUtil.getBestUmlsConcepts( annotation )
+ .stream()
+ .filter( c -> schemeName.equals( c.getCodingScheme() ) )
+ .map( UmlsConcept::getCode )
+ .collect( Collectors.toSet() );
+ }
+
+ /**
+ * @param annotation -
+ * @return non-null preferred texts for the best wsd Umls Concepts of the annotation
+ */
+ static public Collection<String> getBestPreferredTexts( final IdentifiedAnnotation annotation ) {
+ return OntologyConceptUtil.getBestUmlsConcepts( annotation ).stream()
+ .map( UmlsConcept::getPreferredText )
+ .filter( Objects::nonNull )
+ .collect( Collectors.toSet() );
+ }
+
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/OntologyConceptUtil.java Fri Sep 25 00:55:08 2020
@@ -102,6 +102,56 @@ final public class OntologyConceptUtil {
}
+ static private final Predicate<OntologyConcept> isDisambiguated = c -> {
+ try {
+ return c.getDisambiguated();
+ } catch ( Exception e ) {
+ return false;
+ }
+ };
+
+ /** @return the score of the given concept, or 0.5 when reading the score throws an exception. */
+ static private double getWsdScore( final OntologyConcept concept ) {
+ try {
+ return concept.getScore();
+ } catch ( Exception e ) {
+ return 0.5;
+ }
+ }
+
+
+
+ /**
+ * @param annotation -
+ * @return Umls Concepts of the annotation with the highest wsd score; only disambiguated concepts are considered when any exist
+ */
+ static public Collection<UmlsConcept> getBestUmlsConcepts( final IdentifiedAnnotation annotation ) {
+ // A Stream can only be consumed once, so the concepts are collected into a list
+ // before they are filtered and scored more than one time.
+ final List<UmlsConcept> allConcepts = getUmlsConceptStream( annotation )
+ .collect( Collectors.toList() );
+ final List<UmlsConcept> wsdConcepts = allConcepts.stream()
+ .filter( isDisambiguated )
+ .collect( Collectors.toList() );
+ if ( wsdConcepts.size() == 1 ) {
+ return wsdConcepts;
+ }
+ // Without any disambiguated concepts, fall back to every concept of the annotation.
+ final List<UmlsConcept> candidates = wsdConcepts.isEmpty() ? allConcepts : wsdConcepts;
+ if ( candidates.size() <= 1 ) {
+ return candidates;
+ }
+ // Several candidates remain: keep every concept that shares the highest wsd score.
+ final double max = candidates.stream()
+ .mapToDouble( OntologyConceptUtil::getWsdScore )
+ .max()
+ .orElse( 0.5 );
+ return candidates.stream()
+ .filter( c -> getWsdScore( c ) == max )
+ .collect( Collectors.toSet() );
+ }
+
+
//
// Get cuis, tuis, or codes for a single IdentifiedAnnotation
//
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticGroup.java Fri Sep 25 00:55:08 2020
@@ -12,34 +12,37 @@ import java.util.stream.Collectors;
import static org.apache.ctakes.typesystem.type.constants.CONST.*;
public enum SemanticGroup {
- DRUG( NE_TYPE_ID_DRUG, "Drug", MedicationMention.class, MedicationMention::new ),
- DISORDER( NE_TYPE_ID_DISORDER, "Disorder", DiseaseDisorderMention.class, DiseaseDisorderMention::new ),
- FINDING( NE_TYPE_ID_FINDING, "Finding", SignSymptomMention.class, SignSymptomMention::new ),
- PROCEDURE( NE_TYPE_ID_PROCEDURE, "Procedure", ProcedureMention.class, ProcedureMention::new ),
- ANATOMY( NE_TYPE_ID_ANATOMICAL_SITE, "Anatomy", AnatomicalSiteMention.class, AnatomicalSiteMention::new ),
- CLINICAL_ATTRIBUTE( NE_TYPE_ID_CLINICAL_ATTRIBUTE, "Attribute", SignSymptomMention.class, SignSymptomMention::new ),
- DEVICE( NE_TYPE_ID_DEVICE, "Device", EntityMention.class, EntityMention::new ),
- LAB( NE_TYPE_ID_LAB, "Lab", LabMention.class, LabMention::new ),
- PHENOMENON( NE_TYPE_ID_PHENOMENA, "Phenomenon", EventMention.class, EventMention::new ),
- SUBJECT( NE_TYPE_ID_SUBJECT_MODIFIER, "Subject", SubjectModifier.class, SubjectModifier::new ),
- TITLE( NE_TYPE_ID_PERSON_TITLE, "Title", PersonTitleAnnotation.class, PersonTitleAnnotation::new ),
- EVENT( NE_TYPE_ID_GENERIC_EVENT, "Event", EventMention.class, EventMention::new ),
- ENTITY( NE_TYPE_ID_GENERIC_ENTITY, "Entity", EntityMention.class, EntityMention::new ),
- TIME( NE_TYPE_ID_TIME_MENTION, "Time", TimeMention.class, TimeAnnotation::new ),
- MODIFIER( NE_TYPE_ID_GENERIC_MODIFIER, "Modifier", Modifier.class, Modifier::new ),
- LAB_MODIFIER( NE_TYPE_ID_LAB_VALUE_MODIFIER, "LabModifier", LabValueModifier.class, LabValueModifier::new ),
- UNKNOWN( NE_TYPE_ID_UNKNOWN, "Unknown", IdentifiedAnnotation.class, IdentifiedAnnotation::new );
+ DRUG( NE_TYPE_ID_DRUG, "Drug", "Medication", MedicationMention.class, MedicationMention::new ),
+ DISORDER( NE_TYPE_ID_DISORDER, "Disorder", "Disease/Disorder", DiseaseDisorderMention.class, DiseaseDisorderMention::new ),
+ FINDING( NE_TYPE_ID_FINDING, "Finding", "Sign/Symptom", SignSymptomMention.class, SignSymptomMention::new ),
+ PROCEDURE( NE_TYPE_ID_PROCEDURE, "Procedure", "Procedure", ProcedureMention.class, ProcedureMention::new ),
+ ANATOMY( NE_TYPE_ID_ANATOMICAL_SITE, "Anatomy", "Anatomical Site", AnatomicalSiteMention.class, AnatomicalSiteMention::new ),
+ CLINICAL_ATTRIBUTE( NE_TYPE_ID_CLINICAL_ATTRIBUTE, "Attribute", "Clinical Attribute", SignSymptomMention.class, SignSymptomMention::new ),
+ DEVICE( NE_TYPE_ID_DEVICE, "Device", "Device", EntityMention.class, EntityMention::new ),
+ LAB( NE_TYPE_ID_LAB, "Lab", "Lab", LabMention.class, LabMention::new ),
+ PHENOMENON( NE_TYPE_ID_PHENOMENA, "Phenomenon", "Phenomenon", EventMention.class, EventMention::new ),
+ SUBJECT( NE_TYPE_ID_SUBJECT_MODIFIER, "Subject", "Subject", SubjectModifier.class, SubjectModifier::new ),
+ TITLE( NE_TYPE_ID_PERSON_TITLE, "Title", "Person Title", PersonTitleAnnotation.class, PersonTitleAnnotation::new ),
+ EVENT( NE_TYPE_ID_GENERIC_EVENT, "Event", "Event", EventMention.class, EventMention::new ),
+ ENTITY( NE_TYPE_ID_GENERIC_ENTITY, "Entity", "Entity", EntityMention.class, EntityMention::new ),
+ TIME( NE_TYPE_ID_TIME_MENTION, "Time", "Timex3", TimeMention.class, TimeAnnotation::new ),
+ MODIFIER( NE_TYPE_ID_GENERIC_MODIFIER, "Modifier", "Modifier", Modifier.class, Modifier::new ),
+ LAB_MODIFIER( NE_TYPE_ID_LAB_VALUE_MODIFIER, "LabModifier", "Lab Modifier", LabValueModifier.class, LabValueModifier::new ),
+ UNKNOWN( NE_TYPE_ID_UNKNOWN, "Unknown", "Unknown Semantic Group", IdentifiedAnnotation.class, IdentifiedAnnotation::new );
private final int _code;
private final String _name;
+ private final String _longName;
private final Class<? extends IdentifiedAnnotation> _clazz;
private final Function<JCas, ? extends IdentifiedAnnotation> _creator;
SemanticGroup( final int code, final String name,
+ final String longName,
final Class<? extends IdentifiedAnnotation> clazz,
final Function<JCas, ? extends IdentifiedAnnotation> creator ) {
_code = code;
_name = name;
+ _longName = longName; // descriptive name, e.g. "Disease/Disorder" for DISORDER; previously assigned the short name by mistake
_clazz = clazz;
_creator = creator;
}
@@ -52,6 +55,10 @@ public enum SemanticGroup {
return _name;
}
+ public String getLongName() {
+ return _longName;
+ }
+
public Class<? extends IdentifiedAnnotation> getCtakesClass() {
return _clazz;
}
@@ -83,7 +90,7 @@ public enum SemanticGroup {
}
static private final class BestGrouper implements Comparator<SemanticGroup> {
- static private BestGrouper INSTANCE = new BestGrouper();
+ static private final BestGrouper INSTANCE = new BestGrouper();
public int compare( final SemanticGroup g1, final SemanticGroup g2 ) {
if ( g1 == SemanticGroup.UNKNOWN ) {
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/annotation/SemanticTui.java Fri Sep 25 00:55:08 2020
@@ -156,7 +156,7 @@ public enum SemanticTui {
private final int _code;
private final String _name;
- private final SemanticGroup _group;
+ private SemanticGroup _group;
SemanticTui( final int code, final String name, final SemanticGroup group ) {
_code = code;
@@ -172,6 +172,16 @@ public enum SemanticTui {
return _name;
}
+ /**
+ * Allows a user to override the semantic group associated with a tui.
+ * This is useful when differentiating things like chemicals and drugs.
+ *
+ * @param group -
+ */
+ public void setGroup( final SemanticGroup group ) {
+ _group = group;
+ }
+
public SemanticGroup getGroup() {
return _group;
}
@@ -197,14 +207,16 @@ public enum SemanticTui {
}
static public SemanticTui getTui( final String semanticType ) {
+ // Attempt to match name ( e.g. "Cell" ).
final String toMatch = getMatchable( semanticType );
for ( SemanticTui tui : SemanticTui.values() ) {
if ( tui.getMatchType()
- .equals( toMatch ) ) {
+ .equals( toMatch ) ) {
return tui;
}
}
- return UNKNOWN;
+ // Attempt to match code ( e.g. "T001" ).
+ return getTuiFromCode( toMatch );
}
static public SemanticTui getTui( final int code ) {
@@ -219,7 +231,7 @@ public enum SemanticTui {
static public SemanticTui getTuiFromCode( final String tuiCode ) {
for ( SemanticTui tui : SemanticTui.values() ) {
if ( tui.name()
- .equals( tuiCode ) ) {
+ .equalsIgnoreCase( tuiCode ) ) {
return tui;
}
}
@@ -248,9 +260,31 @@ public enum SemanticTui {
return getTuiFromCode( umlsConcept.getTui() );
}
+ /**
+ * Allows a user to override the semantic group associated with a tui.
+ * This is useful when differentiating things like chemicals and drugs.
+ *
+ * @param tui -
+ * @param group -
+ */
+ static public void setGroup( final int tui, final String group ) {
+ getTui( tui ).setGroup( SemanticGroup.getGroup( group ) );
+ }
+
+ /**
+ * Allows a user to override the semantic group associated with a tui.
+ * This is useful when differentiating things like chemicals and drugs.
+ *
+ * @param type -
+ * @param group -
+ */
+ static public void setGroup( final String type, final String group ) {
+ getTui( type ).setGroup( SemanticGroup.getGroup( group ) );
+ }
+
static private String getMatchable( final String text ) {
return text.toLowerCase()
- .replaceAll( ",", "" );
+ .replaceAll( ",", "" );
}
}
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java Fri Sep 25 00:55:08 2020
@@ -1,7 +1,7 @@
package org.apache.ctakes.dictionary.lookup2.util;
import java.util.Collection;
-import java.util.HashSet;
+import java.util.stream.Collectors;
/**
* Author: SPF
@@ -38,24 +38,16 @@ final public class TuiCodeUtil {
static public Collection<String> getIntAsTuis( final Collection<Integer> tuiCodes ) {
- final Collection<String> tuis = new HashSet<>( tuiCodes.size() );
- for ( Integer tuiCode : tuiCodes ) {
- tuis.add( getAsTui( tuiCode ) );
- }
- return tuis;
+ return tuiCodes.stream().map( TuiCodeUtil::getAsTui ).collect( Collectors.toSet() );
}
static public Collection<String> getStringAsTuis( final Collection<String> tuiNums ) {
- final Collection<String> tuis = new HashSet<>( tuiNums.size() );
- for ( String tuiNum : tuiNums ) {
- tuis.add( getAsTui( tuiNum ) );
- }
- return tuis;
+ return tuiNums.stream().map( TuiCodeUtil::getAsTui ).collect( Collectors.toSet() );
}
static public Integer getTuiCode( final String tui ) {
final String tuiText = getAsTui( tui );
- final String tuiNum = tuiText.substring( 1, tuiText.length() );
+ final String tuiNum = tuiText.substring( 1 );
try {
return Integer.parseInt( tuiNum );
} catch ( NumberFormatException nfE ) {
Modified: ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper (original)
+++ ctakes/trunk/ctakes-examples-res/src/main/resources/org/apache/ctakes/examples/pipeline/XmiToPretty.piper Fri Sep 25 00:55:08 2020
@@ -11,4 +11,4 @@ add pretty.plaintext.PrettyTextWriterFit
add property.plaintext.PropertyTextWriterFit
// Announce completion
-addLast util.FinishedLogger
+addLast util.log.FinishedLogger
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/DictionaryBuilder.java Fri Sep 25 00:55:08 2020
@@ -1,9 +1,14 @@
package org.apache.ctakes.gui.dictionary;
+import org.apache.ctakes.core.util.annotation.SemanticGroup;
+import org.apache.ctakes.core.util.annotation.SemanticTui;
import org.apache.ctakes.core.util.collection.CollectionMap;
import org.apache.ctakes.core.util.collection.HashSetMap;
-import org.apache.ctakes.gui.dictionary.umls.*;
+import org.apache.ctakes.gui.dictionary.umls.Concept;
+import org.apache.ctakes.gui.dictionary.umls.ConceptMapFactory;
+import org.apache.ctakes.gui.dictionary.umls.MrconsoParser;
+import org.apache.ctakes.gui.dictionary.umls.UmlsTermUtil;
import org.apache.ctakes.gui.dictionary.util.HsqlUtil;
import org.apache.ctakes.gui.dictionary.util.JdbcUtil;
import org.apache.ctakes.gui.dictionary.util.RareWordDbWriter;
@@ -45,7 +50,7 @@ final class DictionaryBuilder {
final Collection<String> wantedLanguages,
final Collection<String> wantedSources,
final Collection<String> wantedTargets,
- final Collection<Tui> wantedTuis ) {
+ final Collection<SemanticTui> wantedTuis ) {
// Set up the term utility
final UmlsTermUtil umlsTermUtil = new UmlsTermUtil( DEFAULT_DATA_DIR );
final Map<Long, Concept> conceptMap
@@ -59,7 +64,7 @@ final class DictionaryBuilder {
final Collection<String> wantedLanguages,
final Collection<String> wantedSources,
final Collection<String> wantedTargets,
- final Collection<Tui> wantedTuis ) {
+ final Collection<SemanticTui> wantedTuis ) {
LOGGER.info( "Parsing Concepts" );
// Create a map of Cuis to empty Concepts for all wanted Tuis and source vocabularies
final Map<Long, Concept> conceptMap
@@ -75,11 +80,14 @@ final class DictionaryBuilder {
return conceptMap;
}
- static private void removeWsdRarities( final Map<Long, Concept> conceptMap, final Collection<Tui> wantedTuis,
+ static private void removeWsdRarities( final Map<Long, Concept> conceptMap, final Collection<SemanticTui> wantedTuis,
final int wsdDivisor, final int anatMultiplier ) {
LOGGER.info( "Performing Poor man's WSD ..." );
- final Collection<Tui> wantedAnatTuis = new ArrayList<>( wantedTuis );
- wantedAnatTuis.retainAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+ final EnumSet<SemanticTui> wantedAnatTuis = EnumSet.noneOf( SemanticTui.class );
+ Arrays.stream( SemanticTui.values() )
+ .filter( t -> t.getGroup() == SemanticGroup.ANATOMY )
+ .filter( wantedTuis::contains )
+ .forEach( wantedAnatTuis::add );
final CollectionMap<String, Concept, Set<Concept>> synonymCodeMap = new HashSetMap<>( 500000 );
for ( Concept concept : conceptMap.values() ) {
concept.cullExtensions();
@@ -129,17 +137,23 @@ final class DictionaryBuilder {
}
static private void removeAnatTexts( final Map<Long, Concept> conceptMap,
- final Collection<Tui> wantedTuis ) {
+ final Collection<SemanticTui> wantedTuis ) {
LOGGER.info( "Removing Non-Anatomy synonyms that are also Anatomy synonyms ..." );
- final Collection<Tui> wantedAnatTuis = new ArrayList<>( wantedTuis );
- wantedAnatTuis.retainAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+ final EnumSet<SemanticTui> wantedAnatTuis = EnumSet.noneOf( SemanticTui.class );
+ Arrays.stream( SemanticTui.values() )
+ .filter( t -> t.getGroup() == SemanticGroup.ANATOMY )
+ .filter( wantedTuis::contains )
+ .forEach( wantedAnatTuis::add );
final Collection<String> anatTexts = conceptMap.values().stream()
- .filter( c -> wantedAnatTuis.containsAll( c.getTuis() ) )
- .map( Concept::getTexts )
- .flatMap( Collection::stream )
- .collect( Collectors.toSet() );
- final Collection<Tui> nonAnatTuis = new ArrayList<>( wantedTuis );
- nonAnatTuis.removeAll( Arrays.asList( TuiTableModel.CTAKES_ANAT ) );
+ .filter( c -> wantedAnatTuis.containsAll( c.getTuis() ) )
+ .map( Concept::getTexts )
+ .flatMap( Collection::stream )
+ .collect( Collectors.toSet() );
+ final EnumSet<SemanticTui> nonAnatTuis = EnumSet.noneOf( SemanticTui.class );
+ Arrays.stream( SemanticTui.values() )
+ .filter( t -> t.getGroup() != SemanticGroup.ANATOMY )
+ .filter( wantedTuis::contains )
+ .forEach( nonAnatTuis::add );
final Collection<Long> empties = new ArrayList<>();
int textCount = 0;
for ( Map.Entry<Long, Concept> entry : conceptMap.entrySet() ) {
@@ -159,14 +173,21 @@ final class DictionaryBuilder {
// TODO too much tui confusion in non-rxnorm drugs
- static private void removeUnwantedDrugs( final Map<Long, Concept> conceptMap, Collection<Tui> wantedTuis ) {
+ static private void removeUnwantedDrugs( final Map<Long, Concept> conceptMap,
+ final Collection<SemanticTui> wantedTuis ) {
LOGGER.info( "Removing Drug Concepts not in rxnorm ..." );
// remove concepts that have only drug tuis but are not in rxnorm
- final Collection<Tui> drugTuis = new ArrayList<>( wantedTuis );
- drugTuis.retainAll( Arrays.asList( TuiTableModel.CTAKES_DRUG ) );
+ final EnumSet<SemanticTui> drugTuis = EnumSet.noneOf( SemanticTui.class );
+ Arrays.stream( SemanticTui.values() )
+ .filter( t -> t.getGroup() == SemanticGroup.DRUG )
+ .filter( wantedTuis::contains )
+ .forEach( drugTuis::add );
// remove concepts that are in rxnorm but have non-drug tuis
- final Collection<Tui> nonDrugTuis = new ArrayList<>( wantedTuis );
- nonDrugTuis.removeAll( Arrays.asList( TuiTableModel.CTAKES_DRUG ) );
+ final EnumSet<SemanticTui> nonDrugTuis = EnumSet.noneOf( SemanticTui.class );
+ Arrays.stream( SemanticTui.values() )
+ .filter( t -> t.getGroup() != SemanticGroup.DRUG )
+ .filter( wantedTuis::contains )
+ .forEach( nonDrugTuis::add );
// if concept has drug tuis but is not in rxnorm || concept is in rxnorm but does not have drug tuis
final Collection<Long> empties = new ArrayList<>();
int textCount = 0;
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/MainPanel.java Fri Sep 25 00:55:08 2020
@@ -1,5 +1,6 @@
package org.apache.ctakes.gui.dictionary;
+import org.apache.ctakes.core.util.annotation.SemanticTui;
import org.apache.ctakes.gui.component.DisablerPane;
import org.apache.ctakes.gui.component.FileChooserPanel;
import org.apache.ctakes.gui.component.LoggerPanel;
@@ -18,9 +19,7 @@ import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
+import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -169,6 +168,36 @@ final class MainPanel extends JPanel {
} catch ( IOException ioE ) {
error( "Vocabulary Parse Error", ioE.getMessage() );
}
+ final File mrSab = new File( __umlsDirPath + "/META", "MRSAB.RRF" );
+ final String mrSabPath = mrSab.getPath();
+ final Map<String, String> sourceNames = new HashMap<>();
+ final Map<String, String> sourceVersions = new HashMap<>();
+ final Map<String, String> sourceCuiCounts = new HashMap<>();
+ LOGGER.info( "Parsing vocabulary names from " + mrSabPath );
+ try ( final BufferedReader reader = FileUtil.createReader( mrSabPath ) ) {
+ int lineCount = 0;
+ java.util.List<String> tokens = FileUtil.readBsvTokens( reader, mrSabPath );
+ while ( tokens != null ) {
+ lineCount++;
+ if ( tokens.size() > MrsabIndex.CFR._index ) {
+ final String sab = tokens.get( MrsabIndex.RSAB._index );
+ if ( sources.contains( sab ) ) {
+ sourceNames.put( sab, tokens.get( MrsabIndex.SON._index ) );
+ sourceVersions.put( sab, tokens.get( MrsabIndex.SVER._index ) );
+ sourceCuiCounts.put( sab, tokens.get( MrsabIndex.CFR._index ) );
+ }
+ }
+ if ( lineCount % 100000 == 0 ) {
+ LOGGER.info( "File Line " + lineCount + "\t Vocabularies " + sources.size() );
+ }
+ tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
+ }
+ LOGGER.info( "Parsed " + sources.size() + " vocabulary names" );
+ _sourceModel.setSourceInfo( sourceNames, sourceVersions, sourceCuiCounts );
+ } catch ( IOException ioE ) {
+ error( "Vocabulary Parse Error", ioE.getMessage() );
+ }
+
DisablerPane.getInstance().setVisible( false );
frame.setCursor( Cursor.getDefaultCursor() );
}
@@ -193,13 +222,16 @@ final class MainPanel extends JPanel {
private final String __dictionaryName;
private final Collection<String> __wantedSources;
private final Collection<String> __wantedTargets;
- private final Collection<Tui> __wantedTuis;
+ private final Collection<SemanticTui> __wantedTuis;
private final Collection<String> __wantedLanguages;
- private DictionaryBuildRunner( final String umlsDirPath, final String ctakesDirPath, final String dictionaryName,
+ private DictionaryBuildRunner( final String umlsDirPath,
+ final String ctakesDirPath,
+ final String dictionaryName,
final Collection<String> wantedSources,
final Collection<String> wantedTargets,
- final Collection<Tui> wantedTuis, final Collection<String> wantedLangauges ) {
+ final Collection<SemanticTui> wantedTuis,
+ final Collection<String> wantedLangauges ) {
__umlsDirPath = umlsDirPath;
__ctakesDirPath = ctakesDirPath;
__dictionaryName = dictionaryName;
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/Concept.java Fri Sep 25 00:55:08 2020
@@ -1,6 +1,7 @@
package org.apache.ctakes.gui.dictionary.umls;
+import org.apache.ctakes.core.util.annotation.SemanticTui;
import org.apache.ctakes.core.util.collection.HashSetMap;
import org.apache.ctakes.gui.dictionary.util.TextTokenizer;
@@ -20,12 +21,12 @@ final public class Concept {
final private Map<String, Integer> _textCounts;
final private HashSetMap<String, String> _codes;
- final private Collection<Tui> _tuis;
+ final private Collection<SemanticTui> _tuis;
public Concept() {
_textCounts = new HashMap<>( 1 );
_codes = new HashSetMap<>( 0 );
- _tuis = EnumSet.noneOf( Tui.class );
+ _tuis = EnumSet.noneOf( SemanticTui.class );
}
public int addTexts( final Collection<String> texts ) {
@@ -123,15 +124,15 @@ final public class Concept {
return codes;
}
- public void addTui( final Tui tui ) {
+ public void addTui( final SemanticTui tui ) {
_tuis.add( tui );
}
- public Collection<Tui> getTuis() {
+ public Collection<SemanticTui> getTuis() {
return _tuis;
}
- public boolean hasTui( final Collection<Tui> tuis ) {
+ public boolean hasTui( final Collection<SemanticTui> tuis ) {
return _tuis.stream().anyMatch( tuis::contains );
}
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/ConceptMapFactory.java Fri Sep 25 00:55:08 2020
@@ -1,6 +1,7 @@
package org.apache.ctakes.gui.dictionary.umls;
+import org.apache.ctakes.core.util.annotation.SemanticTui;
import org.apache.log4j.Logger;
import java.util.Collection;
@@ -21,7 +22,7 @@ final public class ConceptMapFactory {
static public Map<Long, Concept> createInitialConceptMap( final String umlsDirPath,
final Collection<String> wantedSources,
- final Collection<Tui> wantedTuis ) {
+ final Collection<SemanticTui> wantedTuis ) {
if ( wantedSources.isEmpty() ) {
LOGGER.warn( "No source vocabularies specified" );
return Collections.emptyMap();
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrconsoParser.java Fri Sep 25 00:55:08 2020
@@ -152,7 +152,7 @@ final public class MrconsoParser {
concept.setPreferredText( text );
}
// Get tokenized text
- final String tokenizedText = TextTokenizer.getTokenizedText( text );
+ final String tokenizedText = TextTokenizer.getTokenizedText( text.toLowerCase() );
if ( tokenizedText == null || tokenizedText.isEmpty()
|| !umlsTermUtil.isTextValid( tokenizedText )
|| DoseUtil.hasUnit( tokenizedText ) ) {
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/MrstyParser.java Fri Sep 25 00:55:08 2020
@@ -1,6 +1,7 @@
package org.apache.ctakes.gui.dictionary.umls;
+import org.apache.ctakes.core.util.annotation.SemanticTui;
import org.apache.ctakes.gui.dictionary.util.FileUtil;
import org.apache.log4j.Logger;
@@ -28,14 +29,14 @@ final public class MrstyParser {
}
static public Map<Long, Concept> createConceptsForTuis( final String umlsPath,
- final Collection<Tui> wantedTuis ) {
+ final Collection<SemanticTui> wantedTuis ) {
final String mrstyPath = umlsPath + MRSTY_SUB_PATH;
LOGGER.info( "Compiling list of Cuis with wanted Tuis using " + mrstyPath );
long lineCount = 0;
final Map<Long, Concept> wantedConcepts = new HashMap<>();
- final Collection<Tui> usedTuis = EnumSet.noneOf( Tui.class );
- final Map<Tui, Long> tuiCodeCount = new EnumMap<>( Tui.class );
- for ( Tui tui : wantedTuis ) {
+ final Collection<SemanticTui> usedTuis = EnumSet.noneOf( SemanticTui.class );
+ final Map<SemanticTui, Long> tuiCodeCount = new EnumMap<>( SemanticTui.class );
+ for ( SemanticTui tui : wantedTuis ) {
tuiCodeCount.put( tui, 0L );
}
try ( final BufferedReader reader = FileUtil.createReader( mrstyPath ) ) {
@@ -43,7 +44,7 @@ final public class MrstyParser {
while ( tokens != null ) {
lineCount++;
if ( tokens.size() > TUI._index ) {
- final Tui tuiEnum = Tui.valueOf( tokens.get( TUI._index ) );
+ final SemanticTui tuiEnum = SemanticTui.valueOf( tokens.get( TUI._index ) );
if ( !wantedTuis.contains( tuiEnum ) ) {
tokens = FileUtil.readBsvTokens( reader, mrstyPath );
continue;
@@ -76,8 +77,8 @@ final public class MrstyParser {
LOGGER.info( "File Lines " + lineCount + "\t Cuis: " + counts );
if ( usedTuis.size() != wantedTuis.size() ) {
wantedTuis.removeAll( usedTuis );
- for ( Tui missingTui : wantedTuis ) {
- LOGGER.warn( "Could not find Cuis for Tui " + missingTui + " " + missingTui.getDescription() );
+ for ( SemanticTui missingTui : wantedTuis ) {
+ LOGGER.warn( "Could not find Cuis for Tui " + missingTui + " " + missingTui.getSemanticType() );
}
}
return wantedConcepts;
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/SourceTableModel.java Fri Sep 25 00:55:08 2020
@@ -18,15 +18,21 @@ final public class SourceTableModel impl
static private final Logger LOGGER = Logger.getLogger( "SourceTableModel" );
- static private final String[] COLUMN_NAMES = { "Source", "Target", "Vocabulary" };
- static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, Boolean.class, String.class };
+ static private final String[] COLUMN_NAMES = { "Read Synonyms", "Record Codes", "Code", "Vocabulary", "Version",
+ "CUIs" };
+ static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, Boolean.class, String.class, String.class,
+ String.class, String.class };
- static private final String[] CTAKES_SOURCES = { "SNOMEDCT_US", "RXNORM" };
+ static private final String[] CTAKES_SOURCES = { "SNOMEDCT_US", "RXNORM", "MTH", "MSH", "LNC" };
+ static private final String[] CTAKES_TARGETS = { "SNOMEDCT_US", "RXNORM" };
private final EventListenerList _listenerList = new EventListenerList();
private final Collection<String> _wantedSources = new HashSet<>();
private final Collection<String> _wantedTargets = new HashSet<>();
private final List<String> _sources = new ArrayList<>();
+ private final Map<String, String> _sourceNames = new HashMap<>();
+ private final Map<String, String> _sourceVersions = new HashMap<>();
+ private final Map<String, String> _sourceCuiCounts = new HashMap<>();
public void setSources( final Collection<String> sources ) {
@@ -36,7 +42,19 @@ final public class SourceTableModel impl
_sources.addAll( sources );
Collections.sort( _sources );
_wantedSources.addAll( Arrays.asList( CTAKES_SOURCES ) );
- _wantedTargets.addAll( Arrays.asList( CTAKES_SOURCES ) );
+ _wantedTargets.addAll( Arrays.asList( CTAKES_TARGETS ) );
+ fireTableChanged( new TableModelEvent( this ) );
+ }
+
+ public void setSourceInfo( final Map<String, String> sourceNames,
+ final Map<String, String> sourceVersions,
+ final Map<String, String> sourceCuiCounts ) {
+ _sourceNames.clear();
+ _sourceVersions.clear();
+ _sourceCuiCounts.clear();
+ _sourceNames.putAll( sourceNames );
+ _sourceVersions.putAll( sourceVersions );
+ _sourceCuiCounts.putAll( sourceCuiCounts );
fireTableChanged( new TableModelEvent( this ) );
}
@@ -61,7 +79,7 @@ final public class SourceTableModel impl
*/
@Override
public int getColumnCount() {
- return 3;
+ return 6;
}
/**
@@ -101,6 +119,12 @@ final public class SourceTableModel impl
return isTargetEnabled( source );
case 2:
return source;
+ case 3:
+ return _sourceNames.getOrDefault( source, "" );
+ case 4:
+ return _sourceVersions.getOrDefault( source, "" );
+ case 5:
+ return _sourceCuiCounts.getOrDefault( source, "" );
}
return "ERROR";
}
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/umls/TuiTableModel.java Fri Sep 25 00:55:08 2020
@@ -1,5 +1,7 @@
package org.apache.ctakes.gui.dictionary.umls;
+import org.apache.ctakes.core.util.annotation.SemanticGroup;
+import org.apache.ctakes.core.util.annotation.SemanticTui;
import org.apache.log4j.Logger;
import javax.swing.event.EventListenerList;
@@ -9,7 +11,8 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
-import static org.apache.ctakes.gui.dictionary.umls.Tui.*;
+import static org.apache.ctakes.core.util.annotation.SemanticTui.*;
+
/**
* @author SPF , chip-nlp
@@ -20,28 +23,37 @@ final public class TuiTableModel impleme
static private final Logger LOGGER = Logger.getLogger( "TuiTableModel" );
- static public final Tui[] CTAKES_ANAT = { T021, T022, T023, T024, T025, T026, T029, T030 };
- static private final Tui[] CTAKES_DISO = { T019, T020, T037, T047, T048, T049, T050, T190, T191 };
- static private final Tui[] CTAKES_FIND = { T033, T034, T040, T041, T042, T043, T044, T045, T046, T056, T057, T184 };
- static private final Tui[] CTAKES_PROC = { T059, T060, T061 };
- static public final Tui[] CTAKES_DRUG = { T109, T110, T114, T115, T116, T118, T119, T121, T122, T123, T124,
- T125, T126, T127, T129, T130, T131, T195, T196, T197, T200, T203 };
+// static public final Tui[] CTAKES_ANAT = { T021, T022, T023, T024, T025, T026, T029, T030 };
+// static public final Tui[] CTAKES_DRUG = { T109, T110, T114, T115, T116, T118, T119, T121, T122, T123, T124,
+// T125, T126, T127, T129, T130, T131, T195, T196, T197, T200, T203 };
+
+ // Semantic Types that are in the normal ctakes semantic groups but are still not wanted. e.g. "Cell"
+ static private final Collection<SemanticTui> UNWANTED_TUIS
+ = EnumSet.of( T116, T087, T123, T118, T026, T043, T025, T103, T120, T104, T077, T049, T088, T065, T196,
+ T050, T018, T126, T168, T045, T028, T125, T078, T129, T055, T197, T170, T130, T119, T063,
+ T066, T041, T073, T044, T085, T114, T124, T086, T115, T109, T040, T042, T046, T039,
+ T192, T062, T075, T054, UNKNOWN );
- static private final String[] COLUMN_NAMES = { "Use", "TUI", "Semantic Type" };
- static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, String.class, String.class };
+ static private final String[] COLUMN_NAMES = { "Use", "TUI", "Semantic Type", "Semantic Group" };
+ static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, String.class, String.class, String.class };
private final EventListenerList _listenerList = new EventListenerList();
- private final Collection<Tui> _wantedTuis = EnumSet.noneOf( Tui.class );
+ private final Collection<SemanticTui> _wantedTuis = EnumSet.noneOf( SemanticTui.class );
public TuiTableModel() {
- _wantedTuis.addAll( Arrays.asList( CTAKES_ANAT ) );
- _wantedTuis.addAll( Arrays.asList( CTAKES_DISO ) );
- _wantedTuis.addAll( Arrays.asList( CTAKES_FIND ) );
- _wantedTuis.addAll( Arrays.asList( CTAKES_PROC ) );
- _wantedTuis.addAll( Arrays.asList( CTAKES_DRUG ) );
+ final EnumSet<SemanticGroup> wantedGroups
+ = EnumSet.of( SemanticGroup.ANATOMY,
+ SemanticGroup.DISORDER,
+ SemanticGroup.FINDING,
+ SemanticGroup.PROCEDURE,
+ SemanticGroup.DRUG );
+ Arrays.stream( SemanticTui.values() )
+ .filter( t -> !UNWANTED_TUIS.contains( t ) )
+ .filter( t -> wantedGroups.contains( t.getGroup() ) )
+ .forEach( _wantedTuis::add );
}
- public Collection<Tui> getWantedTuis() {
+ public Collection<SemanticTui> getWantedTuis() {
return _wantedTuis;
}
@@ -50,7 +62,7 @@ final public class TuiTableModel impleme
*/
@Override
public int getRowCount() {
- return Tui.values().length;
+ return SemanticTui.values().length;
}
/**
@@ -58,7 +70,7 @@ final public class TuiTableModel impleme
*/
@Override
public int getColumnCount() {
- return 3;
+ return 4;
}
/**
@@ -90,19 +102,21 @@ final public class TuiTableModel impleme
*/
@Override
public Object getValueAt( final int rowIndex, final int columnIndex ) {
- final Tui tui = Tui.values()[ rowIndex ];
+ final SemanticTui tui = SemanticTui.values()[ rowIndex ];
switch ( columnIndex ) {
case 0:
return isTuiEnabled( tui );
case 1:
return tui.name();
case 2:
- return tui.getDescription();
+ return tui.getSemanticType();
+ case 3:
+ return tui.getGroupName();
}
return "ERROR";
}
- private boolean isTuiEnabled( final Tui tui ) {
+ private boolean isTuiEnabled( final SemanticTui tui ) {
return _wantedTuis.contains( tui );
}
@@ -112,7 +126,7 @@ final public class TuiTableModel impleme
@Override
public void setValueAt( final Object aValue, final int rowIndex, final int columnIndex ) {
if ( aValue instanceof Boolean && columnIndex == 0 ) {
- final Tui tui = Tui.values()[ rowIndex ];
+ final SemanticTui tui = SemanticTui.values()[ rowIndex ];
if ( (Boolean)aValue ) {
_wantedTuis.add( tui );
} else {
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/JdbcUtil.java Fri Sep 25 00:55:08 2020
@@ -2,10 +2,7 @@ package org.apache.ctakes.gui.dictionary
import org.apache.log4j.Logger;
-import java.sql.Connection;
-import java.sql.Driver;
-import java.sql.DriverManager;
-import java.sql.SQLException;
+import java.sql.*;
/**
* Author: SPF
@@ -46,6 +43,7 @@ final public class JdbcUtil {
LOGGER.error( sqlE.getMessage() );
System.exit( 1 );
}
+ registerShutdownHook( connection );
return connection;
}
@@ -82,4 +80,26 @@ final public class JdbcUtil {
return sb.toString();
}
+ /**
+ * register a shutdown hook that will shut down the database, removing temporary and lock files.
+ *
+ * @param connection -
+ */
+ static private void registerShutdownHook( final Connection connection ) {
+ // Registers a shutdown hook for the Hsql instance so that it
+ // shuts down nicely and any temporary or lock files are cleaned up.
+ Runtime.getRuntime().addShutdownHook( new Thread( () -> {
+ try {
+ final Statement shutdown = connection.createStatement();
+ shutdown.execute( "SHUTDOWN" );
+ shutdown.close();
+ // The db is read-only, so there should be no need to roll back any transactions.
+ connection.clearWarnings();
+ connection.close();
+ } catch ( SQLException sqlE ) {
+ // ignore
+ }
+ } ) );
+ }
+
}
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordDbWriter.java Fri Sep 25 00:55:08 2020
@@ -1,7 +1,7 @@
package org.apache.ctakes.gui.dictionary.util;
+import org.apache.ctakes.core.util.annotation.SemanticTui;
import org.apache.ctakes.gui.dictionary.umls.Concept;
-import org.apache.ctakes.gui.dictionary.umls.Tui;
import org.apache.ctakes.gui.dictionary.umls.VocabularyStore;
import org.apache.log4j.Logger;
@@ -92,9 +92,9 @@ final public class RareWordDbWriter {
continue;
}
// write tui table
- for ( Tui tui : concept.getTuis() ) {
+ for ( SemanticTui tui : concept.getTuis() ) {
tuiStatement.setLong( CuiTermsField.CUI.__index, cui );
- tuiStatement.setInt( 2, tui.getIntValue() );
+ tuiStatement.setInt( 2, tui.getCode() );
tuiStatement.executeUpdate();
tuiTableCount = incrementCount( "Tui", tuiTableCount );
}
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/RareWordUtil.java Fri Sep 25 00:55:08 2020
@@ -93,7 +93,7 @@ final public class RareWordUtil {
break;
}
}
- return hasLetter && !BAD_POS_TERM_SET.contains( token );
+ return hasLetter && !BAD_POS_TERM_SET.contains( token.toLowerCase() );
}
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TextTokenizer.java Fri Sep 25 00:55:08 2020
@@ -94,7 +94,7 @@ final public class TextTokenizer {
static private boolean isPrefix( final String word ) {
final String prefixQ = word + "-";
- return PREFIX_SET.contains( prefixQ );
+ return PREFIX_SET.contains( prefixQ.toLowerCase() );
}
static private boolean isSuffix( final String word, final int startIndex ) {
@@ -106,11 +106,11 @@ final public class TextTokenizer {
return false;
}
final String suffixQ = "-" + nextCharTerm;
- return SUFFIX_SET.contains( suffixQ );
+ return SUFFIX_SET.contains( suffixQ.toLowerCase() );
}
static private boolean isOwnerApostrophe( final CharSequence word, final int startIndex ) {
- return word.length() == startIndex + 1 && word.charAt( startIndex ) == 's';
+ return word.length() == startIndex + 1 && (word.charAt( startIndex ) == 's' || word.charAt( startIndex ) == 'S');
}
static private boolean isNumberDecimal( final CharSequence word, final int startIndex ) {
@@ -179,7 +179,8 @@ final public class TextTokenizer {
if ( text.isEmpty() ) {
return text;
}
- final String[] splits = WHITESPACE.split( text.toLowerCase() );
+// final String[] splits = WHITESPACE.split( text.toLowerCase() );
+ final String[] splits = WHITESPACE.split( text );
if ( splits.length == 0 ) {
return "";
}
@@ -189,7 +190,7 @@ final public class TextTokenizer {
splits[ splits.length - 1 ] = lastSplit.substring( 0, lastSplit.length() - 1 );
}
return Arrays.stream( splits )
- .map( s -> getTokens( s, separateDigits ) )
+ .map( s -> getTokens( s, separateDigits ) )
.flatMap( Collection::stream )
.collect( Collectors.joining( " " ) );
}
Modified: ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java?rev=1881993&r1=1881992&r2=1881993&view=diff
==============================================================================
--- ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java (original)
+++ ctakes/trunk/ctakes-gui/src/main/java/org/apache/ctakes/gui/dictionary/util/TokenUtil.java Fri Sep 25 00:55:08 2020
@@ -1,8 +1,9 @@
package org.apache.ctakes.gui.dictionary.util;
-import java.util.ArrayList;
+import org.apache.ctakes.core.util.StringUtil;
+
+import java.util.Arrays;
import java.util.Collection;
-import java.util.Collections;
import java.util.List;
/**
@@ -28,23 +29,24 @@ final public class TokenUtil {
}
static private List<String> getSeparatedValueItems( final String line, final char separator ) {
- if ( line == null || line.trim().isEmpty() ) {
- return Collections.emptyList();
- }
- final List<String> tokens = new ArrayList<>();
- int startIndex = 0;
- int stopIndex = line.indexOf( separator );
- while ( stopIndex > 0 && stopIndex < line.length() ) {
- tokens.add( line.substring( startIndex, stopIndex ) );
- startIndex = stopIndex + 1;
- stopIndex = line.indexOf( separator, startIndex );
- }
- if ( startIndex < line.length() - 1 ) {
- tokens.add( line.substring( startIndex ) );
- } else {
- tokens.add( "" );
- }
- return tokens;
+ return Arrays.asList( StringUtil.fastSplit( line, separator ) );
+// if ( line == null || line.trim().isEmpty() ) {
+// return Collections.emptyList();
+// }
+// final List<String> tokens = new ArrayList<>();
+// int startIndex = 0;
+// int stopIndex = line.indexOf( separator );
+// while ( stopIndex > 0 && stopIndex < line.length() ) {
+// tokens.add( line.substring( startIndex, stopIndex ) );
+// startIndex = stopIndex + 1;
+// stopIndex = line.indexOf( separator, startIndex );
+// }
+// if ( startIndex < line.length() - 1 ) {
+// tokens.add( line.substring( startIndex ) );
+// } else {
+// tokens.add( "" );
+// }
+// return tokens;
}