You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/11/30 18:35:17 UTC
svn commit: r1870634 - in /ctakes/trunk:
ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/
ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/
ctakes-core/src/main/java/org/apache/ctakes/core/ae/
ctakes-core/src/main/java/...
Author: seanfinan
Date: Sat Nov 30 18:35:17 2019
New Revision: 1870634
URL: http://svn.apache.org/viewvc?rev=1870634&view=rev
Log:
Cleanup, some minor improvements
Modified:
ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv
ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListEntryNegator.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorBIO.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/PatientNoteCollector.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java
Modified: ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv (original)
+++ ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/list/DefaultListRegex.bsv Sat Nov 30 18:35:17 2019
@@ -2,9 +2,9 @@ Numbered||(?:^[\t ]*[\d]{1,2}(?::|\.)[\t
Alpha Sentence||(?:^[\t ]*[A-Z](?::|\.)+\)?[\t ]+(?:[^\t\n\.]+(?:\.|\n))+\r?\n){2,}||^[\t ]*[A-Z](?::|\.)+\)?[\t ]+(?:[^\t\n\.]+(?:\.|\n))+\r?\n
// Name Value||(?:^[^\t\r\n]{2,50}:[\t ]+(?:[^\t\r\n:]+\r?\n)+){3,}||^[^\t\r\n]{2,50}:[\t ]+(?:[^\t\r\n:]+\r?\n)+
Name Value||(?:^[^\r\n:]{2,}:[\s]+[^\r\n]+\r?\n){2,}||^[^\r\n:]{2,80}:[\s]+[^\r\n]+\r?\n
-Multi Column||(?:^(?:[^\s:]+(?: [^\s:]+)*(?:\t+| {3,}))+(?:[^\s]+(?: [^\s]+)*)[\t ]*\r?\n){3,}||\r?\n
+// Multi Column||(?:^(?:[^\s:]+(?: [^\s:]+)*(?:\t+| {3,}))+(?:[^\s]+(?: [^\s]+)*)[\t ]*\r?\n){3,}||\r?\n
// Mixed Column||(?:^(?:[^\s:]+(?: [^\s:]+)*(?:\t+| {3,}))+(?:[^\s]+(?: [^\s]+)*)[\t ]*\r?\n[\t ]*(?:[^\s]+(?: [^\s]+)*)[\t ]*\r?\n){3,}||[\r\n][\t ]*(?:[^\s]+(?: [^\s]+)*)[\t ]*\r?\n
// Header||(?:^[^\t\r\n\.]+\.{3,}[^\t\r\n\.]+\r?\n){3,}||\r?\n
Dash||(?:^[\t ]*-{1,3}[\t ]+(?:(?:[^\t\r\n-]+-?)+\r?\n){1,3}){2,}||^[\t ]*-{1,3}[\t ]+(?:(?:[^\t\r\n-]+-?)+\r?\n)+
-Document Header||(?:^[^\t\r\n\.]+\.{3,}[^\t\r\n\.]+\r?\n){3,}||\r?\n
+// Document Header||(?:^[^\t\r\n\.]+\.{3,}[^\t\r\n\.]+\r?\n){3,}||\r?\n
Checkbox||(?:^(?:[^\r\n:]{2,80}:\r?\n)?(?:[\t ]*\[[XYN _]*\][^\r\n]+\r?\n)+)+||^(?:[^\r\n:]{2,80}:\r?\n)?(?:[\t ]*\[[XYN _]*\][^\r\n]+\r?\n)+
Modified: ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv (original)
+++ ctakes/trunk/ctakes-core-res/src/main/resources/org/apache/ctakes/core/sections/DefaultSectionRegex.bsv Sat Nov 30 18:35:17 2019
@@ -1,7 +1,7 @@
History of Present Illness||^[\t ]*(?:(?:CC\/HPI:)|(?:S:)|(?:(?:HISTORY OF (?:THE )?(?:PRESENT |PHYSICAL )?ILLNESS)(?: \(HPI(?:, PROBLEM BY PROBLEM)?\))?[\t ]*:?))[\t ]*$
-Past Medical History||^[\t ]*(?:(?:HISTORY OF (?:THE )?PAST ILLNESS)|(?:PAST MEDICAL HISTORY))[\t ]*:?[\t ]*$
+Past Medical History||^[\t ]*(?:(?:PMHX?)|(?:HISTORY OF (?:THE )?PAST ILLNESS)|(?:PAST MEDICAL HISTORY))[\t ]*:?[\t ]*$
Chief Complaint||^[\t ]*(?:CHIEF|PRIMARY) COMPLAINTS?[\t ]*:?[\t ]*$
-Patient History||^[\t ]*(?:(?:(?:PERSONAL|PATIENT) (?:(?:AND )?SOCIAL )?HISTORY)|(?:(?:PSYCHO)?SOC(?:IAL)? HISTORY)|(?:HISTORY (?:OF )(?:OTHER )?SOCIAL (?:FUNCTIONs?|FACTORS?))|(?:PSO)|(?:P?SHX))[\t ]*:?[\t ]*$
+Patient History||^[\t ]*(?:(?:(?:PERSONAL|PATIENT) (?:(?:AND )?SOCIAL )?HISTORY)|(?:(?:PSYCHO)?SOC(?:IAL)? HISTORY)|(?:HISTORY (?:OF )(?:OTHER )?SOCIAL (?:FUNCTIONs?|FACTORS?))|(?:PSO)|(?:P?SHX)|(?:HISTORY:))[\t ]*:?[\t ]*$
Review of Systems||^[\t ]*(?:(?:ROS:)|(?:(?:REVIEW (?:OF )?SYSTEMS?)|(?:SYSTEMS? REVIEW)[\t ]*:?))[\t ]*$
Family Medical History||^[\t ]*(?:FAMILY (?:MEDICAL )?HISTORY)|(?:HISTORY (?:OF )?FAMILY MEMBER DISEASES?)|(?:FAM HX)|FH|FMH|FMHX|FHX[\t ]*:?[\t ]*$
Medications||^[\t ]*(?:CURRENT )?MEDICATIONS?[\t ]*:?[\t ]*$
@@ -64,7 +64,7 @@ Hospital Course||^[\t ]*(?:BRIEF|HISTORY
Histology Summary||^[\t ]*HISTO(?:LOGY)? (?:TISSUE )?SUMMARY[\t ]*:?[\t ]*$
Addendum||^[\t ]*ADDEND(?:A|UM)[\t ]*:?[\t ]*$
Medications at Transfer||^[\t ]*MEDICATIONS?(?: AT)? TRANSFER[\t ]*:?[\t ]*$
-Findings||^[\t ]*(?:DIAGNOSTIC )?(?:INDICATIONS? ?\/? )?FINDINGS?(?: (?:AT )?SURGERY)?[\t ]*:?[\t ]*$
+Findings||^[\t ]*(?:(?:(?:DIAGNOSTIC )?(?:INDICATIONS? ?\/? )?FINDINGS?(?: (?:AT )?SURGERY)?)|(?:INDICATIONS?:))[\t ]*:?[\t ]*$
Instructions||^[\t ]*INSTRUCTIONS?[\t ]*:?[\t ]*$
Current Antibiotics||^[\t ]*CURRENT ANTIBIOTICS?[\t ]*:?[\t ]*$
Ethanol Use||^[\t ]*(?:HISTORY (?:OF )?)?(?:ALCOHOL|ETHANOL|ETOH)(?: USE)?[\t ]*:?[\t ]*$
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListAnnotator.java Sat Nov 30 18:35:17 2019
@@ -125,7 +125,7 @@ final public class ListAnnotator extends
final Map<Pair<Integer>, ListType> uniqueListTypes = getUniqueListTypes( listTypes );
createLists( jcas, uniqueListTypes, paragraph.getCoveredText(), paragraph.getBegin() );
} catch ( StringIndexOutOfBoundsException oobE ) {
- // I'm not sure how this ever happens. Paragraph bounds from the dPheParagraphAnnotator are always valid.
+ // I'm not sure how this ever happens. Paragraph bounds from the ParagraphAnnotator are always valid.
// I have run ~1000 notes without problem, but one note in Seer causes problems. Ignore.
}
}
@@ -179,14 +179,12 @@ final public class ListAnnotator extends
if ( boundsJ.getValue1() >= boundsI.getValue1() && boundsJ.getValue1() <= boundsI.getValue2() ) {
removalTypeBounds.add( boundsJ );
if ( boundsJ.getValue2() > boundsI.getValue2() ) {
-// newTypeBounds.put( new Pair<>( boundsI.getValue1(), boundsJ.getValue2() ), boundsI );
// Add J as a second list
newTypeBounds.put( new Pair<>( boundsI.getValue2(), boundsJ.getValue2() ), boundsJ );
}
} else if ( boundsJ.getValue2() >= boundsI.getValue1() && boundsJ.getValue2() <= boundsI.getValue2() ) {
removalTypeBounds.add( boundsJ );
if ( boundsJ.getValue1() < boundsI.getValue1() ) {
-// newTypeBounds.put( new Pair<>( boundsJ.getValue1(), boundsI.getValue2() ), boundsI );
// Add J as a second list
newTypeBounds.put( new Pair<>( boundsJ.getValue1(), boundsI.getValue1() ), boundsJ );
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListEntryNegator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListEntryNegator.java?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListEntryNegator.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListEntryNegator.java Sat Nov 30 18:35:17 2019
@@ -16,6 +16,7 @@ import org.apache.uima.jcas.JCas;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Comparator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -61,11 +62,11 @@ public class ListEntryNegator extends JC
if ( listEntries.isEmpty() ) {
return;
}
- listEntries.sort( ( a1, a2 ) -> a1.getBegin() - a2.getBegin() );
+ listEntries.sort( Comparator.comparingInt( AnnotationFS::getBegin ) );
final java.util.List<IdentifiedAnnotation> negatables = new ArrayList<>();
negatables.addAll( JCasUtil.selectCovered( jCas, DiseaseDisorderMention.class, list ) );
negatables.addAll( JCasUtil.selectCovered( jCas, SignSymptomMention.class, list ) );
- negatables.sort( ( a1, a2 ) -> a1.getBegin() - a2.getBegin() );
+ negatables.sort( Comparator.comparingInt( AnnotationFS::getBegin ) );
if ( negatables.isEmpty() ) {
return;
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/ListSentenceFixer.java Sat Nov 30 18:35:17 2019
@@ -9,6 +9,7 @@ import org.apache.uima.analysis_engine.A
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
import java.util.*;
import java.util.regex.Pattern;
@@ -28,6 +29,7 @@ final public class ListSentenceFixer ext
static private final Logger LOGGER = Logger.getLogger( "ListSentenceFixer" );
+ static private final Pattern WHITESPACE = Pattern.compile( "\\s+" );
/**
* Where Sentence annotations and List entry annotation ends overlap, Sentences are abbreviated.
@@ -50,7 +52,7 @@ final public class ListSentenceFixer ext
static private void adjustListEntrySentences( final JCas jCas ) {
final Collection<ListEntry> listEntries = JCasUtil.select( jCas, ListEntry.class );
final java.util.List<Sentence> allSentences = new ArrayList<>( JCasUtil.select( jCas, Sentence.class ) );
- allSentences.sort( ( s1, s2 ) -> s1.getBegin() - s2.getBegin() );
+ allSentences.sort( Comparator.comparingInt( Annotation::getBegin ) );
// gather map of sentences that cross boundaries of list entries
final Map<Sentence, Collection<Integer>> sentenceCrossBounds = new HashMap<>();
for ( ListEntry entry : listEntries ) {
@@ -91,7 +93,4 @@ final public class ListSentenceFixer ext
}
}
-
- static private final Pattern WHITESPACE = Pattern.compile( "\\s+" );
-
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorBIO.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorBIO.java?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorBIO.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetectorAnnotatorBIO.java Sat Nov 30 18:35:17 2019
@@ -1,13 +1,6 @@
package org.apache.ctakes.core.ae;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Scanner;
-
+import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -34,6 +27,10 @@ import org.cleartk.ml.jar.DirectoryDataW
import org.cleartk.ml.jar.GenericJarClassifierFactory;
import org.cleartk.util.ViewUriUtil;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.*;
+
/**
* Prose sentence detector.
@@ -51,6 +48,14 @@ import org.cleartk.util.ViewUriUtil;
* @see org.apache.ctakes.core.ae.ParagraphSentenceFixer
* @see org.apache.ctakes.core.ae.SentenceDetector
*/
+@PipeBitInfo(
+ name = "Prose Sentence Detector",
+ description = "Sentence detector that uses B I O for determination. " +
+ "Useful for documents in which newlines may not indicate sentence boundaries.",
+ role = PipeBitInfo.Role.ANNOTATOR,
+ dependencies = PipeBitInfo.TypeProduct.SECTION,
+ products = PipeBitInfo.TypeProduct.SENTENCE
+)
public class SentenceDetectorAnnotatorBIO extends CleartkAnnotator<String>{
private Logger logger = Logger.getLogger(SentenceDetectorAnnotatorBIO.class);
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java Sat Nov 30 18:35:17 2019
@@ -40,7 +40,7 @@ public class CuiLookupLister extends Abs
final int sentenceBegin = entry.getKey().getBegin();
final int sentenceEnd = entry.getKey().getEnd();
for ( IdentifiedAnnotation annotation : entry.getValue() ) {
- if ( annotation.getBegin() == sentenceBegin && annotation.getEnd() == sentenceEnd ) {
+ if ( annotation.getBegin() >= sentenceBegin && annotation.getEnd() <= sentenceEnd ) {
for ( UmlsConcept umls : OntologyConceptUtil.getUmlsConcepts( annotation ) ) {
writer.write( umls.getCui() + '|' + umls.getTui() + '|' + annotation.getCoveredText() + '\n' );
}
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/jdbc/AbstractJdbcWriter.java Sat Nov 30 18:35:17 2019
@@ -66,6 +66,7 @@ abstract public class AbstractJdbcWriter
mandatory = false
)
private String _batchSize;
+// TODO Should batchSize be an int ? Are we getting an exception from ConfigurationParameter?
// Maximum row count for prepared statement batches
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/AbstractFileTreeReader.java Sat Nov 30 18:35:17 2019
@@ -246,6 +246,7 @@ abstract public class AbstractFileTreeRe
try {
_rootDir = FileLocator.getFile( _rootDirPath );
} catch ( FileNotFoundException fnfE ) {
+ LOGGER.error( "No Directory found at " + _rootDirPath );
throw new ResourceInitializationException( fnfE );
}
_validExtensions = createValidExtensions( _explicitExtensions );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/PatientNoteCollector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/PatientNoteCollector.java?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/PatientNoteCollector.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/PatientNoteCollector.java Sat Nov 30 18:35:17 2019
@@ -31,8 +31,8 @@ final public class PatientNoteCollector
*/
@Override
public void process( final JCas jCas ) throws AnalysisEngineProcessException {
- LOGGER.info( "Caching Document " + PatientNoteStore.getInstance().getDefaultDocumentId( jCas )
- + " into Patient " + PatientNoteStore.getInstance().getDefaultPatientId( jCas ) + " ..." );
+ LOGGER.info( "Caching Document " + PatientNoteStore.getDefaultDocumentId( jCas )
+ + " into Patient " + PatientNoteStore.getDefaultPatientId( jCas ) + " ..." );
PatientNoteStore.getInstance().storeAllViews( jCas );
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java?rev=1870634&r1=1870633&r2=1870634&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineBuilder.java Sat Nov 30 18:35:17 2019
@@ -356,6 +356,10 @@ final public class PipelineBuilder {
return this;
}
+ public int getThreadCount() {
+ return _threadCount;
+ }
+
/**
* Initialize a pipeline that can be used repeatedly using {@link #run} and {@link #run(String)}.
* A pipeline can be extended between builds, but the full pipeline will be rebuilt on each call.