You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/09/23 00:43:52 UTC
svn commit: r1867363 - in
/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr:
FileTreeReader.java JCasBuilder.java TextBySectionBuilder.java
TextBySentenceBuilder.java
Author: seanfinan
Date: Mon Sep 23 00:43:52 2019
New Revision: 1867363
URL: http://svn.apache.org/viewvc?rev=1867363&view=rev
Log:
FileTreeReader : Attempt to read with dumb stream reader, no encoding specified.
JCasBuilder : A Builder and Facade to populate a JCas with any desired metadata and text.
TextBySectionBuilder : Can fill a JCas with Sections and Text.
TextBySentenceBuilder : Can fill a JCas with Sections, Sentences and Text.
Added:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java?rev=1867363&r1=1867362&r2=1867363&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java Mon Sep 23 00:43:52 2019
@@ -67,6 +67,11 @@ final public class FileTreeReader extend
LOGGER.warn( "Bad characters in " + file.getPath() );
}
}
+ try {
+ return readByStreamReader( file );
+ } catch ( IOException ioE ) {
+ // ignore for now, try to read by buffer.
+ }
return readByBuffer( file );
}
@@ -122,6 +127,29 @@ final public class FileTreeReader extend
}
} catch ( FileNotFoundException fnfE ) {
throw new IOException( fnfE );
+ }
+ return sb.toString();
+ }
+
+   /**
+    * Reads the file one character at a time through a buffered stream reader that decodes UTF-8.
+    * Malformed byte sequences are ignored by the decoder instead of causing a failure.
+    *
+    * @param file file to read
+    * @return text in file
+    * @throws IOException if the file could not be read
+    */
+   private String readByStreamReader( final File file ) throws IOException {
+      final StringBuilder sb = new StringBuilder();
+      final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput( CodingErrorAction.IGNORE );
+      try ( BufferedReader reader
+                  = new BufferedReader( new InputStreamReader( Files.newInputStream( file.toPath() ), decoder ) ) ) {
+         int i = reader.read();
+         while ( i != -1 ) {
+            // read() hands back the character as an int.  It must be cast back to char,
+            // otherwise StringBuilder.append( int ) appends the decimal digits of the code unit.
+            sb.append( (char)i );
+            i = reader.read();
+         }
+      } catch ( FileNotFoundException fnfE ) {
+         throw new IOException( fnfE );
       }
       return sb.toString();
    }
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java?rev=1867363&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java Mon Sep 23 00:43:52 2019
@@ -0,0 +1,242 @@
+package org.apache.ctakes.core.cr;
+
+
+import org.apache.ctakes.core.note.NoteSpecs;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.typesystem.type.structured.*;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+
+/**
+ * Builder and facade to "easily" populate a JCas with creator, patient and note information.
+ * <p>
+ * Every value has a default, so only the known values need to be set before calling
+ * {@link #build()}, {@link #build(JCas)} or {@link #populate(JCas)}.
+ * All setters return this builder so that calls can be chained.
+ * This class performs no synchronization.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/22/2019
+ */
+final public class JCasBuilder {
+
+   // For compatibility with sql db : Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff]
+   // The sql hours are on a 24 hour clock, which is "HH" in SimpleDateFormat.
+   // "hh" is the 12 hour clock and, without an am/pm marker, would write 15:00 as 03:00.
+   static private final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";
+
+   static private final String UNKNOWN_DATE = "UnknownDate";
+   static private final String UNKNOWN_GENDER = "UnknownGender";
+   static private final String UNKNOWN = "Unknown";
+
+
+   private String _institutionId = UNKNOWN;
+   private String _authorSpecialty = UNKNOWN;
+
+   private String _patientId = SourceMetadataUtil.UNKNOWN_PATIENT;
+   private long _patientNum = SourceMetadataUtil.UNKNOWN_PATIENT_NUM;
+
+   private String _firstName = UNKNOWN;
+   private String _middleName = UNKNOWN;
+   private String _lastName = UNKNOWN;
+
+   private String _birthday = UNKNOWN_DATE;
+   private String _deathday = UNKNOWN_DATE;
+   private String _gender = UNKNOWN_GENDER;
+
+
+   private String _instanceId = "";
+   //   private long _instanceNum = -1;
+   private String _encounterId = "";
+//   private int _encounterNum = -1;
+
+   private String _docId = DocumentIDAnnotationUtil.NO_DOCUMENT_ID;
+   private String _docIdPrefix = DocumentIDAnnotationUtil.NO_DOCUMENT_ID_PREFIX;
+   private String _docType = NoteSpecs.ID_NAME_CLINICAL_NOTE;
+   private String _docSubType = "";
+   private String _docStandard = "";
+   private int _docRevisionNum = 1;
+   // SimpleDateFormat is not thread-safe, so each builder formats with its own instance
+   // instead of sharing a static one.  Defaults to the time at which this builder was created.
+   private String _docTime = new SimpleDateFormat( DATE_PATTERN ).format( System.currentTimeMillis() );
+   private String _docPath = "";
+
+
+   private String _docText = "";
+
+
+   /**
+    * @param institutionId identifier of the source institution.  Default is "Unknown".
+    * @return this builder.
+    */
+   public JCasBuilder setInstitutionId( final String institutionId ) {
+      _institutionId = institutionId;
+      return this;
+   }
+
+   /**
+    * @param authorSpecialty specialty of the note author.  Default is "Unknown".
+    * @return this builder.
+    */
+   public JCasBuilder setAuthorSpecialty( final String authorSpecialty ) {
+      _authorSpecialty = authorSpecialty;
+      return this;
+   }
+
+   /**
+    * @param patientId text identifier of the patient.  Default is {@link SourceMetadataUtil#UNKNOWN_PATIENT}.
+    * @return this builder.
+    */
+   public JCasBuilder setPatientId( final String patientId ) {
+      _patientId = patientId;
+      return this;
+   }
+
+   /**
+    * @param patientNum numeric identifier of the patient.  Default is {@link SourceMetadataUtil#UNKNOWN_PATIENT_NUM}.
+    * @return this builder.
+    */
+   public JCasBuilder setPatientNum( final long patientNum ) {
+      _patientNum = patientNum;
+      return this;
+   }
+
+   /**
+    * @param firstName first name of the patient.  Default is "Unknown".
+    * @return this builder.
+    */
+   public JCasBuilder setFirstName( final String firstName ) {
+      _firstName = firstName;
+      return this;
+   }
+
+   /**
+    * @param middleName middle name of the patient.  Default is "Unknown".
+    * @return this builder.
+    */
+   public JCasBuilder setMiddleName( final String middleName ) {
+      _middleName = middleName;
+      return this;
+   }
+
+   /**
+    * @param lastName last name of the patient.  Default is "Unknown".
+    * @return this builder.
+    */
+   public JCasBuilder setLastName( final String lastName ) {
+      _lastName = lastName;
+      return this;
+   }
+
+   /**
+    * @param birthday birth date of the patient, as text.  Default is "UnknownDate".
+    * @return this builder.
+    */
+   public JCasBuilder setBirthDay( final String birthday ) {
+      _birthday = birthday;
+      return this;
+   }
+
+   /**
+    * @param deathday death date of the patient, as text.  Default is "UnknownDate".
+    * @return this builder.
+    */
+   public JCasBuilder setDeathday( final String deathday ) {
+      _deathday = deathday;
+      return this;
+   }
+
+   /**
+    * @param gender gender of the patient.  Default is "UnknownGender".
+    * @return this builder.
+    */
+   public JCasBuilder setGender( final String gender ) {
+      _gender = gender;
+      return this;
+   }
+
+   /**
+    * @param instanceId source instance identifier.  Default is an empty string.
+    * @return this builder.
+    */
+   public JCasBuilder setInstanceId( final String instanceId ) {
+      _instanceId = instanceId;
+      return this;
+   }
+
+   /**
+    * @param encounterId source encounter identifier.  Default is an empty string.
+    * @return this builder.
+    */
+   public JCasBuilder setEncounterId( final String encounterId ) {
+      _encounterId = encounterId;
+      return this;
+   }
+
+//   public JCasBuilder setEncounterNum( final int encounterNum ) {
+//      _encounterNum = encounterNum;
+//      return this;
+//   }
+
+   /**
+    * @param docId identifier of the document.  Default is {@link DocumentIDAnnotationUtil#NO_DOCUMENT_ID}.
+    * @return this builder.
+    */
+   public JCasBuilder setDocId( final String docId ) {
+      _docId = docId;
+      return this;
+   }
+
+   /**
+    * @param docIdPrefix prefix for the document identifier.
+    *                    Default is {@link DocumentIDAnnotationUtil#NO_DOCUMENT_ID_PREFIX}.
+    * @return this builder.
+    */
+   public JCasBuilder setDocIdPrefix( final String docIdPrefix ) {
+      _docIdPrefix = docIdPrefix;
+      return this;
+   }
+
+   /**
+    * @param docType note type code.  Default is {@link NoteSpecs#ID_NAME_CLINICAL_NOTE}.
+    * @return this builder.
+    */
+   public JCasBuilder setDocType( final String docType ) {
+      _docType = docType;
+      return this;
+   }
+
+   /**
+    * @param docSubType note subtype code.  Default is an empty string.
+    * @return this builder.
+    */
+   public JCasBuilder setDocSubType( final String docSubType ) {
+      _docSubType = docSubType;
+      return this;
+   }
+
+   /**
+    * @param docStandard document standard.  Default is an empty string.
+    * @return this builder.
+    */
+   public JCasBuilder setDocStandard( final String docStandard ) {
+      _docStandard = docStandard;
+      return this;
+   }
+
+   /**
+    * @param docRevisionNum revision number of the document.  Default is 1.
+    * @return this builder.
+    */
+   public JCasBuilder setDocRevisionNum( final int docRevisionNum ) {
+      _docRevisionNum = docRevisionNum;
+      return this;
+   }
+
+   /**
+    * @param docTime revision time of the document, as text.
+    *                Default is the creation time of this builder formatted as yyyy-MM-dd HH:mm:ss.
+    * @return this builder.
+    */
+   public JCasBuilder setDocTime( final String docTime ) {
+      _docTime = docTime;
+      return this;
+   }
+
+   /**
+    * @param docPath path of the document.  Default is an empty string.
+    * @return this builder.
+    */
+   public JCasBuilder setDocPath( final String docPath ) {
+      _docPath = docPath;
+      return this;
+   }
+
+   /**
+    * @param docText text of the document.  Default is an empty string.
+    * @return this builder.
+    */
+   public JCasBuilder setDocText( final String docText ) {
+      _docText = docText;
+      return this;
+   }
+
+   /**
+    * @return a jcas created from scratch and populated with data added in this builder.
+    * @throws UIMAException if the fresh jcas cannot be created.
+    */
+   public JCas build() throws UIMAException {
+      return populate( JCasFactory.createJCas() );
+   }
+
+   /**
+    * @param jCas an existing jcas to reuse.  It is reset before it is populated.
+    * @return a jcas that has been reset (emptied of previous information) and populated with data added in this builder.
+    */
+   public JCas build( final JCas jCas ) {
+      jCas.reset();
+      return populate( jCas );
+   }
+
+   /**
+    * Writes the patient, demographics, source, document id, document id prefix, document path
+    * and document text held by this builder into the given jcas.  The jcas is not reset first.
+    *
+    * @param jCas the jcas to fill.
+    * @return the given jcas populated with the data added in this builder.
+    */
+   public JCas populate( final JCas jCas ) {
+      final Metadata metadata = SourceMetadataUtil.getOrCreateMetadata( jCas );
+
+      SourceMetadataUtil.setPatientIdentifier( jCas, _patientId );
+      metadata.setPatientID( _patientNum );
+
+      final Demographics demographics = new Demographics( jCas );
+      metadata.setDemographics( demographics );
+      demographics.setFirstName( _firstName );
+      demographics.setMiddleName( _middleName );
+      demographics.setLastName( _lastName );
+      demographics.setBirthDate( _birthday );
+      demographics.setDeathDate( _deathday );
+      demographics.setGender( _gender );
+
+      final SourceData sourceData = SourceMetadataUtil.getOrCreateSourceData( jCas );
+      sourceData.setSourceInstitution( _institutionId );
+      sourceData.setAuthorSpecialty( _authorSpecialty );
+
+      sourceData.setSourceEncounterId( _encounterId );
+      sourceData.setSourceInstanceId( _instanceId );
+
+      final DocumentID documentId = new DocumentID( jCas );
+      documentId.setDocumentID( _docId );
+      documentId.addToIndexes();
+
+      final DocumentIdPrefix documentIdPrefix = new DocumentIdPrefix( jCas );
+      documentIdPrefix.setDocumentIdPrefix( _docIdPrefix );
+      documentIdPrefix.addToIndexes();
+
+      sourceData.setNoteTypeCode( _docType );
+      sourceData.setNoteSubTypeCode( _docSubType );
+      sourceData.setDocumentStandard( _docStandard );
+
+      sourceData.setSourceRevisionDate( _docTime );
+      sourceData.setSourceRevisionNbr( _docRevisionNum );
+
+      final DocumentPath documentPath = new DocumentPath( jCas );
+      documentPath.setDocumentPath( _docPath );
+      documentPath.addToIndexes();
+
+      // NOTE(review): the text is set unconditionally; presumably the given jcas must not
+      // already contain document text - confirm against the uima JCas contract.
+      jCas.setDocumentText( _docText );
+
+      return jCas;
+   }
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java?rev=1867363&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java Mon Sep 23 00:43:52 2019
@@ -0,0 +1,68 @@
+package org.apache.ctakes.core.cr;
+
+
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Builds a new cas or populates an existing jcas with sections, their names and text.
+ * <p>
+ * Sections are written to the document text in the order in which they were added.
+ * Each section is written as its name, a newline, its text and two newlines.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/22/2019
+ */
+final public class TextBySectionBuilder {
+
+   // Parallel lists: the name at index i belongs to the text at index i.
+   private final List<String> _sectionNames = new ArrayList<>();
+   private final List<String> _sectionTexts = new ArrayList<>();
+
+
+   /**
+    * @param name name of the section.  This will be placed in the text and used to create an id: name_#
+    * @param text text content of the section.
+    * @return this builder.
+    */
+   public TextBySectionBuilder addSection( final String name, final String text ) {
+      _sectionNames.add( name );
+      _sectionTexts.add( text );
+      return this;
+   }
+
+   /**
+    * @return a jcas created from scratch and populated with the sections added in this builder.
+    * @throws UIMAException if the fresh jcas cannot be created.
+    */
+   public JCas build() throws UIMAException {
+      return populate( JCasFactory.createJCas() );
+   }
+
+   /**
+    * Creates one {@link Segment} per added section, adds it to the indexes of the given jcas
+    * and sets the document text of the jcas to the concatenated sections.
+    *
+    * @param jCas the jcas to fill.
+    * @return the given jcas populated with the sections added in this builder.
+    */
+   public JCas populate( final JCas jCas ) {
+      final StringBuilder sb = new StringBuilder();
+      final int sectionCount = _sectionNames.size();
+      for ( int i = 0; i < sectionCount; i++ ) {
+         final String name = _sectionNames.get( i );
+         final Segment section = new Segment( jCas );
+         section.setTagText( name );
+         section.setPreferredText( name );
+         // Section ids are numbered from 1.
+         section.setId( name + '_' + (i + 1) );
+         section.setBegin( sb.length() );
+         sb.append( name ).append( "\n" );
+         sb.append( _sectionTexts.get( i ) ).append( "\n\n" );
+         section.setEnd( sb.length() );
+         // Without this the segment is created but never becomes part of the cas.
+         // It is added only after begin and end have their final values.
+         section.addToIndexes();
+      }
+      jCas.setDocumentText( sb.toString() );
+      return jCas;
+   }
+
+}
Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java?rev=1867363&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java Mon Sep 23 00:43:52 2019
@@ -0,0 +1,113 @@
+package org.apache.ctakes.core.cr;
+
+
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Builds a new cas or populates an existing jcas with sections, their names and sentence text.
+ * <p>
+ * Sections and sentences are written to the document text in the order in which they were added.
+ * Sentences added before any call to {@link #startSection(String)} are placed in a section
+ * named "SIMPLE_SEGMENT".
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/22/2019
+ */
+final public class TextBySentenceBuilder {
+
+   static private final String DEFAULT_SEGMENT_ID = "SIMPLE_SEGMENT";
+
+   // Sections that have been closed by a later call to startSection.
+   private final List<SentenceSection> _sentenceSections = new ArrayList<>();
+   // The section still receiving sentences.  Null until a section is started or a sentence is added.
+   private SentenceSection _currentSection;
+
+   /**
+    * Closes the current section, if there is one, and starts a new one.
+    *
+    * @param name name of the section.  This will be placed in the text and used to create an id: name_#
+    * @return this builder.
+    */
+   public TextBySentenceBuilder startSection( final String name ) {
+      if ( _currentSection != null ) {
+         _sentenceSections.add( _currentSection );
+      }
+      _currentSection = new SentenceSection( name );
+      return this;
+   }
+
+   /**
+    * Places the given sentence text in the current section.
+    * If no section has been started then a default section is created.
+    *
+    * @param text text content of the sentence.
+    * @return this builder.
+    */
+   public TextBySentenceBuilder addSentence( final String text ) {
+      if ( _currentSection == null ) {
+         _currentSection = new SentenceSection( DEFAULT_SEGMENT_ID );
+      }
+      _currentSection.addSentence( text );
+      return this;
+   }
+
+   /**
+    * @return a jcas created from scratch and populated with the sections and sentences added in this builder.
+    * @throws UIMAException if the fresh jcas cannot be created.
+    */
+   public JCas build() throws UIMAException {
+      return populate( JCasFactory.createJCas() );
+   }
+
+   /**
+    * Creates one {@link Segment} per section and one {@link Sentence} per sentence, adds them to
+    * the indexes of the given jcas and sets the document text of the jcas to the concatenated text.
+    * The state of this builder is not changed, so more sections and sentences may be added afterwards.
+    * If nothing was added then the document text is set to an empty string.
+    *
+    * @param jCas the jcas to fill.
+    * @return the given jcas populated with the sections and sentences added in this builder.
+    */
+   public JCas populate( final JCas jCas ) {
+      // Work on a copy.  Adding the current section to the field would duplicate it on a second
+      // call or on a later startSection, and a null current section would fail in the loop below.
+      final List<SentenceSection> sections = new ArrayList<>( _sentenceSections );
+      if ( _currentSection != null ) {
+         sections.add( _currentSection );
+      }
+      final StringBuilder sb = new StringBuilder();
+      int sectionNum = 1;
+      // Sentence numbers run through the whole document, they do not restart per section.
+      int sentenceNum = 1;
+      for ( SentenceSection sentenceSection : sections ) {
+         final String name = sentenceSection._name;
+         final Segment section = new Segment( jCas );
+         section.setTagText( name );
+         section.setPreferredText( name );
+         final String sectionId = name + '_' + sectionNum;
+         section.setId( sectionId );
+         section.setBegin( sb.length() );
+         sb.append( name ).append( "\n" );
+
+         for ( String sentenceText : sentenceSection._sentences ) {
+            final Sentence sentence = new Sentence( jCas );
+            sentence.setSegmentId( sectionId );
+            sentence.setSentenceNumber( sentenceNum );
+            sentence.setBegin( sb.length() );
+            sb.append( sentenceText ).append( "\n" );
+            sentence.setEnd( sb.length() );
+            // Without this the sentence is created but never becomes part of the cas.
+            sentence.addToIndexes();
+            sentenceNum++;
+         }
+         sb.append( "\n\n" );
+         section.setEnd( sb.length() );
+         // Added only after begin and end have their final values.
+         section.addToIndexes();
+         sectionNum++;
+      }
+      jCas.setDocumentText( sb.toString() );
+      return jCas;
+   }
+
+   /**
+    * Internal storage: the name of a section and the texts of its sentences, in order of addition.
+    */
+   static private final class SentenceSection {
+      private final String _name;
+      private final List<String> _sentences = new ArrayList<>();
+
+      private SentenceSection( final String name ) {
+         _name = name;
+      }
+
+      private void addSentence( final String text ) {
+         _sentences.add( text );
+      }
+   }
+
+}