You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by se...@apache.org on 2019/09/23 00:43:52 UTC

svn commit: r1867363 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr: FileTreeReader.java JCasBuilder.java TextBySectionBuilder.java TextBySentenceBuilder.java

Author: seanfinan
Date: Mon Sep 23 00:43:52 2019
New Revision: 1867363

URL: http://svn.apache.org/viewvc?rev=1867363&view=rev
Log:
FileTreeReader : Attempt to read with dumb stream reader, no encoding specified.
JCasBuilder : A Builder and Facade to populate a JCas with any desired metadata and text.
TextBySectionBuilder : Can fill a JCas with Sections and Text.
TextBySentenceBuilder : Can fill a JCas with Sections, Sentences and Text.

Added:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java
Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java?rev=1867363&r1=1867362&r2=1867363&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java Mon Sep 23 00:43:52 2019
@@ -67,6 +67,11 @@ final public class FileTreeReader extend
             LOGGER.warn( "Bad characters in " + file.getPath() );
          }
       }
+      try {
+         return readByStreamReader( file );
+      } catch ( IOException ioE ) {
+         // ignore for now, try to read by buffer.
+      }
       return readByBuffer( file );
    }
 
@@ -122,6 +127,29 @@ final public class FileTreeReader extend
          }
       } catch ( FileNotFoundException fnfE ) {
          throw new IOException( fnfE );
+      }
+      return sb.toString();
+   }
+
+   /**
+    * Reads file using a stream reader, one character at a time.
+    * The text is decoded as UTF-8 and malformed byte sequences are dropped instead of raising an error.
+    *
+    * @param file file to read
+    * @return text in file
+    * @throws IOException if the file could not be read
+    */
+   private String readByStreamReader( final File file ) throws IOException {
+      final StringBuilder sb = new StringBuilder();
+      final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput( CodingErrorAction.IGNORE );
+      try ( BufferedReader reader
+                  = new BufferedReader( new InputStreamReader( Files.newInputStream( file.toPath() ), decoder ) ) ) {
+         int i = reader.read();
+         while ( i != -1 ) {
+            // read() hands back the character as an int.  Cast it so that the character itself is appended;
+            // StringBuilder.append( int ) would append the decimal digits of the character code instead.
+            sb.append( (char)i );
+            i = reader.read();
+         }
+      } catch ( FileNotFoundException fnfE ) {
+         throw new IOException( fnfE );
       }
       return sb.toString();
    }

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java?rev=1867363&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/JCasBuilder.java Mon Sep 23 00:43:52 2019
@@ -0,0 +1,242 @@
+package org.apache.ctakes.core.cr;
+
+
+import org.apache.ctakes.core.note.NoteSpecs;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.typesystem.type.structured.*;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+
+/**
+ * Facade to "easily" populate a JCas with creator, patient and note information.
+ * Every setter returns this builder so that calls can be chained.
+ * Values that are never set keep the defaults declared on the fields below.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/22/2019
+ */
+final public class JCasBuilder {
+
+   //   For compatibility with sql db : Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff]
+   //   In SimpleDateFormat terms that is the 24-hour "HH"; "hh" is the 12-hour clock and is ambiguous without am/pm.
+   //   Only the pattern is shared: SimpleDateFormat instances are not thread-safe, so one is created per use.
+   static private final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";
+
+   static private final String UNKNOWN_DATE = "UnknownDate";
+   static private final String UNKNOWN_GENDER = "UnknownGender";
+   static private final String UNKNOWN = "Unknown";
+
+
+   private String _institutionId = UNKNOWN;
+   private String _authorSpecialty = UNKNOWN;
+
+   private String _patientId = SourceMetadataUtil.UNKNOWN_PATIENT;
+   private long _patientNum = SourceMetadataUtil.UNKNOWN_PATIENT_NUM;
+
+   private String _firstName = UNKNOWN;
+   private String _middleName = UNKNOWN;
+   private String _lastName = UNKNOWN;
+
+   private String _birthday = UNKNOWN_DATE;
+   private String _deathday = UNKNOWN_DATE;
+   private String _gender = UNKNOWN_GENDER;
+
+
+   private String _instanceId = "";
+   //   private long _instanceNum = -1;
+   private String _encounterId = "";
+//   private int _encounterNum = -1;
+
+   private String _docId = DocumentIDAnnotationUtil.NO_DOCUMENT_ID;
+   private String _docIdPrefix = DocumentIDAnnotationUtil.NO_DOCUMENT_ID_PREFIX;
+   private String _docType = NoteSpecs.ID_NAME_CLINICAL_NOTE;
+   private String _docSubType = "";
+   private String _docStandard = "";
+   private int _docRevisionNum = 1;
+   //   Defaults to the time at which this builder was created.
+   private String _docTime = createTimestamp();
+   private String _docPath = "";
+
+
+   private String _docText = "";
+
+
+   /**
+    * @return the current time formatted with {@link #DATE_PATTERN}.
+    */
+   static private String createTimestamp() {
+      final DateFormat format = new SimpleDateFormat( DATE_PATTERN );
+      return format.format( System.currentTimeMillis() );
+   }
+
+   /**
+    * @param institutionId stored as the source institution of the source data.
+    * @return this builder.
+    */
+   public JCasBuilder setInstitutionId( final String institutionId ) {
+      _institutionId = institutionId;
+      return this;
+   }
+
+   /**
+    * @param authorSpecialty stored as the author specialty of the source data.
+    * @return this builder.
+    */
+   public JCasBuilder setAuthorSpecialty( final String authorSpecialty ) {
+      _authorSpecialty = authorSpecialty;
+      return this;
+   }
+
+   /**
+    * @param patientId text identifier of the patient.
+    * @return this builder.
+    */
+   public JCasBuilder setPatientId( final String patientId ) {
+      _patientId = patientId;
+      return this;
+   }
+
+   /**
+    * @param patientNum numeric identifier of the patient, stored as the patient id of the metadata.
+    * @return this builder.
+    */
+   public JCasBuilder setPatientNum( final long patientNum ) {
+      _patientNum = patientNum;
+      return this;
+   }
+
+   /**
+    * @param firstName first name stored in the patient demographics.
+    * @return this builder.
+    */
+   public JCasBuilder setFirstName( final String firstName ) {
+      _firstName = firstName;
+      return this;
+   }
+
+   /**
+    * @param middleName middle name stored in the patient demographics.
+    * @return this builder.
+    */
+   public JCasBuilder setMiddleName( final String middleName ) {
+      _middleName = middleName;
+      return this;
+   }
+
+   /**
+    * @param lastName last name stored in the patient demographics.
+    * @return this builder.
+    */
+   public JCasBuilder setLastName( final String lastName ) {
+      _lastName = lastName;
+      return this;
+   }
+
+   /**
+    * @param birthday birth date stored in the patient demographics.
+    * @return this builder.
+    */
+   public JCasBuilder setBirthDay( final String birthday ) {
+      _birthday = birthday;
+      return this;
+   }
+
+   /**
+    * @param deathday death date stored in the patient demographics.
+    * @return this builder.
+    */
+   public JCasBuilder setDeathday( final String deathday ) {
+      _deathday = deathday;
+      return this;
+   }
+
+   /**
+    * @param gender gender stored in the patient demographics.
+    * @return this builder.
+    */
+   public JCasBuilder setGender( final String gender ) {
+      _gender = gender;
+      return this;
+   }
+
+   /**
+    * @param instanceId stored as the source instance id of the source data.
+    * @return this builder.
+    */
+   public JCasBuilder setInstanceId( final String instanceId ) {
+      _instanceId = instanceId;
+      return this;
+   }
+
+   /**
+    * @param encounterId stored as the source encounter id of the source data.
+    * @return this builder.
+    */
+   public JCasBuilder setEncounterId( final String encounterId ) {
+      _encounterId = encounterId;
+      return this;
+   }
+
+//   public JCasBuilder setEncounterNum( final int encounterNum ) {
+//      _encounterNum = encounterNum;
+//      return this;
+//   }
+
+   /**
+    * @param docId identifier of the document.
+    * @return this builder.
+    */
+   public JCasBuilder setDocId( final String docId ) {
+      _docId = docId;
+      return this;
+   }
+
+   /**
+    * @param docIdPrefix prefix for the document identifier.
+    * @return this builder.
+    */
+   public JCasBuilder setDocIdPrefix( final String docIdPrefix ) {
+      _docIdPrefix = docIdPrefix;
+      return this;
+   }
+
+   /**
+    * @param docType stored as the note type code of the source data.
+    * @return this builder.
+    */
+   public JCasBuilder setDocType( final String docType ) {
+      _docType = docType;
+      return this;
+   }
+
+   /**
+    * @param docSubType stored as the note subtype code of the source data.
+    * @return this builder.
+    */
+   public JCasBuilder setDocSubType( final String docSubType ) {
+      _docSubType = docSubType;
+      return this;
+   }
+
+   /**
+    * @param docStandard stored as the document standard of the source data.
+    * @return this builder.
+    */
+   public JCasBuilder setDocStandard( final String docStandard ) {
+      _docStandard = docStandard;
+      return this;
+   }
+
+   /**
+    * @param docRevisionNum stored as the source revision number of the source data.
+    * @return this builder.
+    */
+   public JCasBuilder setDocRevisionNum( final int docRevisionNum ) {
+      _docRevisionNum = docRevisionNum;
+      return this;
+   }
+
+   /**
+    * @param docTime stored as the source revision date of the source data.
+    *                The value is stored as given; it is not parsed or reformatted by this builder.
+    * @return this builder.
+    */
+   public JCasBuilder setDocTime( final String docTime ) {
+      _docTime = docTime;
+      return this;
+   }
+
+   /**
+    * @param docPath path of the document.
+    * @return this builder.
+    */
+   public JCasBuilder setDocPath( final String docPath ) {
+      _docPath = docPath;
+      return this;
+   }
+
+   /**
+    * @param docText text of the document.
+    * @return this builder.
+    */
+   public JCasBuilder setDocText( final String docText ) {
+      _docText = docText;
+      return this;
+   }
+
+   /**
+    * @return a jcas created from scratch and populated with data added in this builder.
+    * @throws UIMAException if the fresh jcas cannot be created.
+    */
+   public JCas build() throws UIMAException {
+      return populate( JCasFactory.createJCas() );
+   }
+
+   /**
+    * @param jCas an existing jcas.  It is reset before being populated, so anything it previously held is lost.
+    * @return the given jcas, reset (emptied of previous information) and populated with data added in this builder.
+    */
+   public JCas build( final JCas jCas ) {
+      jCas.reset();
+      return populate( jCas );
+   }
+
+   /**
+    * Writes the patient, demographics, source, document id, document path and document text held by this builder
+    * to the given jcas.  The jcas is not reset first; use {@link #build(JCas)} for that.
+    *
+    * @param jCas an existing jcas.
+    * @return the given jcas populated with the data added in this builder.
+    */
+   public JCas populate( final JCas jCas ) {
+      final Metadata metadata = SourceMetadataUtil.getOrCreateMetadata( jCas );
+
+      SourceMetadataUtil.setPatientIdentifier( jCas, _patientId );
+      metadata.setPatientID( _patientNum );
+
+      final Demographics demographics = new Demographics( jCas );
+      metadata.setDemographics( demographics );
+      demographics.setFirstName( _firstName );
+      demographics.setMiddleName( _middleName );
+      demographics.setLastName( _lastName );
+      demographics.setBirthDate( _birthday );
+      demographics.setDeathDate( _deathday );
+      demographics.setGender( _gender );
+
+      final SourceData sourceData = SourceMetadataUtil.getOrCreateSourceData( jCas );
+      sourceData.setSourceInstitution( _institutionId );
+      sourceData.setAuthorSpecialty( _authorSpecialty );
+
+      sourceData.setSourceEncounterId( _encounterId );
+      sourceData.setSourceInstanceId( _instanceId );
+
+      final DocumentID documentId = new DocumentID( jCas );
+      documentId.setDocumentID( _docId );
+      documentId.addToIndexes();
+
+      final DocumentIdPrefix documentIdPrefix = new DocumentIdPrefix( jCas );
+      documentIdPrefix.setDocumentIdPrefix( _docIdPrefix );
+      documentIdPrefix.addToIndexes();
+
+      sourceData.setNoteTypeCode( _docType );
+      sourceData.setNoteSubTypeCode( _docSubType );
+      sourceData.setDocumentStandard( _docStandard );
+
+      sourceData.setSourceRevisionDate( _docTime );
+      sourceData.setSourceRevisionNbr( _docRevisionNum );
+
+      final DocumentPath documentPath = new DocumentPath( jCas );
+      documentPath.setDocumentPath( _docPath );
+      documentPath.addToIndexes();
+
+      jCas.setDocumentText( _docText );
+
+      return jCas;
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java?rev=1867363&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySectionBuilder.java Mon Sep 23 00:43:52 2019
@@ -0,0 +1,68 @@
+package org.apache.ctakes.core.cr;
+
+
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Builds a new cas or populates an existing jcas with sections, their names and text.
+ * Sections appear in the document text in the order in which they were added.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/22/2019
+ */
+final public class TextBySectionBuilder {
+
+   // Parallel lists: the name at index i belongs to the text at index i.
+   private final List<String> _sectionNames = new ArrayList<>();
+   private final List<String> _sectionTexts = new ArrayList<>();
+
+
+   /**
+    * @param name name of the section.  This will be placed in the text and used to create an id: name_#
+    * @param text text content of the section.
+    * @return this builder.
+    */
+   public TextBySectionBuilder addSection( final String name, final String text ) {
+      _sectionNames.add( name );
+      _sectionTexts.add( text );
+      return this;
+   }
+
+   /**
+    * @return a jcas created from scratch and populated with the sections added in this builder.
+    * @throws UIMAException if the fresh jcas cannot be created.
+    */
+   public JCas build() throws UIMAException {
+      return populate( JCasFactory.createJCas() );
+   }
+
+   /**
+    * Creates one Segment per added section and sets the document text to the concatenation of all sections.
+    * Each section is written as its name, a newline, its text and two newlines; the Segment spans all of that.
+    *
+    * @param jCas an existing jcas.  Its document text is set by this method.
+    * @return the given jcas populated with the sections added in this builder.
+    */
+   public JCas populate( final JCas jCas ) {
+      final StringBuilder sb = new StringBuilder();
+      final int sectionCount = _sectionNames.size();
+      for ( int i = 0; i < sectionCount; i++ ) {
+         final String name = _sectionNames.get( i );
+         final Segment section = new Segment( jCas );
+         section.setTagText( name );
+         section.setPreferredText( name );
+         // Section ids are numbered from 1.
+         section.setId( name + '_' + (i + 1) );
+         section.setBegin( sb.length() );
+         sb.append( name ).append( "\n" );
+         sb.append( _sectionTexts.get( i ) ).append( "\n\n" );
+         section.setEnd( sb.length() );
+         // Without this the Segment exists but is not in the cas indexes, so index lookups never return it.
+         section.addToIndexes();
+      }
+      jCas.setDocumentText( sb.toString() );
+      return jCas;
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java?rev=1867363&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/TextBySentenceBuilder.java Mon Sep 23 00:43:52 2019
@@ -0,0 +1,113 @@
+package org.apache.ctakes.core.cr;
+
+
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Builds a new cas or populates an existing jcas with sections, their names and sentence text.
+ * Sections and sentences appear in the document text in the order in which they were added.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/22/2019
+ */
+final public class TextBySentenceBuilder {
+
+   static private final String DEFAULT_SEGMENT_ID = "SIMPLE_SEGMENT";
+
+   // Sections that have been closed by a later call to startSection.
+   private final List<SentenceSection> _sentenceSections = new ArrayList<>();
+   // Section that is still receiving sentences; null until a section is started or a sentence is added.
+   private SentenceSection _currentSection;
+
+   /**
+    * Closes the current section, if there is one, and starts a new section.
+    *
+    * @param name name of the section.  This will be placed in the text and used to create an id: name_#
+    * @return this builder.
+    */
+   public TextBySentenceBuilder startSection( final String name ) {
+      if ( _currentSection != null ) {
+         _sentenceSections.add( _currentSection );
+      }
+      _currentSection = new SentenceSection( name );
+      return this;
+   }
+
+   /**
+    * Places the given sentence text in the current section.
+    * If no section has been started then a section named by the default segment id is created to hold it.
+    *
+    * @param text text content of the sentence.
+    * @return this builder.
+    */
+   public TextBySentenceBuilder addSentence( final String text ) {
+      if ( _currentSection == null ) {
+         _currentSection = new SentenceSection( DEFAULT_SEGMENT_ID );
+      }
+      _currentSection.addSentence( text );
+      return this;
+   }
+
+   /**
+    * @return a jcas created from scratch and populated with the sections and sentences added in this builder.
+    * @throws UIMAException if the fresh jcas cannot be created.
+    */
+   public JCas build() throws UIMAException {
+      return populate( JCasFactory.createJCas() );
+   }
+
+   /**
+    * Creates one Segment per section and one Sentence per sentence, and sets the document text.
+    * Each section is written as its name, a newline, each sentence followed by a newline, then two newlines.
+    * Sentence numbers run continuously across sections, starting at 1.
+    * This builder is not modified, so the method may be called more than once.
+    *
+    * @param jCas an existing jcas.  Its document text is set by this method.
+    * @return the given jcas populated with the sections and sentences added in this builder.
+    */
+   public JCas populate( final JCas jCas ) {
+      // Work on a snapshot.  Appending the open section to the field would duplicate it on a second call,
+      // and appending it unconditionally would put a null in the list when nothing has been added.
+      final List<SentenceSection> sections = new ArrayList<>( _sentenceSections );
+      if ( _currentSection != null ) {
+         sections.add( _currentSection );
+      }
+      final StringBuilder sb = new StringBuilder();
+      int sectionNum = 1;
+      int sentenceNum = 1;
+      for ( SentenceSection sentenceSection : sections ) {
+         final String name = sentenceSection._name;
+         final Segment section = new Segment( jCas );
+         section.setTagText( name );
+         section.setPreferredText( name );
+         final String sectionId = name + '_' + sectionNum;
+         section.setId( sectionId );
+         section.setBegin( sb.length() );
+         sb.append( name ).append( "\n" );
+
+         for ( String sentenceText : sentenceSection._sentences ) {
+            final Sentence sentence = new Sentence( jCas );
+            sentence.setSegmentId( sectionId );
+            sentence.setSentenceNumber( sentenceNum );
+            sentence.setBegin( sb.length() );
+            sb.append( sentenceText ).append( "\n" );
+            sentence.setEnd( sb.length() );
+            // Without this the Sentence is not in the cas indexes, so index lookups never return it.
+            sentence.addToIndexes();
+            sentenceNum++;
+         }
+         sb.append( "\n\n" );
+         section.setEnd( sb.length() );
+         // Same for the Segment: it must be indexed to be found.
+         section.addToIndexes();
+         sectionNum++;
+      }
+      jCas.setDocumentText( sb.toString() );
+      return jCas;
+   }
+
+   /**
+    * Internal storage: a section name and the sentences added to that section, in order.
+    */
+   static private final class SentenceSection {
+      private final String _name;
+      private final List<String> _sentences = new ArrayList<>();
+
+      private SentenceSection( final String name ) {
+         _name = name;
+      }
+
+      private void addSentence( final String text ) {
+         _sentences.add( text );
+      }
+   }
+
+}