You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2013/05/30 23:36:21 UTC

svn commit: r1488020 - in /ctakes/sandbox/ctakes-sectionizer/src: main/java/org/apache/ctakes/core/ae/ main/resources/org/apache/ctakes/core/sections/ test/java/org/apache/ctakes/core/ae/

Author: chenpei
Date: Thu May 30 21:36:21 2013
New Revision: 1488020

URL: http://svn.apache.org/r1488020
Log:
CTAKES-200 Updated HL7 sectionizer and mappings that used to be in the SectionSegmentAnnotator template.xml.
fixed to add the correct begin and end span of the segment.

Modified:
    ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
    ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
    ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java

Modified: ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java?rev=1488020&r1=1488019&r2=1488020&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java (original)
+++ ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java Thu May 30 21:36:21 2013
@@ -23,7 +23,10 @@ import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.net.URI;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -48,8 +51,6 @@ public class CDASegmentAnnotator extends
 	Logger logger = Logger.getLogger(this.getClass());
 	protected static HashMap<String, Pattern> patterns = new HashMap<String, Pattern>();
 	protected static final String DEFAULT_SECTION_FILE_NAME = "org/apache/ctakes/core/sections/ccda_sections.txt";
-	// Field seperator assumes the sections mapping file is comma delimited
-	// unlikely to change.
 	public static final String PARAM_FIELD_SEPERATOR = ",";
 	public static final String PARAM_COMMENT = "#";
 	public static final String PARAM_SECTIONS_FILE = "sections_file";
@@ -109,6 +110,8 @@ public class CDASegmentAnnotator extends
 	private static Pattern buildPattern(String[] line) {
 		StringBuffer sb = new StringBuffer();
 		for (int i = 1; i < line.length; i++) {
+			// Build the regex alternation from each comma-delimited header name,
+			// separating the names with a pipe (no pipe after the last name)
 			sb.append(line[i].trim());
 			if (i != line.length - 1) {
 				sb.append("|");
@@ -122,8 +125,9 @@ public class CDASegmentAnnotator extends
 		String text = jCas.getDocumentText();
 		if (text == null) {
 			String docId = DocumentIDAnnotationUtil.getDocumentID(jCas);
-			logger.info("text is null for docId=" + docId, null);
+			logger.info("text is null for docId=" + docId);
 		} else {
+			ArrayList<Segment> sorted_segments = new ArrayList<Segment>();
 			for (String id : patterns.keySet()) {
 				Pattern p = patterns.get(id);
 				// System.out.println("Pattern" + p);
@@ -131,12 +135,39 @@ public class CDASegmentAnnotator extends
 				while (m.find()) {
 					Segment segment = new Segment(jCas);
 					segment.setBegin(m.start());
-					// TODO: Need to figure out the end of the section
 					segment.setEnd(m.end());
 					segment.setId(id);
-					segment.addToIndexes();
+					sorted_segments.add(segment);
 				}
 			}
+			// TODO: this is somewhat redundant, but the sections must be sorted
+			// by begin offset to determine the end of each section, which is
+			// assumed to be the beginning of the next section
+			Collections.sort(sorted_segments, new Comparator<Segment>() {
+				public int compare(Segment s1, Segment s2) {
+					return s1.getBegin() - (s2.getBegin());
+				}
+			});
+			int index = 0;
+			for (Segment s : sorted_segments) {
+				int prevEnd = s.getEnd();
+				int nextBegin = text.length();
+				if (index > 0) {
+					// Runs for every section except the first. NOTE(review): the value fetched below is discarded, so prevEnd stays s.getEnd()
+					sorted_segments.get(index - 1).getEnd();
+				}
+				if (index + 1 < sorted_segments.size()) {
+					// Runs for every section except the last, which keeps nextBegin = text.length()
+					nextBegin = sorted_segments.get(index + 1).getBegin();
+				}
+				Segment segment = new Segment(jCas);
+				segment.setBegin(prevEnd);
+				segment.setEnd(nextBegin);
+				segment.setId(s.getId());
+				segment.addToIndexes();
+				index++;
+			}
 		}
 	}
+
 }

Modified: ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt?rev=1488020&r1=1488019&r2=1488020&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt (original)
+++ ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt Thu May 30 21:36:21 2013
@@ -4,31 +4,42 @@
 # http://bluebuttonplus.org/healthrecords.html
 # http://cdatools.org/infocenter/index.jsp
 # The format is as follows:
-# HL7 template id, LOINC Section Code, n list of header names
+# HL7 template id,LOINC Section Code,n list of header names
 # Custom ones can be added to the below mapping file
-# By Default, they are case insenstive and spaces trimmed. 
+# By default, they are case insensitive and spaces are trimmed.
 
-2.16.840.1.113883.10.20.22.1.1, 34133-9, Header, Patient information and demographics
-2.16.840.1.113883.10.20.22.2.6.1, 48765-2, Allergies, Adverse Reactions, Alerts
-2.16.840.1.113883.10.20.22.2.22.1, 46240-8, History of encounters, Encounters,Surgeries, ED visits
-2.16.840.1.113883.10.20.22.2.2.1, 11369-6, History of immunizations, Immunizations,Immunizations and vaccines
-2.16.840.1.113883.10.20.22.2.1.1, 10160-0, HISTORY OF MEDICATION USE, Medications
-2.16.840.1.113883.10.20.22.2.10, 18776-5, Treatment plan, Care Plan
-2.16.840.1.113883.10.20.22.2.11.1, 10183-2, HOSPITAL DISCHARGE MEDICATIONS, Discharge Medications
-1.3.6.1.4.1.19376.1.5.3.1.3.1, 42349-1, Reason for Referral
-2.16.840.1.113883.10.20.22.2.5.1, 11450-4, PROBLEMS, Problem List, Concerns, complaints, observations
-2.16.840.1.113883.10.20.22.2.7.1, 47519-4, Procedures,	History of procedures
-2.16.840.1.113883.10.20.22.2.14, 47420-5, FUNCTIONAL STATUS, Functional and Cognitive Status, impairments
-2.16.840.1.113883.10.20.22.2.3.1, 30954-2, Results, laboratory tests, LABORATORY INFORMATION
-2.16.840.1.113883.10.20.22.2.17, 29762-2, Social History, Observations like smoking, drinking
-2.16.840.1.113883.10.20.22.2.4.1, 8716-3, Vital Signs,height, weight, blood pressure
-2.16.840.1.113883.10.20.22.2.41, 8653-8, HOSPITAL DISCHARGE INSTRUCTIONS, Discharge Instructions,	Written discharge instructions
-
-2.16.840.1.113883.10.20.22.2.15, 10157-6, Family History
-1.3.6.1.4.1.19376.1.5.3.1.1.13.2.1, 10154-3, CHIEF COMPLAINT
-2.16.840.1.113883.10.20.22.2.37,55109-3, Complications
-2.16.840.1.113883.10.20.22.2.20, 11348-0, HISTORY OF PAST ILLNESS
-1.3.6.1.4.1.19376.1.5.3.1.3.4, 10164-2, HISTORY OF PRESENT ILLNESS
-2.16.840.1.113883.10.20.2.5, 10210-3, GENERAL STATUS
-2.16.840.1.113883.10.20.22.2.24, 11535-2, Hospital Discharge Diagnosis
-2.16.840.1.113883.10.20.22.2.16, 11493-4, Hospital Discharge Studies Summary
\ No newline at end of file
+2.16.840.1.113883.10.20.22.2.21,42348-3,Advance Directives
+2.16.840.1.113883.10.20.22.2.6.1,48765-2,Allergies,Adverse Reactions,allergy
+2.16.840.1.113883.10.20.22.2.25,59774-0,Anesthesia Section
+2.16.840.1.113883.10.20.22.2.9,51847-2,ASSESSMENT AND PLAN
+2.16.840.1.113883.10.20.22.2.8,51848-0,Assessments 
+2.16.840.1.113883.10.20.22.2.13,46239-0,Chief Complaint and Reason for Visit
+1.3.6.1.4.1.19376.1.5.3.1.1.13.2.1,10154-3,CHIEF COMPLAINT,admit diagnosis,principal discharge diagnosis,principal diagnosis,principal diagnoses,secondary diagnosis,other medical issues considered at this time 
+2.16.840.1.113883.10.20.22.2.37,55109-3,Complications
+1.3.6.1.4.1.19376.1.5.3.1.3.33,42344-2,Discharge Diet
+2.16.840.1.113883.10.20.22.2.22.1,46240-8,Encounters,History of encounters,Surgeries,ED visits
+2.16.840.1.113883.10.20.22.2.15,10157-6,Family History
+2.16.840.1.113883.10.20.22.2.14,47420-5,FUNCTIONAL STATUS,Functional and Cognitive Status,impairments
+2.16.840.1.113883.10.20.2.5,10210-3,GENERAL STATUS,CURRENT HEALTH STATUS
+2.16.840.1.113883.10.20.22.1.1,34133-9,Header,Patient information and demographics,IDENTIFYING DATA,identification,record
+2.16.840.1.113883.10.20.22.2.20,11348-0,HISTORY OF PAST ILLNESS,PAST MEDICAL HISTORY
+1.3.6.1.4.1.19376.1.5.3.1.3.4,10164-2,HISTORY OF PRESENT ILLNESS,brief history of physical illness,history of present illness,history of the present illness
+2.16.840.1.113883.10.20.22.2.2.1,11369-6,History of immunizations,Immunizations,Immunizations and vaccines
+2.16.840.1.113883.10.20.22.2.1.1,10160-0,HISTORY OF MEDICATION USE,Medications,current medications
+2.16.840.1.113883.10.20.22.2.43,46241-6,HOSPITAL ADMISSION DX,rx on admit
+2.16.840.1.113883.10.20.22.2.41,8653-8,HOSPITAL DISCHARGE INSTRUCTIONS,Discharge Instructions,	Written discharge instructions
+2.16.840.1.113883.10.20.22.2.24,11535-2,Hospital Discharge Diagnosis,discharge diagnosis,FINAL DIAGNOSIS
+2.16.840.1.113883.10.20.22.2.16,11493-4,Hospital Discharge Studies Summary
+2.16.840.1.113883.10.20.22.2.45,69730-0,Instructions 
+2.16.840.1.113883.10.20.22.2.10,18776-5,Treatment plan,Care Plan
+2.16.840.1.113883.10.20.22.2.11.1,10183-2,HOSPITAL DISCHARGE MEDICATIONS,Discharge Medications
+1.3.6.1.4.1.19376.1.5.3.1.3.1,42349-1,Reason for Referral
+2.16.840.1.113883.10.20.7.12,10216-0,Operative Note Fluids 
+2.16.840.1.113883.10.20.7.14,10223-6,Operative Note Surgical  
+2.16.840.1.113883.10.20.2.10,29545-1,PHYSICAL EXAMINATION,physical exam
+2.16.840.1.113883.10.20.22.2.18,48768-6,Payers 
+2.16.840.1.113883.10.20.22.2.5.1,11450-4,PROBLEMS,Problem List,Concerns,complaints,observations
+2.16.840.1.113883.10.20.22.2.7.1,47519-4,Procedures,	History of procedures
+2.16.840.1.113883.10.20.22.2.3.1,30954-2,Results,laboratory tests,LABORATORY INFORMATION,laboratory data,laboratories
+2.16.840.1.113883.10.20.22.2.17,29762-2,Social History,Observations like smoking,drinking
+2.16.840.1.113883.10.20.22.2.4.1,8716-3,Vital Signs
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java?rev=1488020&r1=1488019&r2=1488020&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java (original)
+++ ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java Thu May 30 21:36:21 2013
@@ -18,6 +18,8 @@
  */
 package org.apache.ctakes.core.ae;
 
+import junit.framework.Assert;
+
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -30,7 +32,7 @@ import org.uimafit.component.JCasAnnotat
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.CollectionReaderFactory;
 import org.uimafit.factory.TypeSystemDescriptionFactory;
-import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.pipeline.JCasIterable;
 import org.uimafit.util.JCasUtil;
 
 public class TestCDASegmentAnnotator {
@@ -39,11 +41,10 @@ public class TestCDASegmentAnnotator {
 
 	@Test
 	public void TestCDASegmentPipeLine() throws Exception {
-
 		TypeSystemDescription typeSystem = TypeSystemDescriptionFactory
 				.createTypeSystemDescription();
 
-		CollectionReader reader1 = CollectionReaderFactory
+		CollectionReader reader = CollectionReaderFactory
 				.createCollectionReader(FilesCollectionReader.class,
 						typeSystem, FilesCollectionReader.PARAM_ROOT_FILE,
 						INPUT_FILE);
@@ -52,14 +53,38 @@ public class TestCDASegmentAnnotator {
 				.createPrimitive(CDASegmentAnnotator.class, typeSystem);
 		AnalysisEngine dumpOutput = AnalysisEngineFactory.createPrimitive(
 				DumpOutputAE.class, typeSystem);
+		// SimplePipeline.runPipeline(reader, sectionAnnotator, dumpOutput);
+		JCasIterable casIter = new JCasIterable(reader, sectionAnnotator,
+				dumpOutput);
+		final String expected_hpi_section = "1.3.6.1.4.1.19376.1.5.3.1.3.4";
+		final int expected_begin = 219;
+		final int expected_end = 1612;
+		boolean section_exists = false;
+		int section_begin = 0;
+		int section_end = 0;
 
-		SimplePipeline.runPipeline(reader1, sectionAnnotator, dumpOutput);
+		while (casIter.hasNext()) {
+			JCas jCas = casIter.next();
+			for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
+				if (expected_hpi_section.equalsIgnoreCase(segment.getId())) {
+					section_exists = true;
+					section_begin = segment.getBegin();
+					section_end = segment.getEnd();
+					break;
+				}
+			}
+		}
+		Assert.assertEquals(section_exists, true);
+		Assert.assertEquals(expected_begin, section_begin);
+		Assert.assertEquals(expected_end, section_end);
 	}
 
 	public static class DumpOutputAE extends JCasAnnotator_ImplBase {
 		public void process(JCas jCas) throws AnalysisEngineProcessException {
 			for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
-				System.out.println("Segment:" + segment.getId());
+				System.out.println("Segment:" + segment.getId() + " Begin:"
+						+ segment.getBegin() + " End:" + segment.getEnd());
+				// System.out.println("Text" + segment.getCoveredText());
 			}
 		}
 	}