You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ch...@apache.org on 2013/05/30 23:36:21 UTC
svn commit: r1488020 - in /ctakes/sandbox/ctakes-sectionizer/src:
main/java/org/apache/ctakes/core/ae/
main/resources/org/apache/ctakes/core/sections/
test/java/org/apache/ctakes/core/ae/
Author: chenpei
Date: Thu May 30 21:36:21 2013
New Revision: 1488020
URL: http://svn.apache.org/r1488020
Log:
CTAKES-200 Updated HL7 sectionizer and mappings that used to be in the SectionSegmentAnnotator template.xml.
fixed to add the correct begin and end span of the segment.
Modified:
ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
Modified: ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java?rev=1488020&r1=1488019&r2=1488020&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java (original)
+++ ctakes/sandbox/ctakes-sectionizer/src/main/java/org/apache/ctakes/core/ae/CDASegmentAnnotator.java Thu May 30 21:36:21 2013
@@ -23,7 +23,10 @@ import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URL;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -48,8 +51,6 @@ public class CDASegmentAnnotator extends
Logger logger = Logger.getLogger(this.getClass());
protected static HashMap<String, Pattern> patterns = new HashMap<String, Pattern>();
protected static final String DEFAULT_SECTION_FILE_NAME = "org/apache/ctakes/core/sections/ccda_sections.txt";
- // Field seperator assumes the sections mapping file is comma delimited
- // unlikely to change.
public static final String PARAM_FIELD_SEPERATOR = ",";
public static final String PARAM_COMMENT = "#";
public static final String PARAM_SECTIONS_FILE = "sections_file";
@@ -109,6 +110,8 @@ public class CDASegmentAnnotator extends
private static Pattern buildPattern(String[] line) {
StringBuffer sb = new StringBuffer();
for (int i = 1; i < line.length; i++) {
+ // Build the regex alternation from the comma-delimited header names,
+ // joining consecutive names with a pipe (|) separator
sb.append(line[i].trim());
if (i != line.length - 1) {
sb.append("|");
@@ -122,8 +125,9 @@ public class CDASegmentAnnotator extends
String text = jCas.getDocumentText();
if (text == null) {
String docId = DocumentIDAnnotationUtil.getDocumentID(jCas);
- logger.info("text is null for docId=" + docId, null);
+ logger.info("text is null for docId=" + docId);
} else {
+ ArrayList<Segment> sorted_segments = new ArrayList<Segment>();
for (String id : patterns.keySet()) {
Pattern p = patterns.get(id);
// System.out.println("Pattern" + p);
@@ -131,12 +135,39 @@ public class CDASegmentAnnotator extends
while (m.find()) {
Segment segment = new Segment(jCas);
segment.setBegin(m.start());
- // TODO: Need to figure out the end of the section
segment.setEnd(m.end());
segment.setId(id);
- segment.addToIndexes();
+ sorted_segments.add(segment);
}
}
+ // TODO: this is somewhat redundant, but the sections must be in sorted
+ // order to determine the end of each section, which is assumed to be the
+ // beginning of the next section
+ Collections.sort(sorted_segments, new Comparator<Segment>() {
+ public int compare(Segment s1, Segment s2) {
+ return s1.getBegin() - (s2.getBegin());
+ }
+ });
+ int index = 0;
+ for (Segment s : sorted_segments) {
+ int prevEnd = s.getEnd();
+ int nextBegin = text.length();
+ if (index > 0) {
+ // not the first section: a previous section exists (NOTE: the value read below is discarded)
+ sorted_segments.get(index - 1).getEnd();
+ }
+ if (index + 1 < sorted_segments.size()) {
+ // not the last section: this section ends where the next section begins
+ nextBegin = sorted_segments.get(index + 1).getBegin();
+ }
+ Segment segment = new Segment(jCas);
+ segment.setBegin(prevEnd);
+ segment.setEnd(nextBegin);
+ segment.setId(s.getId());
+ segment.addToIndexes();
+ index++;
+ }
}
}
+
}
Modified: ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt?rev=1488020&r1=1488019&r2=1488020&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt (original)
+++ ctakes/sandbox/ctakes-sectionizer/src/main/resources/org/apache/ctakes/core/sections/ccda_sections.txt Thu May 30 21:36:21 2013
@@ -4,31 +4,42 @@
# http://bluebuttonplus.org/healthrecords.html
# http://cdatools.org/infocenter/index.jsp
# The format is as follows:
-# HL7 template id, LOINC Section Code, n list of header names
+# HL7 template id,LOINC Section Code,n list of header names
# Custom ones can be added to the below mapping file
-# By Default, they are case insenstive and spaces trimmed.
+# By default, they are case insensitive and spaces are trimmed.
-2.16.840.1.113883.10.20.22.1.1, 34133-9, Header, Patient information and demographics
-2.16.840.1.113883.10.20.22.2.6.1, 48765-2, Allergies, Adverse Reactions, Alerts
-2.16.840.1.113883.10.20.22.2.22.1, 46240-8, History of encounters, Encounters,Surgeries, ED visits
-2.16.840.1.113883.10.20.22.2.2.1, 11369-6, History of immunizations, Immunizations,Immunizations and vaccines
-2.16.840.1.113883.10.20.22.2.1.1, 10160-0, HISTORY OF MEDICATION USE, Medications
-2.16.840.1.113883.10.20.22.2.10, 18776-5, Treatment plan, Care Plan
-2.16.840.1.113883.10.20.22.2.11.1, 10183-2, HOSPITAL DISCHARGE MEDICATIONS, Discharge Medications
-1.3.6.1.4.1.19376.1.5.3.1.3.1, 42349-1, Reason for Referral
-2.16.840.1.113883.10.20.22.2.5.1, 11450-4, PROBLEMS, Problem List, Concerns, complaints, observations
-2.16.840.1.113883.10.20.22.2.7.1, 47519-4, Procedures, History of procedures
-2.16.840.1.113883.10.20.22.2.14, 47420-5, FUNCTIONAL STATUS, Functional and Cognitive Status, impairments
-2.16.840.1.113883.10.20.22.2.3.1, 30954-2, Results, laboratory tests, LABORATORY INFORMATION
-2.16.840.1.113883.10.20.22.2.17, 29762-2, Social History, Observations like smoking, drinking
-2.16.840.1.113883.10.20.22.2.4.1, 8716-3, Vital Signs,height, weight, blood pressure
-2.16.840.1.113883.10.20.22.2.41, 8653-8, HOSPITAL DISCHARGE INSTRUCTIONS, Discharge Instructions, Written discharge instructions
-
-2.16.840.1.113883.10.20.22.2.15, 10157-6, Family History
-1.3.6.1.4.1.19376.1.5.3.1.1.13.2.1, 10154-3, CHIEF COMPLAINT
-2.16.840.1.113883.10.20.22.2.37,55109-3, Complications
-2.16.840.1.113883.10.20.22.2.20, 11348-0, HISTORY OF PAST ILLNESS
-1.3.6.1.4.1.19376.1.5.3.1.3.4, 10164-2, HISTORY OF PRESENT ILLNESS
-2.16.840.1.113883.10.20.2.5, 10210-3, GENERAL STATUS
-2.16.840.1.113883.10.20.22.2.24, 11535-2, Hospital Discharge Diagnosis
-2.16.840.1.113883.10.20.22.2.16, 11493-4, Hospital Discharge Studies Summary
\ No newline at end of file
+2.16.840.1.113883.10.20.22.2.21,42348-3,Advance Directives
+2.16.840.1.113883.10.20.22.2.6.1,48765-2,Allergies,Adverse Reactions,allergy
+2.16.840.1.113883.10.20.22.2.25,59774-0,Anesthesia Section
+2.16.840.1.113883.10.20.22.2.9,51847-2,ASSESSMENT AND PLAN
+2.16.840.1.113883.10.20.22.2.8,51848-0,Assessments
+2.16.840.1.113883.10.20.22.2.13,46239-0,Chief Complaint and Reason for Visit
+1.3.6.1.4.1.19376.1.5.3.1.1.13.2.1,10154-3,CHIEF COMPLAINT,admit diagnosis,principal discharge diagnosis,principal diagnosis,principal diagnoses,secondary diagnosis,other medical issues considered at this time
+2.16.840.1.113883.10.20.22.2.37,55109-3,Complications
+1.3.6.1.4.1.19376.1.5.3.1.3.33,42344-2,Discharge Diet
+2.16.840.1.113883.10.20.22.2.22.1,46240-8,Encounters,History of encounters,Surgeries,ED visits
+2.16.840.1.113883.10.20.22.2.15,10157-6,Family History
+2.16.840.1.113883.10.20.22.2.14,47420-5,FUNCTIONAL STATUS,Functional and Cognitive Status,impairments
+2.16.840.1.113883.10.20.2.5,10210-3,GENERAL STATUS,CURRENT HEALTH STATUS
+2.16.840.1.113883.10.20.22.1.1,34133-9,Header,Patient information and demographics,IDENTIFYING DATA,identification,record
+2.16.840.1.113883.10.20.22.2.20,11348-0,HISTORY OF PAST ILLNESS,PAST MEDICAL HISTORY
+1.3.6.1.4.1.19376.1.5.3.1.3.4,10164-2,HISTORY OF PRESENT ILLNESS,brief history of physical illness,history of present illness,history of the present illness
+2.16.840.1.113883.10.20.22.2.2.1,11369-6,History of immunizations,Immunizations,Immunizations and vaccines
+2.16.840.1.113883.10.20.22.2.1.1,10160-0,HISTORY OF MEDICATION USE,Medications,current medications
+2.16.840.1.113883.10.20.22.2.43,46241-6,HOSPITAL ADMISSION DX,rx on admit
+2.16.840.1.113883.10.20.22.2.41,8653-8,HOSPITAL DISCHARGE INSTRUCTIONS,Discharge Instructions, Written discharge instructions
+2.16.840.1.113883.10.20.22.2.24,11535-2,Hospital Discharge Diagnosis,discharge diagnosis,FINAL DIAGNOSIS
+2.16.840.1.113883.10.20.22.2.16,11493-4,Hospital Discharge Studies Summary
+2.16.840.1.113883.10.20.22.2.45,69730-0,Instructions
+2.16.840.1.113883.10.20.22.2.10,18776-5,Treatment plan,Care Plan
+2.16.840.1.113883.10.20.22.2.11.1,10183-2,HOSPITAL DISCHARGE MEDICATIONS,Discharge Medications
+1.3.6.1.4.1.19376.1.5.3.1.3.1,42349-1,Reason for Referral
+2.16.840.1.113883.10.20.7.12,10216-0,Operative Note Fluids
+2.16.840.1.113883.10.20.7.14,10223-6,Operative Note Surgical
+2.16.840.1.113883.10.20.2.10,29545-1,PHYSICAL EXAMINATION,physical exam
+2.16.840.1.113883.10.20.22.2.18,48768-6,Payers
+2.16.840.1.113883.10.20.22.2.5.1,11450-4,PROBLEMS,Problem List,Concerns,complaints,observations
+2.16.840.1.113883.10.20.22.2.7.1,47519-4,Procedures, History of procedures
+2.16.840.1.113883.10.20.22.2.3.1,30954-2,Results,laboratory tests,LABORATORY INFORMATION,laboratory data,laboratories
+2.16.840.1.113883.10.20.22.2.17,29762-2,Social History,Observations like smoking,drinking
+2.16.840.1.113883.10.20.22.2.4.1,8716-3,Vital Signs
\ No newline at end of file
Modified: ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java?rev=1488020&r1=1488019&r2=1488020&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java (original)
+++ ctakes/sandbox/ctakes-sectionizer/src/test/java/org/apache/ctakes/core/ae/TestCDASegmentAnnotator.java Thu May 30 21:36:21 2013
@@ -18,6 +18,8 @@
*/
package org.apache.ctakes.core.ae;
+import junit.framework.Assert;
+
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -30,7 +32,7 @@ import org.uimafit.component.JCasAnnotat
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.CollectionReaderFactory;
import org.uimafit.factory.TypeSystemDescriptionFactory;
-import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.pipeline.JCasIterable;
import org.uimafit.util.JCasUtil;
public class TestCDASegmentAnnotator {
@@ -39,11 +41,10 @@ public class TestCDASegmentAnnotator {
@Test
public void TestCDASegmentPipeLine() throws Exception {
-
TypeSystemDescription typeSystem = TypeSystemDescriptionFactory
.createTypeSystemDescription();
- CollectionReader reader1 = CollectionReaderFactory
+ CollectionReader reader = CollectionReaderFactory
.createCollectionReader(FilesCollectionReader.class,
typeSystem, FilesCollectionReader.PARAM_ROOT_FILE,
INPUT_FILE);
@@ -52,14 +53,38 @@ public class TestCDASegmentAnnotator {
.createPrimitive(CDASegmentAnnotator.class, typeSystem);
AnalysisEngine dumpOutput = AnalysisEngineFactory.createPrimitive(
DumpOutputAE.class, typeSystem);
+ // SimplePipeline.runPipeline(reader, sectionAnnotator, dumpOutput);
+ JCasIterable casIter = new JCasIterable(reader, sectionAnnotator,
+ dumpOutput);
+ final String expected_hpi_section = "1.3.6.1.4.1.19376.1.5.3.1.3.4";
+ final int expected_begin = 219;
+ final int expected_end = 1612;
+ boolean section_exists = false;
+ int section_begin = 0;
+ int section_end = 0;
- SimplePipeline.runPipeline(reader1, sectionAnnotator, dumpOutput);
+ while (casIter.hasNext()) {
+ JCas jCas = casIter.next();
+ for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
+ if (expected_hpi_section.equalsIgnoreCase(segment.getId())) {
+ section_exists = true;
+ section_begin = segment.getBegin();
+ section_end = segment.getEnd();
+ break;
+ }
+ }
+ }
+ Assert.assertEquals(section_exists, true);
+ Assert.assertEquals(expected_begin, section_begin);
+ Assert.assertEquals(expected_end, section_end);
}
public static class DumpOutputAE extends JCasAnnotator_ImplBase {
public void process(JCas jCas) throws AnalysisEngineProcessException {
for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
- System.out.println("Segment:" + segment.getId());
+ System.out.println("Segment:" + segment.getId() + " Begin:"
+ + segment.getBegin() + " End:" + segment.getEnd());
+ // System.out.println("Text" + segment.getCoveredText());
}
}
}